def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">;
def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">;
}
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsics.
+//
+
+// Every PPC intrinsic lives under the "llvm.ppc." prefix.
+let TargetPrefix = "ppc" in {
+  /// PowerPC_QPX_Intrinsic - Common base of the QPX intrinsics.  The GCC
+  /// builtin name is "__builtin_qpx_" followed by GCCIntSuffix; the result
+  /// types, parameter types and properties are forwarded to Intrinsic.
+  class PowerPC_QPX_Intrinsic<string GCCIntSuffix,
+                              list<LLVMType> ret_types,
+                              list<LLVMType> param_types,
+                              list<IntrinsicProperty> properties>
+    : GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
+      Intrinsic<ret_types, param_types, properties>;
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsic Class Definitions.
+//
+
+/// PowerPC_QPX_FF_Intrinsic - Unary QPX intrinsic: consumes one v4f64 vector
+/// and produces one v4f64 vector.  Marked IntrNoMem.
+class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          /*ret_types=*/[llvm_v4f64_ty],
+                          /*param_types=*/[llvm_v4f64_ty],
+                          /*properties=*/[IntrNoMem]>;
+
+/// PowerPC_QPX_FFF_Intrinsic - Binary QPX intrinsic: consumes two v4f64
+/// vectors and produces one v4f64 vector.  Marked IntrNoMem.
+class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          /*ret_types=*/[llvm_v4f64_ty],
+                          /*param_types=*/[llvm_v4f64_ty, llvm_v4f64_ty],
+                          /*properties=*/[IntrNoMem]>;
+
+/// PowerPC_QPX_FFFF_Intrinsic - Ternary QPX intrinsic: consumes three v4f64
+/// vectors and produces one v4f64 vector.  Marked IntrNoMem.
+class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          /*ret_types=*/[llvm_v4f64_ty],
+                          /*param_types=*/[llvm_v4f64_ty, llvm_v4f64_ty,
+                                           llvm_v4f64_ty],
+                          /*properties=*/[IntrNoMem]>;
+
+/// PowerPC_QPX_Load_Intrinsic - QPX load intrinsic: its single operand is a
+/// pointer and its result is a v4f64.  Marked IntrReadArgMem.
+class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          /*ret_types=*/[llvm_v4f64_ty],
+                          /*param_types=*/[llvm_ptr_ty],
+                          /*properties=*/[IntrReadArgMem]>;
+
+/// PowerPC_QPX_LoadPerm_Intrinsic - QPX intrinsic whose single operand is a
+/// pointer and whose result is a v4f64 permutation.  Unlike
+/// PowerPC_QPX_Load_Intrinsic, it is marked IntrNoMem.
+class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          /*ret_types=*/[llvm_v4f64_ty],
+                          /*param_types=*/[llvm_ptr_ty],
+                          /*properties=*/[IntrNoMem]>;
+
+/// PowerPC_QPX_Store_Intrinsic - QPX store intrinsic: the operands are the
+/// v4f64 value followed by the pointer, and there is no result.  Marked
+/// IntrReadWriteArgMem.
+class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          /*ret_types=*/[],
+                          /*param_types=*/[llvm_v4f64_ty, llvm_ptr_ty],
+                          /*properties=*/[IntrReadWriteArgMem]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in { // All intrinsics defined here start with "llvm.ppc.".
+ // Add and subtract instructions (two v4f64 operands, per the FFF class)
+ def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
+ def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
+ def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
+ def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
+
+ // Estimate instructions (one v4f64 operand, per the FF class)
+ def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
+ def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
+ def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
+ def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
+
+ // Multiply instructions
+ def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
+ def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
+ def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
+ def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
+
+ // Multiply-add instructions (three v4f64 operands, per the FFFF class)
+ def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
+ def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
+ def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
+ def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
+ def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
+ def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
+ def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
+ def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
+ def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
+ def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
+ def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
+ def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
+ def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
+ def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
+ def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
+ def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
+
+ // Select instruction (three v4f64 operands)
+ def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
+
+ // Permute instruction (three v4f64 operands)
+ def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
+
+ // Convert and round instructions (operand and result are both typed v4f64)
+ def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
+ def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
+ def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
+ def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
+ def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
+ def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
+ def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
+ def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
+ def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
+ def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
+ def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
+ def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
+ def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
+ def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
+ def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
+ def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
+ def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
+
+ // Move instructions
+ def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
+ def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
+ def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
+ def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
+
+ // Compare instructions (the result is typed v4f64 by the FFF class)
+ def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
+ def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
+ def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
+ def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
+
+ // Load instructions (pointer operand, v4f64 result, IntrReadArgMem)
+ def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
+ def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
+ def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
+ def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
+
+ def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
+ def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
+ def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
+ def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
+ def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
+ def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
+ def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
+ def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
+
+ def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">; // LoadPerm group: pointer operand, IntrNoMem
+ def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
+ def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
+ def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
+
+ // Store instructions (v4f64 value then pointer, no result, IntrReadWriteArgMem)
+ def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
+ def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
+ def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
+ def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
+
+ def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
+ def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
+ def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
+ def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
+ def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
+ def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
+
+ // Logical and permutation formation (both take an i32 operand and are IntrNoMem)
+ def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
+ [llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
+ [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
+}
+
PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
};
+// QPX register table, indexed by register number (QF0..QF31).  It is a
+// read-only lookup table, so it is declared const and typed as MCPhysReg,
+// matching the CRBITRegs table that follows it.
+static const MCPhysReg QFRegs[32] = {
+  PPC::QF0,  PPC::QF1,  PPC::QF2,  PPC::QF3,
+  PPC::QF4,  PPC::QF5,  PPC::QF6,  PPC::QF7,
+  PPC::QF8,  PPC::QF9,  PPC::QF10, PPC::QF11,
+  PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+  PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+  PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+  PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+  PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
static const MCPhysReg CRBITRegs[32] = {
PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
bool isU8ImmX8() const { return Kind == Immediate &&
isUInt<8>(getImm()) &&
(getImm() & 7) == 0; }
+  /// True when this operand is an immediate whose value fits in 12 unsigned
+  /// bits.
+  bool isU12Imm() const {
+    if (Kind != Immediate)
+      return false;
+    return isUInt<12>(getImm());
+  }
bool isU16Imm() const {
switch (Kind) {
case Expression:
Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
}
+  /// Append to Inst the register operand that QFRegs maps getReg() to.
+  /// Exactly one operand is expected (N == 1).
+  void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const unsigned PhysReg = QFRegs[getReg()];
+    Inst.addOperand(MCOperand::CreateReg(PhysReg));
+  }
+
+  /// Append to Inst the register operand that QFRegs maps getReg() to; the
+  /// QSRC operand class uses the same QFRegs table as QFRC.  Exactly one
+  /// operand is expected (N == 1).
+  void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const unsigned PhysReg = QFRegs[getReg()];
+    Inst.addOperand(MCOperand::CreateReg(PhysReg));
+  }
+
+  /// Append to Inst the register operand that QFRegs maps getReg() to; the
+  /// QBRC operand class uses the same QFRegs table as QFRC.  Exactly one
+  /// operand is expected (N == 1).
+  void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const unsigned PhysReg = QFRegs[getReg()];
+    Inst.addOperand(MCOperand::CreateReg(PhysReg));
+  }
+
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
+static const unsigned QFRegs[] = { // QPX registers QF0..QF31, in index order
+ PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+ PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+ PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+ PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+ PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+ PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+ PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+ PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
+
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const unsigned (&Regs)[N]) {
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address, // not used by this decoder
+ const void *Decoder) { // not used by this decoder
+ return decodeRegisterClass(Inst, RegNo, QFRegs); // decode RegNo against the QFRegs table
+}
+
+#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass // QSRC is decoded exactly like QFRC
+#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass // QBRC is decoded exactly like QFRC
+
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address, const void *Decoder) {
uint32_t Inst =
(Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
+ if ((STI.getFeatureBits() & PPC::FeatureQPX) != 0) {
+ DecodeStatus result =
+ decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
+ if (result != MCDisassembler::Fail)
+ return result;
+
+ MI.clear();
+ }
+
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
}
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+  const char *Name = getRegisterName(RegNo);
+
+  // Names that do not start with 'q' (i.e. non-QPX) are printed verbatim.
+  if (Name[0] != 'q') {
+    OS << Name;
+    return;
+  }
+
+  // The system toolchain on the BG/Q does not understand QPX register names
+  // in .cfi_* directives, so emit the name of the floating-point subregister
+  // instead: the same spelling with the leading 'q' replaced by 'f'.
+  OS << 'f' << (Name + 1);
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << (unsigned int)Value;
}
+/// printU12ImmOperand - Print operand OpNo of MI as an unsigned 12-bit
+/// immediate in decimal.
+void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  // Range-check the full-width immediate.  Narrowing it to 16 bits first
+  // would let a value with stray high bits (e.g. 0x10005) slip past the
+  // assertion.
+  int64_t Imm = MI->getOperand(OpNo).getImm();
+  assert(Imm >= 0 && Imm <= 4095 && "Invalid u12imm argument!");
+  O << (unsigned short)Imm;
+}
+
+
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
switch (RegName[0]) {
case 'r':
case 'f':
+ case 'q': // for QPX
case 'v':
if (RegName[1] == 's')
return RegName + 2;
// only the ELFv2 ABI fully utilizes all these registers.
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
-
+
+ // QPX vectors are returned in QF1 and QF2.
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
- CCIfType<[v2f64, v2i64],
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
// No explicit register is specified for the AnyReg calling convention. The
CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
- CCIfType<[v2f64, v2i64],
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+ CCIfType<[v4f64, v4f32, v4i1],
+ CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
]>;
//===----------------------------------------------------------------------===//
// alignment and size as doubles.
CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+ // QPX vectors that are stored in double precision need 32-byte alignment.
+ CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
]>;
// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
// put vector arguments in vector registers before putting them on the stack.
def CC_PPC32_SVR4 : CallingConv<[
+ // QPX vectors mirror the scalar FP convention: QF1-QF8 when hasQPX().
+ CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+ CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
 // The first 12 Vector arguments are passed in AltiVec registers.
- CCIfType<[v16i8, v8i16, v4i32, v4f32],
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
- CCIfType<[v2f64, v2i64],
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9,
+ V10, V11, V12, V13]>>>,
+ CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
 CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
- VSH10, VSH11, VSH12, VSH13]>>,
+ VSH10, VSH11, VSH12, VSH13]>>>,
 CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+ STI.getPlatformStackAlignment(), 0),
Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
// Altivec Vector compare instructions do not set any CR register by default and
// vector compare operations return the same type as the operands.
if (LHS.getValueType().isVector()) {
+ if (PPCSubTarget->hasQPX())
+ return nullptr;
+
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
switch (LoadedVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+ case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
case MVT::f64: Opcode = PPC::LFDUX; break;
case MVT::f32: Opcode = PPC::LFSUX; break;
case MVT::i32: Opcode = PPC::LWZUX; break;
SelectCCOp = PPC::SELECT_CC_VSFRC;
else
SelectCCOp = PPC::SELECT_CC_F8;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+ SelectCCOp = PPC::SELECT_CC_QFRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+ SelectCCOp = PPC::SELECT_CC_QSRC;
+ else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+ SelectCCOp = PPC::SELECT_CC_QBRC;
else if (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)
SelectCCOp = PPC::SELECT_CC_VSRC;
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC: {
case PPC::SELECT_I8:
case PPC::SELECT_F4:
case PPC::SELECT_F8:
+ case PPC::SELECT_QFRC:
+ case PPC::SELECT_QSRC:
+ case PPC::SELECT_QBRC:
case PPC::SELECT_VRRC:
case PPC::SELECT_VSFRC:
case PPC::SELECT_VSRC:
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
}
+ if (Subtarget.hasQPX()) {
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f64, Custom);
+
+ setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
+
+ addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
+
+ setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FREM, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
+
+ setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
+ setOperationAction(ISD::STORE , MVT::v4f32, Custom);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
+ setOperationAction(ISD::FABS , MVT::v4f32, Legal);
+ setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
+
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+ setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
+
+ setOperationAction(ISD::AND , MVT::v4i1, Legal);
+ setOperationAction(ISD::OR , MVT::v4i1, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i1, Legal);
+
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
+ setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
+
+ setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
+ setOperationAction(ISD::STORE , MVT::v4i1, Custom);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+
+ addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+
+ // These need to set FE_INEXACT, and so cannot be vectorized here.
+ setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+
+ if (TM.Options.UnsafeFPMath) {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ }
+ }
+
if (Subtarget.has64BitSupport())
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
}
setBooleanContents(ZeroOrOneBooleanContent);
- // Altivec instructions set fields to all zeros or all ones.
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (Subtarget.hasAltivec()) {
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ }
if (!isPPC64) {
// These libcalls are not available in 32-bit.
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
case PPCISD::SC: return "PPCISD::SC";
+ case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
+ case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
+ case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
+ case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
+ case PPCISD::QBFLT: return "PPCISD::QBFLT";
+ case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
}
}
-EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
 if (!VT.isVector())
 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+ if (Subtarget.hasQPX()) // with QPX, a vector compare yields an i1 vector
+ return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); // same element count as VT
+
 return VT.changeVectorElementTypeToInteger();
}
return SDValue();
}
+/// isQVALIGNIShuffleMask - Return the shift amount when N's shuffle mask has
+/// the qvaligni form (four consecutively numbered elements, undefs allowed),
+/// and -1 when it does not.
+int PPC::isQVALIGNIShuffleMask(SDNode *N) {
+  // Only the four-element QPX types qualify.
+  const EVT ResultVT = N->getValueType(0);
+  const bool IsQPXType = ResultVT == MVT::v4f64 || ResultVT == MVT::v4f32 ||
+                         ResultVT == MVT::v4i1;
+  if (!IsQPXType)
+    return -1;
+
+  ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(N);
+
+  // Skip leading undef entries to find the first defined mask element.
+  unsigned First = 0;
+  while (First != 4 && Shuffle->getMaskElt(First) < 0)
+    ++First;
+
+  // A mask made only of undefs is rejected.
+  if (First == 4)
+    return -1;
+
+  // The first defined element fixes the shift; it cannot be smaller than its
+  // own position.
+  const unsigned Lead = Shuffle->getMaskElt(First);
+  if (Lead < First)
+    return -1;
+  const unsigned Shift = Lead - First;
+
+  // Each remaining element must be undef or continue the consecutive run.
+  for (unsigned Idx = First + 1; Idx != 4; ++Idx)
+    if (!isConstantOrUndef(Shuffle->getMaskElt(Idx), Shift + Idx))
+      return -1;
+
+  return Shift;
+}
+
//===----------------------------------------------------------------------===//
// Addressing Mode Selection
//===----------------------------------------------------------------------===//
} else
return false;
- // PowerPC doesn't have preinc load/store instructions for vectors.
- if (VT.isVector())
- return false;
+ // PowerPC doesn't have preinc load/store instructions for vectors (except
+ // for QPX, which does have preinc r+r forms).
+ if (VT.isVector()) {
+ if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
+ return false;
+ } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
+ AM = ISD::PRE_INC;
+ return true;
+ }
+ }
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
return FPR;
}
+/// GetQFPR - Return the QPX registers to allocate for arguments, QF1 through
+/// QF13, as a pointer to the first entry of a function-local static table.
+static const MCPhysReg *GetQFPR() {
+  static const MCPhysReg QPXArgRegs[] = {
+    PPC::QF1,  PPC::QF2,  PPC::QF3,  PPC::QF4,  PPC::QF5,
+    PPC::QF6,  PPC::QF7,  PPC::QF8,  PPC::QF9,  PPC::QF10,
+    PPC::QF11, PPC::QF12, PPC::QF13
+  };
+
+  return &QPXArgRegs[0];
+}
+
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
Align = 16;
+ // QPX vector types stored in double-precision are padded to a 32 byte
+ // boundary.
+ else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
+ Align = 32;
// ByVal parameters are aligned as requested.
if (Flags.isByVal()) {
unsigned ParamAreaSize,
unsigned &ArgOffset,
unsigned &AvailableFPRs,
- unsigned &AvailableVRs) {
+ unsigned &AvailableVRs, bool HasQPX) {
bool UseMemory = false;
// Respect alignment of argument on the stack.
// However, if the argument is actually passed in an FPR or a VR,
// we don't use memory after all.
if (!Flags.isByVal()) {
- if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
+ // QPX registers overlap with the scalar FP registers.
+ (HasQPX && (ArgVT == MVT::v4f32 ||
+ ArgVT == MVT::v4f64 ||
+ ArgVT == MVT::v4i1)))
if (AvailableFPRs > 0) {
--AvailableFPRs;
return false;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
- case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v4f32:
+ RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+ break;
case MVT::v2f64:
case MVT::v2i64:
RC = &PPC::VSHRCRegClass;
break;
+ case MVT::v4f64:
+ RC = &PPC::QFRCRegClass;
+ break;
+ case MVT::v4i1:
+ RC = &PPC::QBRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
+ static const MCPhysReg *QFPR = GetQFPR();
+
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
+ const unsigned Num_QFPR_Regs = Num_FPR_Regs;
// Do a first pass over the arguments to determine whether the ABI
// guarantees that our caller has allocated the parameter save area
for (unsigned i = 0, e = Ins.size(); i != e; ++i)
if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
PtrByteSize, LinkageSize, ParamAreaSize,
- NumBytes, AvailableFPRs, AvailableVRs))
+ NumBytes, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
SmallVector<SDValue, 8> MemOps;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
if (CallConv != CallingConv::Fast || needsLoad)
ArgOffset += 16;
break;
+ } // not QPX
+
+ assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+ /* fall through */
+
+ case MVT::v4f64:
+ case MVT::v4i1:
+ // QPX vectors are treated like their scalar floating-point subregisters
+ // (except that they're larger).
+ unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
+ if (QFPR_idx != Num_QFPR_Regs) {
+ const TargetRegisterClass *RC;
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
+ case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
+ default: RC = &PPC::QBRCRegClass; break;
+ }
+
+ unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++QFPR_idx;
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputeArgOffset();
+ needsLoad = true;
+ }
+ if (CallConv != CallingConv::Fast || needsLoad)
+ ArgOffset += Sz;
+ break;
}
// We need to load the argument to a virtual register if we determined
unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
unsigned NumBytes = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned &QFPR_idx = FPR_idx;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
+ static const MCPhysReg *QFPR = GetQFPR();
+
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
+ const unsigned NumQFPRs = NumFPRs;
// When using the fast calling convention, we don't provide backing for
// arguments that will be in registers.
if (++NumGPRsUsed <= NumGPRs)
continue;
break;
- case MVT::f32:
- case MVT::f64:
- if (++NumFPRsUsed <= NumFPRs)
- continue;
- break;
- case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
if (++NumVRsUsed <= NumVRs)
continue;
break;
+ case MVT::v4f32:
+ // When using QPX, this is handled like a FP register, otherwise, it
+ // is an Altivec register.
+ if (Subtarget.hasQPX()) {
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ } else {
+ if (++NumVRsUsed <= NumVRs)
+ continue;
+ }
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v4f64: // QPX
+ case MVT::v4i1: // QPX
+ if (++NumFPRsUsed <= NumFPRs)
+ continue;
+ break;
}
}
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ if (!Subtarget.hasQPX()) {
// These can be scalar arguments or elements of a vector array type
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
if (CallConv != CallingConv::Fast)
ArgOffset += 16;
break;
+ } // not QPX
+
+ assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
+ "Invalid QPX parameter type");
+
+ /* fall through */
+ case MVT::v4f64:
+ case MVT::v4i1: {
+ bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
+ if (isVarArg) {
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (QFPR_idx != NumQFPRs) {
+ SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
+ Store, PtrOff, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
+ }
+ ArgOffset += (IsF32 ? 16 : 32);
+ for (unsigned i=0; i<(IsF32 ? 16 : 32); i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs QPX params go into registers or on the stack.
+ if (QFPR_idx != NumQFPRs) {
+ RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
+ } else {
+ if (CallConv == CallingConv::Fast)
+ ComputePtrOff();
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ if (CallConv == CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ }
+
+ if (CallConv != CallingConv::Fast)
+ ArgOffset += (IsF32 ? 16 : 32);
+ break;
+ }
}
}
}
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return LowerVectorLoad(Op, DAG);
+
assert(Op.getValueType() == MVT::i1 &&
"Custom lowering only for i1 loads");
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getOperand(1).getValueType().isVector())
+ return LowerVectorStore(Op, DAG);
+
assert(Op.getOperand(1).getValueType() == MVT::i1 &&
"Custom lowering only for i1 stores");
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+
+ if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
+ if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
+ return SDValue();
+
+ SDValue Value = Op.getOperand(0);
+ // The values are now known to be -1 (false) or 1 (true). To convert this
+ // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+ // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+ Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+ SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+ if (Op.getValueType() != MVT::v4f64)
+ Value = DAG.getNode(ISD::FP_ROUND, dl,
+ Op.getValueType(), Value, DAG.getIntPtrConstant(1));
+ return Value;
+ }
+
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+ // We first build an i32 vector, load it into a QPX register,
+ // then convert it to a floating-point vector and compare it
+ // to a zero vector to get the boolean result.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ assert(BVN->getNumOperands() == 4 &&
+ "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+ bool IsConst = true;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+ IsConst = false;
+ break;
+ }
+ }
+
+ if (IsConst) {
+ Constant *One =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+ Constant *NegOne =
+ ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+ SmallVector<Constant*, 4> CV(4, NegOne);
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+ CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+ else if (cast<ConstantSDNode>(BVN->getOperand(i))->
+ getConstantIntValue()->isZero())
+ continue;
+ else
+ CV[i] = One;
+ }
+
+ Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
+ 16 /* alignment */);
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(DAG.getEntryNode());
+ Ops.push_back(CPIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4i1);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
+ dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool());
+ }
+
+ SmallVector<SDValue, 4> Stores;
+ for (unsigned i = 0; i < 4; ++i) {
+ if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+ unsigned Offset = 4*i;
+ SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
+ Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+ unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+ if (StoreSize > 4) {
+ Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+ BVN->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
+ MVT::i32, false, false, 0));
+ } else {
+ SDValue StoreValue = BVN->getOperand(i);
+ if (StoreSize < 4)
+ StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ StoreValue, Idx,
+ PtrInfo.getWithOffset(Offset),
+ false, false, 0));
+ }
+ }
+
+ SDValue StoreChain;
+ if (!Stores.empty())
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+ else
+ StoreChain = DAG.getEntryNode();
+
+ // Now load from v4i32 into the QPX register; this will extend it to
+ // v4i64 but not yet convert it to a floating point. Nevertheless, this
+ // is typed as v4f64 because the QPX register integer states are not
+ // explicitly represented.
+
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(StoreChain);
+ Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, MVT::i32));
+ Ops.push_back(FIdx);
+
+ SmallVector<EVT, 2> ValueVTs;
+ ValueVTs.push_back(MVT::v4f64);
+ ValueVTs.push_back(MVT::Other); // chain
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+
+ SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
+ dl, VTs, Ops, MVT::v4i32, PtrInfo);
+ LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+ DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, MVT::i32),
+ LoadedVect);
+
+ SDValue FPZeros = DAG.getConstantFP(0.0, MVT::f64);
+ FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+ FPZeros, FPZeros, FPZeros, FPZeros);
+
+ return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
+ }
+
+ // All other QPX vectors are handled by generic code.
+ if (Subtarget.hasQPX())
+ return SDValue();
+
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
unsigned SplatBitSize;
EVT VT = Op.getValueType();
bool isLittleEndian = Subtarget.isLittleEndian();
+ if (Subtarget.hasQPX()) {
+ if (VT.getVectorNumElements() != 4)
+ return SDValue();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
+ if (AlignIdx != -1) {
+ return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
+ DAG.getConstant(AlignIdx, MVT::i32));
+ } else if (SVOp->isSplat()) {
+ int SplatIdx = SVOp->getSplatIndex();
+ if (SplatIdx >= 4) {
+ std::swap(V1, V2);
+ SplatIdx -= 4;
+ }
+
+ // FIXME: If SplatIdx == 0 and the input came from a load, then there is
+ // nothing to do.
+
+ return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
+ DAG.getConstant(SplatIdx, MVT::i32));
+ }
+
+ // Lower this into a qvgpci/qvfperm pair.
+
+ // Compute the qvgpci literal
+ unsigned idx = 0;
+ for (unsigned i = 0; i < 4; ++i) {
+ int m = SVOp->getMaskElt(i);
+ unsigned mm = m >= 0 ? (unsigned) m : i;
+ idx |= mm << (3-i)*3;
+ }
+
+ SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
+ DAG.getConstant(idx, MVT::i32));
+ return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
+ }
+
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
false, false, false, 0);
}
+/// Custom lowering for EXTRACT_VECTOR_ELT whose source is a QPX v4i1 vector.
+/// The boolean vector is converted to integer words, written to a 16-byte
+/// stack slot, and the word selected by the (constant) element index is
+/// loaded back. The result is an i32, truncated to i1 when CR bits are in use.
+SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+
+  SDValue BoolVec = Op.getOperand(0);
+  assert(BoolVec.getValueType() == MVT::v4i1 &&
+         "Unknown extract_vector_elt type");
+
+  // This mirrors the first half of the v4i1 store lowering, except that there
+  // is no incoming chain to keep track of.
+
+  // Each lane is -1 (false) or 1 (true). Mapping that onto 0 (false) and
+  // 1 (true) is (V+1.0)*0.5 = 0.5*V+0.5, i.e. one fma against a 0.5 splat.
+  SDValue AsFP = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, BoolVec);
+
+  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+  // understand how to form the extending load.
+  SDValue Half = DAG.getConstantFP(0.5, MVT::f64);
+  SDValue HalfSplat = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+                                  Half, Half, Half, Half);
+
+  SDValue ZeroOne = DAG.getNode(ISD::FMA, dl, MVT::v4f64,
+                                AsFP, HalfSplat, HalfSplat);
+
+  // Convert the lanes to integers before spilling them.
+  SDValue AsInt =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+                  DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32),
+                  ZeroOne);
+
+  // Scratch slot holding the four 32-bit words.
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  int Slot = MFI->CreateStackObject(16, 16, false);
+  MachinePointerInfo SlotInfo = MachinePointerInfo::getFixedStack(Slot);
+  SDValue SlotAddr = DAG.getFrameIndex(Slot, getPointerTy());
+  EVT AddrVT = SlotAddr.getValueType();
+
+  // Operands of the qvstfiw store intrinsic: chain, intrinsic id, value,
+  // address. The spill hangs off the entry node.
+  SDValue SpillOps[] = {
+    DAG.getEntryNode(),
+    DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32),
+    AsInt,
+    SlotAddr
+  };
+  EVT SpillVTs[] = { MVT::Other }; // chain
+
+  SDValue SpillChain =
+      DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl,
+                              DAG.getVTList(SpillVTs), SpillOps,
+                              MVT::v4i32, SlotInfo);
+
+  // Load back the requested word; elements are four bytes apart.
+  const unsigned ByteOff =
+      4*cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  SDValue EltAddr = DAG.getNode(ISD::ADD, dl, AddrVT, SlotAddr,
+                                DAG.getConstant(ByteOff, AddrVT));
+
+  SDValue IntVal = DAG.getLoad(MVT::i32, dl, SpillChain, EltAddr,
+                               SlotInfo.getWithOffset(ByteOff),
+                               false, false, false, 0);
+
+  if (Subtarget.useCRBits())
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
+
+  return IntVal;
+}
+
+
+/// Custom lowering for QPX vector loads.
+///
+/// A v4f64/v4f32 load whose alignment is at least the store size of its
+/// memory type is returned unchanged. An under-aligned one is split into four
+/// scalar (possibly extending) loads that are recombined with BUILD_VECTOR.
+/// A v4i1 load reads four consecutive bytes and reuses the v4i1 BUILD_VECTOR
+/// logic; indexed v4i1 loads are not supported.
+SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+  SDValue LoadChain = LN->getChain();
+  SDValue BasePtr = LN->getBasePtr();
+  const EVT ResVT = Op.getValueType();
+
+  if (ResVT == MVT::v4f64 || ResVT == MVT::v4f32) {
+    const EVT MemVT = LN->getMemoryVT();
+    const unsigned Alignment = LN->getAlignment();
+
+    // A properly aligned load is already legal.
+    if (Alignment >= MemVT.getStoreSize())
+      return Op;
+
+    const EVT EltVT = ResVT.getScalarType();
+    const EVT EltMemVT = MemVT.getScalarType();
+    const unsigned Stride = EltMemVT.getStoreSize();
+    // The element needs an extending load when the in-register and in-memory
+    // scalar types differ.
+    const bool NeedsExt = EltVT != EltMemVT;
+
+    SmallVector<SDValue, 8> Vals, LoadChains;
+    for (unsigned Idx = 0, Off = 0; Idx != 4; ++Idx, Off += Stride) {
+      const MachinePointerInfo EltInfo =
+          LN->getPointerInfo().getWithOffset(Off);
+      const unsigned EltAlign = MinAlign(Alignment, Off);
+
+      SDValue Load =
+          NeedsExt
+              ? DAG.getExtLoad(LN->getExtensionType(), dl, EltVT, LoadChain,
+                               BasePtr, EltInfo, EltMemVT, LN->isVolatile(),
+                               LN->isNonTemporal(), LN->isInvariant(),
+                               EltAlign, LN->getAAInfo())
+              : DAG.getLoad(EltVT, dl, LoadChain, BasePtr, EltInfo,
+                            LN->isVolatile(), LN->isNonTemporal(),
+                            LN->isInvariant(), EltAlign, LN->getAAInfo());
+
+      // Only the first element load carries the original pre-increment.
+      if (Idx == 0 && LN->isIndexed()) {
+        assert(LN->getAddressingMode() == ISD::PRE_INC &&
+               "Unknown addressing mode on vector load");
+        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
+                                  LN->getAddressingMode());
+      }
+
+      Vals.push_back(Load);
+      LoadChains.push_back(Load.getValue(1));
+
+      // Step the address to the next element.
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getConstant(Stride, BasePtr.getValueType()));
+    }
+
+    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+    SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, Vals);
+
+    if (!LN->isIndexed()) {
+      SDValue RetOps[] = { Value, TF };
+      return DAG.getMergeValues(RetOps, dl);
+    }
+
+    // Indexed form: also return value #1 of the first element load, placed
+    // between the vector and the chain.
+    SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
+    return DAG.getMergeValues(RetOps, dl);
+  }
+
+  assert(ResVT == MVT::v4i1 && "Unknown load to lower");
+  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
+
+  // To lower v4i1 from a byte array, we load the byte elements of the
+  // vector and then reuse the BUILD_VECTOR logic.
+
+  const EVT AddrVT = BasePtr.getValueType();
+  SmallVector<SDValue, 4> VectElmts, VectElmtChains;
+  for (unsigned i = 0; i != 4; ++i) {
+    SDValue EltAddr = DAG.getNode(ISD::ADD, dl, AddrVT, BasePtr,
+                                  DAG.getConstant(i, AddrVT));
+
+    SDValue Elt = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, LoadChain,
+                                 EltAddr,
+                                 LN->getPointerInfo().getWithOffset(i),
+                                 MVT::i8 /* memory type */,
+                                 LN->isVolatile(), LN->isNonTemporal(),
+                                 LN->isInvariant(),
+                                 1 /* alignment */, LN->getAAInfo());
+    VectElmts.push_back(Elt);
+    VectElmtChains.push_back(Elt.getValue(1));
+  }
+
+  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
+  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
+
+  SDValue RVals[] = { Value, LoadChain };
+  return DAG.getMergeValues(RVals, dl);
+}
+
+
+/// Custom lowering for QPX vector stores.
+///
+/// A v4f64/v4f32 store whose alignment is at least the store size of its
+/// memory type is returned unchanged. An under-aligned one is split into four
+/// scalar (possibly truncating) stores of the extracted elements, joined by a
+/// TokenFactor. A v4i1 store converts the boolean vector to integer words,
+/// spills them to a 16-byte stack slot, reloads each word and truncate-stores
+/// it as one byte to four consecutive addresses; indexed v4i1 stores are not
+/// supported.
+SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+  SDValue StoreChain = SN->getChain();
+  SDValue BasePtr = SN->getBasePtr();
+  SDValue Value = SN->getValue();
+
+  if (Value.getValueType() == MVT::v4f64 ||
+      Value.getValueType() == MVT::v4f32) {
+    EVT MemVT = SN->getMemoryVT();
+    unsigned Alignment = SN->getAlignment();
+
+    // If this store is properly aligned, then it is legal.
+    if (Alignment >= MemVT.getStoreSize())
+      return Op;
+
+    EVT ScalarVT = Value.getValueType().getScalarType(),
+        ScalarMemVT = MemVT.getScalarType();
+    unsigned Stride = ScalarMemVT.getStoreSize();
+
+    SmallVector<SDValue, 8> Stores;
+    for (unsigned Idx = 0; Idx < 4; ++Idx) {
+      SDValue Ex =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+                    DAG.getConstant(Idx, getVectorIdxTy()));
+      SDValue Store;
+      // A truncating store is needed when the in-register and in-memory
+      // scalar types differ.
+      if (ScalarVT != ScalarMemVT)
+        Store =
+          DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
+                            SN->getPointerInfo().getWithOffset(Idx*Stride),
+                            ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
+                            MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+      else
+        Store =
+          DAG.getStore(StoreChain, dl, Ex, BasePtr,
+                       SN->getPointerInfo().getWithOffset(Idx*Stride),
+                       SN->isVolatile(), SN->isNonTemporal(),
+                       MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+
+      // Only the first element store carries the original pre-increment.
+      if (Idx == 0 && SN->isIndexed()) {
+        assert(SN->getAddressingMode() == ISD::PRE_INC &&
+               "Unknown addressing mode on vector store");
+        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
+                                    SN->getAddressingMode());
+      }
+
+      // Step the address to the next element.
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getConstant(Stride, BasePtr.getValueType()));
+      Stores.push_back(Store);
+    }
+
+    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+    if (SN->isIndexed()) {
+      // Indexed form: return the chain plus value #1 of the first store.
+      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
+      return DAG.getMergeValues(RetOps, dl);
+    }
+
+    return TF;
+  }
+
+  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
+  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
+
+  // The values are now known to be -1 (false) or 1 (true). To convert this
+  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+  // understand how to form the extending load.
+  SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
+  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+  // Now convert to an integer and store.
+  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32),
+                      Value);
+
+  // Scratch slot holding the four 32-bit words.
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+  EVT PtrVT = getPointerTy();
+  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+  // Operands of the qvstfiw store intrinsic: chain, intrinsic id, value,
+  // address.
+  SmallVector<SDValue, 2> Ops;
+  Ops.push_back(StoreChain);
+  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32));
+  Ops.push_back(Value);
+  Ops.push_back(FIdx);
+
+  SmallVector<EVT, 2> ValueVTs;
+  ValueVTs.push_back(MVT::Other); // chain
+  SDVTList VTs = DAG.getVTList(ValueVTs);
+
+  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+    dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+  // Move data into the byte array.
+  SmallVector<SDValue, 4> Loads, LoadChains;
+  for (unsigned i = 0; i < 4; ++i) {
+    unsigned Offset = 4*i;
+    SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+    Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+                                PtrInfo.getWithOffset(Offset),
+                                false, false, false, 0));
+    LoadChains.push_back(Loads[i].getValue(1));
+  }
+
+  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+  SmallVector<SDValue, 4> Stores;
+  for (unsigned i = 0; i < 4; ++i) {
+    SDValue Idx = DAG.getConstant(i, BasePtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+    // The flags are passed as (isVolatile, isNonTemporal), the same order
+    // used by the scalarized stores above, so that a volatile or
+    // non-temporal v4i1 store keeps the right attribute on each byte store.
+    Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
+                                       SN->getPointerInfo().getWithOffset(i),
+                                       MVT::i8 /* memory type */,
+                                       SN->isVolatile(), SN->isNonTemporal(),
+                                       1 /* alignment */, SN->getAAInfo()));
+  }
+
+  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+  return StoreChain;
+}
+
+
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_QFRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QSRC ||
+ MI->getOpcode() == PPC::SELECT_CC_QBRC ||
MI->getOpcode() == PPC::SELECT_CC_VRRC ||
MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
MI->getOpcode() == PPC::SELECT_CC_VSRC ||
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
MI->getOpcode() == PPC::SELECT_VRRC ||
MI->getOpcode() == PPC::SELECT_VSFRC ||
MI->getOpcode() == PPC::SELECT_VSRC) {
MI->getOpcode() == PPC::SELECT_I8 ||
MI->getOpcode() == PPC::SELECT_F4 ||
MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_QFRC ||
+ MI->getOpcode() == PPC::SELECT_QSRC ||
+ MI->getOpcode() == PPC::SELECT_QBRC ||
MI->getOpcode() == PPC::SELECT_VRRC ||
MI->getOpcode() == PPC::SELECT_VSFRC ||
MI->getOpcode() == PPC::SELECT_VSRC) {
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
- (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+ (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+ (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+ (VT == MVT::v4f64 && Subtarget.hasQPX())) {
// Convergence is quadratic, so we essentially double the number of digits
// correct after every iteration. For both FRE and FRSQRTE, the minimum
// architected relative accuracy is 2^-5. When hasRecipPrec(), this is
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
EVT VT;
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default: return false;
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfiw:
+ case Intrinsic::ppc_qpx_qvstfiwa:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_vsx_stxvw4x:
return expandVSXLoadForLE(N, DCI);
}
- Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ EVT MemVT = LD->getMemoryVT();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
- if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() &&
- // P8 and later hardware should just use LOAD.
- !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
- VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+ unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+ if (LD->isUnindexed() && VT.isVector() &&
+ ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
+ // P8 and later hardware should just use LOAD.
+ !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+ (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+ LD->getAlignment() >= ScalarABIAlignment)) &&
LD->getAlignment() < ABIAlignment) {
- // This is a type-legal unaligned Altivec load.
+ // This is a type-legal unaligned Altivec or QPX load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
bool isLittleEndian = Subtarget.isLittleEndian();
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- Intrinsic::ID Intr = (isLittleEndian ?
- Intrinsic::ppc_altivec_lvsr :
- Intrinsic::ppc_altivec_lvsl);
- SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr, IntrLD, IntrPerm;
+ MVT PermCntlTy, PermTy, LDTy;
+ if (Subtarget.hasAltivec()) {
+ Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl;
+ IntrLD = Intrinsic::ppc_altivec_lvx;
+ IntrPerm = Intrinsic::ppc_altivec_vperm;
+ PermCntlTy = MVT::v16i8;
+ PermTy = MVT::v4i32;
+ LDTy = MVT::v4i32;
+ } else {
+ Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+ Intrinsic::ppc_qpx_qvlpcls;
+ IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+ Intrinsic::ppc_qpx_qvlfs;
+ IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+ PermCntlTy = MVT::v4f64;
+ PermTy = MVT::v4f64;
+ LDTy = MemVT.getSimpleVT();
+ }
+
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
// Create the new MMO for the new base load. It is like the original MMO,
// but represents an area in memory almost twice the vector size centered
// original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMMO =
- MF.getMachineMemOperand(LD->getMemOperand(),
- -LD->getMemoryVT().getStoreSize()+1,
- 2*LD->getMemoryVT().getStoreSize()-1);
+ MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+ 2*MemVT.getStoreSize()-1);
// Create the new base load.
- SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
- getPointerTy());
+ SDValue LDXIntID = DAG.getTargetConstant(IntrLD, getPointerTy());
SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue BaseLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
- DAG.getVTList(MVT::v4i32, MVT::Other),
- BaseLoadOps, MVT::v4i32, BaseMMO);
+ DAG.getVTList(PermTy, MVT::Other),
+ BaseLoadOps, LDTy, BaseMMO);
// Note that the value of IncOffset (which is provided to the next
// load's pointer info offset value, and thus used to calculate the
MachineMemOperand *ExtraMMO =
MF.getMachineMemOperand(LD->getMemOperand(),
- 1, 2*LD->getMemoryVT().getStoreSize()-1);
+ 1, 2*MemVT.getStoreSize()-1);
SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
SDValue ExtraLoad =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
- DAG.getVTList(MVT::v4i32, MVT::Other),
- ExtraLoadOps, MVT::v4i32, ExtraMMO);
+ DAG.getVTList(PermTy, MVT::Other),
+ ExtraLoadOps, LDTy, ExtraMMO);
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
BaseLoad.getValue(1), ExtraLoad.getValue(1));
// and ExtraLoad here.
SDValue Perm;
if (isLittleEndian)
- Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ Perm = BuildIntrinsicOp(IntrPerm,
ExtraLoad, BaseLoad, PermCntl, DAG, dl);
else
- Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ Perm = BuildIntrinsicOp(IntrPerm,
BaseLoad, ExtraLoad, PermCntl, DAG, dl);
- if (VT != MVT::v4i32)
- Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+ if (VT != PermTy)
+ Perm = Subtarget.hasAltivec() ?
+ DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+ DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
+ DAG.getTargetConstant(1, MVT::i64));
+ // second argument is 1 because this rounding
+ // is always exact.
// The output of the permutation is our loaded result, the TokenFactor is
// our new chain.
break;
case ISD::INTRINSIC_WO_CHAIN: {
bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
: Intrinsic::ppc_altivec_lvsl);
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
- N->getOperand(1)->getOpcode() == ISD::ADD) {
+ if ((IID == Intr ||
+ IID == Intrinsic::ppc_qpx_qvlpcld ||
+ IID == Intrinsic::ppc_qpx_qvlpcls) &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
+ int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+ 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+
if (DAG.MaskedValueIsZero(
Add->getOperand(1),
- APInt::getAllOnesValue(4 /* 16 byte alignment */)
+ APInt::getAllOnesValue(Bits /* alignment */)
.zext(
Add.getValueType().getScalarType().getSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
UE = BasePtr->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intr) {
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
}
}
}
+
+ if (isa<ConstantSDNode>(Add->getOperand(1))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(UI->getOperand(1)) &&
+ (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
+ cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
+ (1 << Bits) == 0) {
+ SDNode *OtherAdd = *UI;
+ for (SDNode::use_iterator VI = OtherAdd->use_begin(),
+ VE = OtherAdd->use_end(); VI != VE; ++VI) {
+ if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
+ return SDValue(*VI, 0);
+ }
+ }
+ }
+ }
+ }
}
}
return std::make_pair(0U, &PPC::F4RCRegClass);
if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &PPC::F8RCRegClass);
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
break;
case 'v':
+ if (VT == MVT::v4f64 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QFRCRegClass);
+ if (VT == MVT::v4f32 && Subtarget.hasQPX())
+ return std::make_pair(0U, &PPC::QSRCRegClass);
return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
Type *Ty) const {
- // FIXME: PPC does not allow r+i addressing modes for vectors!
+ // PPC does not allow r+i addressing modes for vectors!
+ if (Ty->isVectorTy() && AM.BaseOffs != 0)
+ return false;
// PPC allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
unsigned Intrinsic) const {
switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfd:
+ case Intrinsic::ppc_qpx_qvlfs:
+ case Intrinsic::ppc_qpx_qvlfcd:
+ case Intrinsic::ppc_qpx_qvlfcs:
+ case Intrinsic::ppc_qpx_qvlfiwa:
+ case Intrinsic::ppc_qpx_qvlfiwz:
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
case Intrinsic::ppc_vsx_lxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvlfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
Info.writeMem = false;
return true;
}
+ // QPX "a"-suffixed load intrinsics: describe each one as a non-volatile,
+ // read-only memory access through its first argument.
+ case Intrinsic::ppc_qpx_qvlfda:
+ case Intrinsic::ppc_qpx_qvlfsa:
+ case Intrinsic::ppc_qpx_qvlfcda:
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ case Intrinsic::ppc_qpx_qvlfiwaa:
+ case Intrinsic::ppc_qpx_qvlfiwza: {
+ EVT VT;
+ // Select the memory type per intrinsic; qvlfiwaa and qvlfiwza fall through
+ // to the default (v4i32).
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvlfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvlfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(0); // the pointer is the first argument
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1; // NOTE(review): byte alignment is recorded here -- confirm this is intended for these variants
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::ppc_qpx_qvstfd:
+ case Intrinsic::ppc_qpx_qvstfs:
+ case Intrinsic::ppc_qpx_qvstfcd:
+ case Intrinsic::ppc_qpx_qvstfcs:
+ case Intrinsic::ppc_qpx_qvstfiw:
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
case Intrinsic::ppc_altivec_stvebx:
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_qpx_qvstfd:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfs:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcd:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcs:
+ VT = MVT::v2f32;
+ break;
default:
VT = MVT::v4i32;
break;
Info.writeMem = true;
return true;
}
+ // QPX "a"-suffixed store intrinsics: describe each one as a non-volatile,
+ // write-only memory access through its second argument.
+ case Intrinsic::ppc_qpx_qvstfda:
+ case Intrinsic::ppc_qpx_qvstfsa:
+ case Intrinsic::ppc_qpx_qvstfcda:
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ case Intrinsic::ppc_qpx_qvstfiwa: {
+ EVT VT;
+ // Select the memory type per intrinsic; qvstfiwa falls through to the
+ // default (v4i32).
+ switch (Intrinsic) {
+ case Intrinsic::ppc_qpx_qvstfda:
+ VT = MVT::v4f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfsa:
+ VT = MVT::v4f32;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcda:
+ VT = MVT::v2f64;
+ break;
+ case Intrinsic::ppc_qpx_qvstfcsa:
+ VT = MVT::v2f32;
+ break;
+ default:
+ VT = MVT::v4i32;
+ break;
+ }
+
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = VT;
+ Info.ptrVal = I.getArgOperand(1); // the pointer is the second argument
+ Info.offset = 0;
+ Info.size = VT.getStoreSize();
+ Info.align = 1; // NOTE(review): byte alignment is recorded here -- confirm this is intended for these variants
+ Info.vol = false;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
default:
break;
}
if (VT == MVT::v2i64)
return false;
+ if (Subtarget.hasQPX()) {
+ if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
+ return true;
+ }
+
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
/// of outputs.
XXSWAPD,
+ /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+ QVFPERM,
+
+ /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+ QVGPCI,
+
+ /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+ QVALIGNI,
+
+ /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+ QVESPLATI,
+
+ /// QBFLT = Access the underlying QPX floating-point boolean
+ /// representation.
+ QBFLT,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
- STXVD2X
+ STXVD2X,
+
+ /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+ /// The 4xf32 load used for v4i1 constants.
+ QVLFSb
};
}
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+ /// If this is a qvaligni shuffle mask, return the shift
+ /// amount, otherwise return -1.
+ int isQVALIGNIShuffleMask(SDNode *N);
}
class PPCTargetLowering : public TargetLowering {
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
let Inst{31} = 0;
}
+// Used for QPX
+// XForm_18 - Three-register X-form: FRT, FRA and FRB are encoded in bits
+// 6-10, 11-15 and 16-20, the 10-bit extended opcode in bits 21-30, and
+// bit 31 is always zero.
+class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// XForm_19 - Same encoding as XForm_18, with the FRA field fixed to zero.
+class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+}
+
+// XForm_20 - Three-register form with an additional 4-bit field: FRT, FRA
+// and FRB are encoded in bits 6-10, 11-15 and 16-20, tttt in bits 21-24,
+// a 6-bit extended opcode in bits 25-30, and bit 31 is always zero.
+class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<4> tttt;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-24} = tttt;
+ let Inst{25-30} = xo;
+ let Inst{31} = 0;
+}
+
class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
let Inst{31} = 0;
}
+// Used for QPX
+// AForm_4a - AForm_1 encoding with the FRA and FRC fields fixed to zero.
+class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRA = 0;
+ let FRC = 0;
+}
+
// 1.7.13 M-Form
class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
let Inst{22-31} = xo;
}
+// Z23-Form (used by QPX)
+// Z23Form_1 - FRT, FRA and FRB are encoded in bits 6-10, 11-15 and 16-20,
+// a 2-bit index in bits 21-22, the 8-bit extended opcode in bits 23-30, and
+// the RC bit in bit 31.
+class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<2> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
+// Z23Form_2 - Same encoding as Z23Form_1, with the FRB field fixed to zero.
+class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+// Z23Form_3 - One register plus a wide immediate: FRT is encoded in bits
+// 6-10, a 12-bit index in bits 11-22, the 8-bit extended opcode in bits
+// 23-30, and the RC bit in bit 31.
+class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<12> idx;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-22} = idx;
+ let Inst{23-30} = xo;
+ let Inst{31} = RC;
+}
+
//===----------------------------------------------------------------------===//
class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
: I<0, OOL, IOL, asmstr, NoItinerary> {
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
+ case PPC::QVLFDX:
+ case PPC::QVLFSXs:
+ case PPC::QVLFDXb:
case PPC::RESTORE_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
+ case PPC::QVSTFDX:
+ case PPC::QVSTFSXs:
+ case PPC::QVSTFDXb:
case PPC::SPILL_VRSAVE:
// Check for the operands added by addFrameReference (the immediate is the
// offset which defaults to 0).
Opc = PPC::XXLOR;
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::XXLORf;
+ else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMR;
+ else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRs;
+ else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::QVFMRb;
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
Opc = PPC::CROR;
else
getKillRegState(isKill)),
FrameIdx));
SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
DestReg),
FrameIdx));
SpillsVRS = true;
+ } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
+ FrameIdx));
+ NonRI = true;
} else {
llvm_unreachable("Unknown regclass!");
}
let MIOperandInfo = (ops i32imm:$imm);
}
+// Type profiles for the QPX-specific DAG nodes. In each constraint list,
+// index 0 is the result and indices 1..N are the operands.
+
+// qvfperm: vector result; operands 1 and 2 have the result type; operand 3
+// (the permutation control) is a vector of any type.
+def SDT_PPCqvfperm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
+]>;
+// qvgpci: vector result produced from a single integer operand.
+def SDT_PPCqvgpci : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisInt<1>
+]>;
+// qvaligni: vector result; operands 1 and 2 have the result type; operand 3
+// is an integer.
+def SDT_PPCqvaligni : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
+]>;
+// qvesplati: vector result; operand 1 has the result type; operand 2 is an
+// integer.
+def SDT_PPCqvesplati : SDTypeProfile<1, 2, [
+ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
+
+// qbflt: vector result from a vector operand; the two types are not tied.
+def SDT_PPCqbflt : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisVec<1>
+]>;
+
+// qvlfsb: vector result loaded through a pointer-typed operand.
+def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [
+ SDTCisVec<0>, SDTCisPtrTy<1>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.
//
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+// QPX permute/splat/align nodes; none of them carries node properties.
+def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
+def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
+def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
+def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
+
+// Vector-to-vector node mapped to PPCISD::QBFLT.
+def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
+
+// Load node: it is chained and marked as possibly reading memory.
+def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb,
+ [SDNPHasChain, SDNPMayLoad]>;
+
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
let ParserMatchClass = PPCU6ImmAsmOperand;
let DecoderMethod = "decodeUImmOperand<6>";
}
+// Assembler operand class for 12-bit unsigned immediates; parsing is gated
+// by the isU12Imm predicate and rendering uses addImmOperands.
+def PPCU12ImmAsmOperand : AsmOperandClass {
+ let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+ let RenderMethod = "addImmOperands";
+}
+// i32 instruction operand bound to the U12Imm class above; it is printed
+// with printU12ImmOperand and decoded as a 12-bit unsigned immediate.
+def u12imm : Operand<i32> {
+ let PrintMethod = "printU12ImmOperand";
+ let ParserMatchClass = PPCU12ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<12>";
+}
def PPCS16ImmAsmOperand : AsmOperandClass {
let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
let RenderMethod = "addS16ImmOperands";
def IsE500 : Predicate<"PPCSubTarget->isE500()">;
def HasSPE : Predicate<"PPCSubTarget->HasSPE()">;
def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
+
+// Complementary predicates keyed on the TM.Options.NoNaNsFPMath flag:
+// NoNaNsFPMath holds when the flag is set, NaNsFPMath when it is clear.
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
+def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
include "PPCInstrSPE.td"
include "PPCInstr64Bit.td"
include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
def crnot : OutPatFrag<(ops node:$in),
(CRNOR $in, $in)>;
--- /dev/null
+//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the QPX extension to the PowerPC instruction set.
+// Reference:
+// Book Q: QPX Architecture Definition. IBM (as updated in) 2011.
+//
+//===----------------------------------------------------------------------===//
+
+// Register operands for the three QPX register classes. Each pairs a
+// RegisterOperand with an assembler operand class that accepts any operand
+// satisfying isRegNumber.
+
+// qfrc: operand over the QFRC register class (used with v4f64 values in the
+// patterns in this file).
+def PPCRegQFRCAsmOperand : AsmOperandClass {
+ let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
+}
+def qfrc : RegisterOperand<QFRC> {
+ let ParserMatchClass = PPCRegQFRCAsmOperand;
+}
+// qsrc: operand over the QSRC register class (used with v4f32 values).
+def PPCRegQSRCAsmOperand : AsmOperandClass {
+ let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
+}
+def qsrc : RegisterOperand<QSRC> {
+ let ParserMatchClass = PPCRegQSRCAsmOperand;
+}
+// qbrc: operand over the QBRC register class (used with v4i1 values).
+def PPCRegQBRCAsmOperand : AsmOperandClass {
+ let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
+}
+def qbrc : RegisterOperand<QBRC> {
+ let ParserMatchClass = PPCRegQBRCAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// QPXA1_Int - An AForm_1 intrinsic definition: three v4f64 inputs, one v4f64
+// result, scheduled as IIC_FPFused. Note that the assembly string lists the
+// operands as $FRT, $FRA, $FRC, $FRB, while the intrinsic receives them as
+// ($FRA, $FRB, $FRC).
+class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA1s_Int - An AForm_1 intrinsic definition (simple instructions).
+// Identical to QPXA1_Int except that it uses the IIC_VecPerm itinerary.
+class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA2_Int - An AForm_2 intrinsic definition: two v4f64 inputs ($FRA, $FRB),
+// scheduled as IIC_FPGeneral.
+class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXA3_Int - An AForm_3 intrinsic definition: two v4f64 inputs ($FRA, $FRC),
+// scheduled as IIC_FPGeneral.
+class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
+// QPXA4_Int - An AForm_4a intrinsic definition: a single v4f64 input ($FRB),
+// scheduled as IIC_FPGeneral.
+class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+ : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+// QPXX18_Int - An XForm_18 intrinsic definition: two v4f64 inputs
+// ($FRA, $FRB), scheduled as IIC_FPCompare.
+class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
+ [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXX19_Int - An XForm_19 intrinsic definition: a single v4f64 input ($FRB),
+// scheduled as IIC_FPGeneral.
+class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+ : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+ [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Frags.
+
+// Matches an extending load only when the type in memory is v4f32.
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+// Matches a truncating store only when the type in memory is v4f32.
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+// Pre-indexed form of the truncating store above (base plus offset), again
+// restricted to a v4f32 memory type.
+def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+ (pre_truncst node:$val,
+ node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
+}]>;
+
+// Matches an fround whose constant second operand is 0.
+def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
+}]>;
+
+// Matches an fround whose constant second operand is 1, i.e. a rounding
+// flagged by its creator as always exact.
+def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{
+ return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
+}]>;
+
+// u12 - Matches an i32 immediate that is unchanged by masking with 0xFFF,
+// i.e. one that fits in 12 unsigned bits.
+let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs.
+ def u12 : ImmLeaf<i32, [{ return (Imm & 0xFFF) == Imm; }]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasQPX : Predicate<"PPCSubTarget->hasQPX()">;
+let Predicates = [HasQPX] in {
+let DecoderNamespace = "QPX" in {
+let hasSideEffects = 0 in { // QPX instructions don't have side effects.
+let Uses = [RM] in {
+ // Add Instructions
+ let isCommutable = 1 in {
+ def QVFADD : AForm_2<4, 21,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
+ def QVFADDSs : AForm_2<0, 21,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
+ }
+ def QVFSUB : AForm_2<4, 20,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
+ def QVFSUBSs : AForm_2<0, 20,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
+
+ // Estimate Instructions
+ def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfre $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
+ def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
+ let isCodeGenOnly = 1 in
+ def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfres $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
+
+ def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
+ def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
+ let isCodeGenOnly = 1 in
+ def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
+
+ // Multiply Instructions
+ let isCommutable = 1 in {
+ def QVFMUL : AForm_3<4, 25,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+ "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
+ def QVFMULSs : AForm_3<0, 25,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
+ "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
+ }
+ def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
+ def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
+
+ // Multiply-add instructions
+ def QVFMADD : AForm_1<4, 29,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
+ def QVFMADDSs : AForm_1<0, 29,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
+ def QVFNMADD : AForm_1<4, 31,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
+ def QVFNMADDSs : AForm_1<0, 31,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ v4f32:$FRB)))]>;
+ def QVFMSUB : AForm_1<4, 28,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
+ def QVFMSUBSs : AForm_1<0, 28,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB)))]>;
+ def QVFNMSUB : AForm_1<4, 30,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+ (fneg v4f64:$FRB))))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
+ def QVFNMSUBSs : AForm_1<0, 30,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+ (fneg v4f32:$FRB))))]>;
+ def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>;
+ def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>;
+ def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
+ def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
+ def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
+ def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
+ def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
+ def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
+
+ // Select Instruction
+ // All four definitions share the same encoding (opcode 4, xo 23) and
+ // mnemonic; they differ only in register classes and selection pattern.
+ // QVFSEL is the intrinsic form operating purely on v4f64 values.
+ let isCodeGenOnly = 1 in
+ def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
+ // The 'b' forms take the v4i1 condition in $FRA and select $FRC where it
+ // is true and $FRB otherwise, matching the printed operand order
+ // $FRT, $FRA, $FRC, $FRB.
+ def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
+ (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (vselect v4i1:$FRA,
+ v4f64:$FRC, v4f64:$FRB))]>;
+ // v4f32 data variant.
+ let isCodeGenOnly = 1 in
+ def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
+ (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (vselect v4i1:$FRA,
+ v4f32:$FRC, v4f32:$FRB))]>;
+ // v4i1 data variant (selecting between two boolean vectors).
+ let isCodeGenOnly = 1 in
+ def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
+ (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
+ "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+ [(set v4i1:$FRT, (vselect v4i1:$FRA,
+ v4i1:$FRC, v4i1:$FRB))]>;
+
+ // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+ // instruction selection into a branch sequence.
+ let usesCustomInserter = 1 in {
+ def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QFRC",
+ []>;
+ def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QSRC",
+ []>;
+ def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_QBRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+ qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+ [(set v4f64:$dst,
+ (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+ def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+ qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+ [(set v4f32:$dst,
+ (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+ def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+ qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+ [(set v4i1:$dst,
+ (select i1:$cond, v4i1:$T, v4i1:$F))]>;
+ }
+
+ // Convert and Round Instructions
+ def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
+ let isCodeGenOnly = 1 in
+ def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
+ def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
+ def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
+ def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
+ def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
+ def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
+ def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
+ def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
+ let isCodeGenOnly = 1 in
+ def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
+
+ def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
+ def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
+ def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
+
+ // Round to single precision. Both definitions share opcode 4 / xo 12.
+ // QVFRSP is the intrinsic form (v4f64 in, v4f64 out); QVFRSPs is selected
+ // for an fround from v4f64 to v4f32 whose second operand is 0
+ // (fround_inexact).
+ let isCodeGenOnly = 1 in
+ def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
+ def QVFRSPs : XForm_19<4, 12,
+ (outs qsrc:$FRT), (ins qfrc:$FRB),
+ "qvfrsp $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
+
+ def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfriz $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
+
+ def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (frnd v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrin $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (frnd v4f32:$FRB))]>;
+
+ def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrip $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
+
+ def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfrim $FRT, $FRB", IIC_FPGeneral,
+ [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
+
+ // Move Instructions
+ // Register-to-register moves for each QPX register class. They share one
+ // encoding (opcode 4, xo 72) and have no selection pattern; the candidate
+ // patterns are left commented out.
+ def QVFMR : XForm_19<4, 72,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
+ let isCodeGenOnly = 1 in {
+ def QVFMRs : XForm_19<4, 72,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
+ def QVFMRb : XForm_19<4, 72,
+ (outs qbrc:$FRT), (ins qbrc:$FRB),
+ "qvfmr $FRT, $FRB", IIC_VecPerm,
+ [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
+ }
+ def QVFNEG : XForm_19<4, 40,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNEGs : XForm_19<4, 40,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfneg $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
+ def QVFABS : XForm_19<4, 264,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
+ let isCodeGenOnly = 1 in
+ def QVFABSs : XForm_19<4, 264,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
+ def QVFNABS : XForm_19<4, 136,
+ (outs qfrc:$FRT), (ins qfrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
+ let isCodeGenOnly = 1 in
+ def QVFNABSs : XForm_19<4, 136,
+ (outs qsrc:$FRT), (ins qsrc:$FRB),
+ "qvfnabs $FRT, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
+ // Copy sign. The pattern passes $FRB as the first fcopysign operand and
+ // $FRA as the second, i.e. the operands are swapped relative to the
+ // instruction's $FRA, $FRB order.
+ def QVFCPSGN : XForm_18<4, 8,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
+ // v4f32 variant with the same encoding.
+ let isCodeGenOnly = 1 in
+ def QVFCPSGNs : XForm_18<4, 8,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+ [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
+
+ def QVALIGNI : Z23Form_1<4, 5,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIs : Z23Form_1<4, 5,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
+ (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVALIGNIb : Z23Form_1<4, 5,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
+ "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
+ (i32 imm:$idx)))]>;
+
+ def QVESPLATI : Z23Form_2<4, 37,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIs : Z23Form_2<4, 37,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
+ let isCodeGenOnly = 1 in
+ def QVESPLATIb : Z23Form_2<4, 37,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
+ "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+ [(set v4i1:$FRT,
+ (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
+
+ // Permute: selects PPCqvfperm of two data vectors ($FRA, $FRB) and a
+ // control vector ($FRC).
+ def QVFPERM : AForm_1<4, 6,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f64:$FRT,
+ (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+ // v4f32 form: only the data operands and the result use qsrc/v4f32; the
+ // control operand $FRC stays qfrc/v4f64.
+ let isCodeGenOnly = 1 in
+ def QVFPERMs : AForm_1<4, 6,
+ (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
+ "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+ [(set v4f32:$FRT,
+ (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
+
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+ def QVGPCI : Z23Form_3<4, 133,
+ (outs qfrc:$FRT), (ins u12imm:$idx),
+ "qvgpci $FRT, $idx", IIC_VecPerm,
+ [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
+
+ // Compare instructions.
+ // Each comparison (test-NaN, less-than, greater-than, equal) is defined
+ // three times with the same opcode and mnemonic:
+ //  - the plain name is built from QPXX18_Int with the matching
+ //    int_ppc_qpx_* intrinsic and is marked isCodeGenOnly;
+ //  - the "b" form takes v4f64 operands (qfrc), produces a v4i1 result
+ //    (qbrc), and matches a setcc node;
+ //  - the "bs" form is the same for v4f32 operands (qsrc) and is marked
+ //    isCodeGenOnly.
+ // The condition codes matched directly are SETUO, SETOLT, SETOGT and SETOEQ.
+ let isCodeGenOnly = 1 in
+ def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
+ def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
+ def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
+ def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
+ def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
+ let isCodeGenOnly = 1 in
+ def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+ "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+ [(set v4i1:$FRT,
+ (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
+
+ // Logical instruction: combines $FRA and $FRB under control of the $tttt
+ // immediate. None of the three forms carries a selection pattern here
+ // (empty pattern list).
+ // QVFLOGICAL operates on qfrc and is isCodeGenOnly; QVFLOGICALb operates on
+ // the boolean class qbrc and is the form without isCodeGenOnly.
+ let isCodeGenOnly = 1 in
+ def QVFLOGICAL : XForm_20<4, 4,
+ (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ def QVFLOGICALb : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+ // NOTE(review): QVFLOGICALs uses qbrc operands, exactly like QVFLOGICALb,
+ // whereas other "s" forms in this file (e.g. QVFABSs, QVFPERMs) use qsrc.
+ // Confirm whether qbrc is intended here.
+ let isCodeGenOnly = 1 in
+ def QVFLOGICALs : XForm_20<4, 4,
+ (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+ "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+
+ // Load indexed instructions
+ let mayLoad = 1, canFoldAsLoad = 1 in {
+ def QVLFDX : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (load xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFDXb : XForm_1<31, 583,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFDXA : XForm_1<31, 583,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFDUX : XForm_1<31, 615,
+ (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+ let RC = 1 in
+ def QVLFDUXA : XForm_1<31, 615,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSX : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
+
+ let isCodeGenOnly = 1 in
+ def QVLFSXb : XForm_1<31, 519,
+ (outs qbrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
+ let isCodeGenOnly = 1 in
+ def QVLFSXs : XForm_1<31, 519,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfsx $FRT, $src", IIC_LdStLFD,
+ [(set v4f32:$FRT, (load xoaddr:$src))]>;
+
+ let RC = 1 in
+ def QVLFSXA : XForm_1<31, 519,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFSUX : XForm_1<31, 551,
+ (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
+ (ins memrr:$src),
+ "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
+ RegConstraint<"$src.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+ let RC = 1 in
+ def QVLFSUXA : XForm_1<31, 551,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDX : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDXA : XForm_1<31, 71,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCDUX : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCDUXA : XForm_1<31, 103,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSX : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVLFCSXs : XForm_1<31, 7,
+ (outs qsrc:$FRT), (ins memrr:$src),
+ "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+
+ let RC = 1 in
+ def QVLFCSXA : XForm_1<31, 7,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFCSUX : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFCSUXA : XForm_1<31, 39,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWAX : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWAXA : XForm_1<31, 871,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
+
+ def QVLFIWZX : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
+ let RC = 1 in
+ def QVLFIWZXA : XForm_1<31, 839,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
+ }
+
+
+ // qvlpc* instructions. These definitions come after the closing brace of
+ // the `mayLoad = 1, canFoldAsLoad = 1` block above, so they do not inherit
+ // those flags. None of them has a selection pattern here.
+ def QVLPCLDX : XForm_1<31, 582,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCLSX : XForm_1<31, 518,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
+ // Code-generation-only form of qvlpclsx that takes a single G8RC register
+ // and prints a literal 0 as the first address operand.
+ let isCodeGenOnly = 1 in
+ def QVLPCLSXint : XForm_11<31, 518,
+ (outs qfrc:$FRT), (ins G8RC:$src),
+ "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
+ def QVLPCRDX : XForm_1<31, 70,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
+ def QVLPCRSX : XForm_1<31, 6,
+ (outs qfrc:$FRT), (ins memrr:$src),
+ "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def QVSTFDX : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD,
+ [(store qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFDXb : XForm_8<31, 711,
+ (outs), (ins qbrc:$FRT, memrr:$dst),
+ "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFDXA : XForm_8<31, 711,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFDUXA : XForm_8<31, 743,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDXI : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDXIA : XForm_8<31, 709,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFDUXI : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFDUXIA : XForm_8<31, 741,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSX : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
+ let isCodeGenOnly = 1 in
+ def QVSTFSXs : XForm_8<31, 647,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+ [(store qsrc:$FRT, xoaddr:$dst)]>;
+
+ let RC = 1 in
+ def QVSTFSXA : XForm_8<31, 647,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qsrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+ let isCodeGenOnly = 1 in
+ def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+ (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">,
+ NoEncode<"$ea_res">;
+
+ let RC = 1 in
+ def QVSTFSUXA : XForm_8<31, 679,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSXI : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSXIA : XForm_8<31, 645,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFSUXI : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFSUXIA : XForm_8<31, 677,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDX : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXA : XForm_8<31, 199,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSX : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+ let isCodeGenOnly = 1 in
+ def QVSTFCSXs : XForm_8<31, 135,
+ (outs), (ins qsrc:$FRT, memrr:$dst),
+ "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+
+ let RC = 1 in
+ def QVSTFCSXA : XForm_8<31, 135,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUX : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXA : XForm_8<31, 231,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUX : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXA : XForm_8<31, 167,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDXI : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDXIA : XForm_8<31, 197,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSXI : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSXIA : XForm_8<31, 133,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCDUXI : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCDUXIA : XForm_8<31, 229,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFCSUXI : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFCSUXIA : XForm_8<31, 165,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+ def QVSTFIWX : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
+ let RC = 1 in
+ def QVSTFIWXA : XForm_8<31, 967,
+ (outs), (ins qfrc:$FRT, memrr:$dst),
+ "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
+ }
+}
+
+} // neverHasSideEffects
+}
+
+// Assembler aliases for QVFLOGICALb. Each alias fixes the $tttt immediate:
+//   0 qvfclr, 1 qvfand, 4 qvfandc, 5 qvfctfb, 6 qvfxor, 7 qvfor, 8 qvfnor,
+//   9 qvfequ, 10 qvfnot, 13 qvforc, 14 qvfnand, 15 qvfset.
+// Aliases with fewer than three register operands repeat a register:
+// qvfclr/qvfset pass $FRT for all three, qvfctfb/qvfnot pass $FRA twice.
+def : InstAlias<"qvfclr $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
+def : InstAlias<"qvfand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
+def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
+def : InstAlias<"qvfctfb $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
+def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
+def : InstAlias<"qvfor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
+def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
+def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
+def : InstAlias<"qvfnot $FRT, $FRA",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
+def : InstAlias<"qvforc $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
+def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
+def : InstAlias<"qvfset $FRT",
+ (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
+
+//===----------------------------------------------------------------------===//
+// Additional QPX Patterns
+//
+
+// scalar_to_vector: insert the scalar into the sub_64 subregister of an
+// otherwise undefined (IMPLICIT_DEF) vector register.
+def : Pat<(v4f64 (scalar_to_vector f64:$A)),
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
+
+// Extracting element 0 is just a read of the sub_64 subregister.
+def : Pat<(f64 (vector_extract v4f64:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (EXTRACT_SUBREG $S, sub_64)>;
+
+// Extracting a constant element 1..3: QVESPLATI with that index first, then
+// read sub_64 of the result.
+def : Pat<(f64 (vector_extract v4f64:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
+
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
+
+// Extracting at a variable i64 index: permute $S with itself using a control
+// vector produced by QVLPCLSXint from (RLDICR $F, 2, 61) -- presumably the
+// index shifted left by two bits -- then read sub_64 of the result.
+def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERM $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)),
+ (EXTRACT_SUBREG (QVFPERMs $S, $S,
+ (QVLPCLSXint (RLDICR $F, 2,
+ /* 63-2 = */ 61))),
+ sub_64)>;
+
+def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFPERM $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
+ (QVFCPSGN $A, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
+ (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
+def : Pat<(fcopysign QSRC:$frB, QFRC:$frA),
+ (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
+
+def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
+def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
+def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
+
+def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
+def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
+def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
+def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
+
+def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
+def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
+
+def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
+ (QVFADD $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
+ (QVFSUB $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
+ (QVFMUL $A, $B)>;
+
+// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
+ (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (QVFNMSUBSs $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFMSUB $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
+ (QVFNMSUB $A, $B, $C)>;
+
+// Map the QPX load intrinsics onto the corresponding indexed-form
+// instructions. Each intrinsic is listed exactly once: the repeated
+// int_ppc_qpx_qvlfda and int_ppc_qpx_qvlfsa patterns, which duplicated
+// earlier entries with identical results, have been dropped.
+def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
+ (QVLFDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+ (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
+ (QVLFSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+ (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
+ (QVLFCDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
+ (QVLFCDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
+ (QVLFCSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
+ (QVLFCSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
+ (QVLFIWAXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
+ (QVLFIWAX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
+ (QVLFIWZXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
+ (QVLFIWZX xoaddr:$src)>;
+// The qvlpc* intrinsics map onto the qvlpc*x instructions.
+def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
+ (QVLPCLDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
+ (QVLPCLSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
+ (QVLPCRDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
+ (QVLPCRSX xoaddr:$src)>;
+
+def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
+ (QVSTFDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
+ (QVSTFSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
+ (QVSTFCDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
+ (QVSTFCDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
+ (QVSTFCSXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
+ (QVSTFCSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
+ (QVSTFDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
+ (QVSTFIWXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
+ (QVSTFIWX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
+ (QVSTFSXA $T, xoaddr:$dst)>;
+
+def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFDUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
+
+def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)),
+ (QVFLOGICAL $A, $B, imm:$idx)>;
+def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
+ (QVGPCI imm:$idx)>;
+
+// v4f64 comparisons that are not matched by a single compare instruction.
+// The instruction definitions match SETOLT, SETOGT, SETOEQ and SETUO
+// directly (QVFCMPLTb, QVFCMPGTb, QVFCMPEQb, QVFTSTNANb); every other
+// condition code combines two of those results with QVFLOGICALb. The
+// immediates correspond to the QVFLOGICALb aliases defined in this file:
+//   7 = qvfor, 8 = qvfnor, 10 = qvfnot (same operand twice), 13 = qvforc.
+//
+// Ordered forms: NOR of the opposite compare and the NaN test.
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+// SETO is the negation of the NaN test.
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 10))>;
+// Unordered forms: either OR the compare with the NaN test (7), or combine
+// the NaN test with the opposite compare using 13 (qvforc).
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 13))>;
+
+// Condition codes without an ordered/unordered qualifier: EQ/GT/LT use the
+// compare instruction directly, and GE/LE/NE negate the opposite compare
+// (immediate 10 with the same value for both operands).
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
+ (QVFCMPEQb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
+ (QVFCMPGTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFCMPLTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
+ (QVFCMPLTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFCMPGTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+ (QVFCMPEQb $FRA, $FRB), (i32 10))>;
+
+// v4f32 comparisons that are not matched by a single compare instruction.
+// Same construction as the v4f64 patterns, using the "bs" compare forms
+// (QVFCMPLTbs, QVFCMPGTbs, QVFCMPEQbs, QVFTSTNANbs); the v4i1 results are
+// still combined with QVFLOGICALb (7 = or, 8 = nor, 10 = not, 13 = orc).
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
+ (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
+
+// Condition codes without an ordered/unordered qualifier: EQ/GT/LT use the
+// compare directly; GE/LE/NE negate the opposite compare.
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
+ (QVFCMPEQbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
+ (QVFCMPGTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
+ (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
+ (QVFCMPLTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
+ (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
+ (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+ (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
+
+// Boolean (v4i1) logic is selected to QVFLOGICALb. The immediate in each
+// pattern is the one used by the like-named QVFLOGICALb alias in this file:
+// 4 andc, 8 nor, 9 equ, 13 orc, 14 nand.
+def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 4))>;
+def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 8))>;
+def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 9))>;
+def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 13))>;
+def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
+ (QVFLOGICALb $FRA, $FRB, (i32 14))>;
+
+// Plain operations: 1 and, 7 or, 6 xor, and 10 not (operand passed twice).
+def : Pat<(and v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 1))>;
+def : Pat<(or v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 7))>;
+def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
+ (QVFLOGICALb $FRA, $FRB, (i32 6))>;
+def : Pat<(not v4i1:$FRA),
+ (QVFLOGICALb $FRA, $FRA, (i32 10))>;
+
+def : Pat<(v4f64 (fextend v4f32:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f32 (fround_exact v4f64:$src)),
+ (COPY_TO_REGCLASS $src, QSRC)>;
+
+// Extract the underlying floating-point values from the
+// QPX (-1.0, 1.0) boolean representation.
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+ (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+ (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+ (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+ (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+ (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+ (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+ (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+ (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+ (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+ (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+ (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+ (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+ (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+ (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+ (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+ (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+ (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+ (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+ (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+// fminnum/fmaxnum when NoNaNsFPMath holds: a single ordered compare
+// (less-than for min, greater-than for max) feeds a QVFSELb/QVFSELbs select
+// between the two operands. No separate NaN test is emitted.
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+// v4f32 forms use the "bs" compares and QVFSELbs.
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we need
+// to explicitly OR with a NaN test on the second operand.
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+ (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+ (QVFTSTNANb $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+ (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+ (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+ $FRB, $FRA)>;
+}
+
}
case PPC::F8RCRegClassID:
case PPC::F4RCRegClassID:
+ case PPC::QFRCRegClassID:
+ case PPC::QSRCRegClassID:
+ case PPC::QBRCRegClassID:
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
let HWEncoding{4-0} = num;
}
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+// Each QFPR is built on top of an existing FPR: it reuses that register's
+// hardware encoding and exposes it as its only subregister, through the
+// sub_64 index.
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
}
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+ def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
// Vector registers
foreach Index = 0-31 in {
def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
VF22, VF21, VF20)>;
def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+// For QPX
+// QFRC holds v4f64 values. Its members are listed as QF0..QF13 followed by
+// QF31 down to QF14.
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+ (sequence "QF%u", 31, 14))>;
+// QSRC (v4f32, alignment 128) and QBRC (v4i1) contain the same registers as
+// QFRC; they differ only in value type and, for QSRC, alignment.
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+ // These are actually stored as floating-point values where a positive
+ // number is true and anything else (including NaN) is false.
+ // Size is set explicitly to 256 rather than derived from the v4i1 type.
+ let Size = 256;
+}
+
def CRBITRC : RegisterClass<"PPC", [i1], 32,
(add CR2LT, CR2GT, CR2EQ, CR2UN,
CR3LT, CR3GT, CR3EQ, CR3UN,
static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+ cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+ cl::Hidden);
+
PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
initializeEnvironment();
HasLazyResolverStubs = false;
HasICBT = false;
HasInvariantFunctionDescriptors = false;
+ IsQPXStackUnaligned = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// QPX requires a 32-byte aligned stack. Note that we need to do this if
// we're compiling for a BG/Q system regardless of whether or not QPX
// is enabled because external functions will assume this alignment.
- if (hasQPX() || isBGQ())
- StackAlignment = 32;
+ IsQPXStackUnaligned = QPXStackUnaligned;
+ StackAlignment = getPlatformStackAlignment();
// Determine endianness.
// FIXME: Part of the TargetMachine.
bool HasICBT;
bool HasInvariantFunctionDescriptors;
+ /// When targeting QPX running a stock PPC64 Linux kernel where the stack
+ /// alignment has not been changed, we need to keep the 16-byte alignment
+ /// of the stack.
+ bool IsQPXStackUnaligned;
+
const PPCTargetMachine &TM;
PPCFrameLowering FrameLowering;
PPCInstrInfo InstrInfo;
return HasInvariantFunctionDescriptors;
}
+ bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
+ /// Stack alignment, in bytes, for the current platform: 32 when QPX is
+ /// available or the target is BG/Q and the stack has not been flagged as
+ /// unaligned for QPX; 16 in every other case.
+ unsigned getPlatformStackAlignment() const {
+   const bool WantsQPXAlignment = hasQPX() || isBGQ();
+   if (!WantsQPXAlignment || isQPXStackUnaligned())
+     return 16;
+
+   return 32;
+ }
+
const Triple &getTargetTriple() const { return TargetTriple; }
/// isDarwin - True if this is any darwin platform.
}
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
- if (Vector && !ST->hasAltivec())
+ if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
return ST->hasVSX() ? 64 : 32;
}
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
if (Vector) {
+ if (ST->hasQPX()) return 256;
if (ST->hasAltivec()) return 128;
return 0;
}
if (Index == 0)
return 0;
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+ } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+ // Floating point scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
--- /dev/null
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @s452() nounwind {
+entry:
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %entry
+ %conv.4 = sitofp i32 undef to double
+ %conv.5 = sitofp i32 undef to double
+ %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
+ %mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
+ %mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef
+ %add7.4 = fadd <2 x double> undef, %mul.4
+ store <2 x double> %add7.4, <2 x double>* undef, align 16
+ br i1 undef, label %for.end, label %for.body4
+
+for.end: ; preds = %for.body4
+ unreachable
+; CHECK-LABEL: @s452
+; CHECK: lfiwax [[REG1:[0-9]+]],
+; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
+; FIXME: We could 'promote' this to a vector earlier and remove this splat.
+; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
+; CHECK: qvfmul
+; CHECK: qvfadd
+; CHECK: qvesplati {{[0-9]+}},
+; FIXME: We can use qvstfcdx here instead of two stores.
+; CHECK: stfd
+; CHECK: stfd
+}
+
--- /dev/null
+; RUN: llc < %s -mcpu=a2q | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
+ %v1 = insertelement <4 x double> undef, double %f1, i32 0
+ %v2 = insertelement <4 x double> %v1, double %f2, i32 1
+ %v3 = insertelement <4 x double> %v2, double %f3, i32 2
+ %v4 = insertelement <4 x double> %v3, double %f4, i32 3
+ ret <4 x double> %v4
+
+; CHECK-LABEL: @foo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
+ %v1 = insertelement <4 x float> undef, float %f1, i32 0
+ %v2 = insertelement <4 x float> %v1, float %f2, i32 1
+ %v3 = insertelement <4 x float> %v2, float %f3, i32 2
+ %v4 = insertelement <4 x float> %v3, float %f4, i32 3
+ ret <4 x float> %v4
+
+; CHECK-LABEL: @goo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+declare <4 x double> @foo(<4 x double> %p)
+
+define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
+entry:
+ %v = call <4 x double> @foo(<4 x double> %p)
+ %w = call <4 x double> @foo(<4 x double> %q)
+ %x = fadd <4 x double> %v, %w
+ ret <4 x double> %x
+
+; CHECK-LABEL: @bar
+; CHECK: qvstfdx 2,
+; CHECK: bl foo
+; CHECK: qvstfdx 1,
+; CHECK: qvlfdx 1,
+; CHECK: bl foo
+; CHECK: qvlfdx [[REG:[0-9]+]],
+; CHECK: qvfadd 1, [[REG]], 1
+}
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define <4 x double> @foo(<4 x double>* %p) {
+entry:
+ %v = load <4 x double>* %p, align 8
+ ret <4 x double> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x double> @bar(<4 x double>* %p) {
+entry:
+ %v = load <4 x double>* %p, align 32
+ ret <4 x double> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfdx
+
--- /dev/null
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+; a / sqrt(b) on <4 x double>. With -enable-unsafe-fp-math the expected code
+; is a qvfrsqrte estimate followed by a multiply / multiply-add sequence; the
+; run without that flag (SAFE prefix) expects scalar fsqrt and fdiv instead.
+define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+ %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %r = fdiv <4 x double> %a, %x
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foo
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+; a / fpext(sqrt(b)) where the square root is taken on <4 x float> and the
+; division on <4 x double>. With -enable-unsafe-fp-math the single-precision
+; estimate sequence (qvfrsqrtes, qvfmuls, qvfmadds) is expected, finished by a
+; double-precision qvfmul; the run without that flag (SAFE prefix) expects
+; scalar fsqrts and fdiv.
+define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %y = fpext <4 x float> %x to <4 x double>
+ %r = fdiv <4 x double> %a, %y
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foof
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadds instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+; a / fptrunc(sqrt(b)) where the square root is taken on <4 x double> and the
+; division on <4 x float>. With -enable-unsafe-fp-math the double-precision
+; estimate sequence is expected, then a qvfrsp rounding step and a final
+; single-precision qvfmuls; the run without that flag (SAFE prefix) expects
+; scalar fsqrt and fdivs.
+define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
+entry:
+ %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %y = fptrunc <4 x double> %x to <4 x float>
+ %r = fdiv <4 x float> %a, %y
+ ret <4 x float> %r
+
+; CHECK-LABEL: @food
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfrsp
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @food
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+; a / sqrt(b) on <4 x float>. With -enable-unsafe-fp-math the single-precision
+; estimate sequence (qvfrsqrtes, qvfmuls, qvfmadds) is expected; the run
+; without that flag (SAFE prefix) expects scalar fsqrts and fdivs.
+define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %r = fdiv <4 x float> %a, %x
+ ret <4 x float> %r
+
+; CHECK-LABEL: @goo
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadds instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+; Plain a / b on <4 x double>. With -enable-unsafe-fp-math the division is
+; expected to become a qvfre reciprocal estimate followed by qvfnmsub/qvfmadd
+; steps and a final qvfmul; the run without that flag (SAFE prefix) expects a
+; scalar fdiv.
+define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+ %r = fdiv <4 x double> %a, %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foo2
+; CHECK: qvfre
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+; Plain a / b on <4 x float>. With -enable-unsafe-fp-math the division is
+; expected to become a qvfres reciprocal estimate followed by one
+; qvfnmsubs/qvfmadds step and a final qvfmuls; the run without that flag
+; (SAFE prefix) expects a scalar fdivs.
+define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %r = fdiv <4 x float> %a, %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @goo2
+; CHECK: qvfres
+; CHECK: qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+; sqrt(a) on <4 x double>. With -enable-unsafe-fp-math the expected code is a
+; qvfrsqrte estimate sequence plus a qvfcmpeq whose result feeds a final
+; qvfsel; the run without that flag (SAFE prefix) expects a scalar fsqrt.
+define <4 x double> @foo3(<4 x double> %a) nounwind {
+entry:
+ %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foo3
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+; sqrt(a) on <4 x float>. With -enable-unsafe-fp-math the expected code is a
+; qvfrsqrtes estimate sequence plus a qvfcmpeq whose result feeds a final
+; qvfsel; the run without that flag (SAFE prefix) expects a scalar fsqrts.
+define <4 x float> @goo3(<4 x float> %a) nounwind {
+entry:
+ %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+
+; CHECK-LABEL: @goo3
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadds instead of a qvfnmsubs
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfmuls
+; CHECK-DAG: qvfmuls
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
--- /dev/null
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <4 x float> @test1(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test1:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test2(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test2:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test3(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test3:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test3:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test4(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test4:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test4:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test5(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test5:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test6(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test6:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test9(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test9:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test10(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test10:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define <4 x float> @foo(<4 x float>* %p) {
+entry:
+ %v = load <4 x float>* %p, align 4
+ ret <4 x float> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x float> @bar(<4 x float>* %p) {
+entry:
+ %v = load <4 x float>* %p, align 16
+ ret <4 x float> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfsx
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
+entry:
+ %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+ %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+ %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+ %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test2
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+; Bitwise AND of a <4 x i1> argument with a constant vector. The constant is
+; expected to be loaded with qvlfsx and combined with the argument by a
+; qvflogical whose last operand is 1 (the same operand the @test4 AND uses).
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+ %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; CHECK: qvflogical 1, 1, [[REG]], 1
+; CHECK: blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+ %q = load <4 x i1>* @Q, align 16
+ %v = and <4 x i1> %a, %q
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+ store <4 x i1> %a, <4 x i1>* @R
+ ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ %s = extractelement <4 x i1> %a, i32 3
+ %q = and i1 %r, %s
+ ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+ %r = extractelement <3 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+ %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+ %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
+ ret <3 x float> %r
+
+; CHECK-LABEL: @test9
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @foo(<4 x float> %v, <4 x float>* %p) {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+}
+
+; CHECK: @foo
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: blr
+
+define void @bar(<4 x float> %v, <4 x float>* %p) {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 16
+ ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfsx
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
+entry:
+ %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+ %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+ %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+ %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test2
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+; Bitwise AND of a <4 x i1> argument with a constant vector. The constant is
+; expected to be loaded with qvlfsx and combined with the argument by a
+; qvflogical whose last operand is 1 (the same operand the @test4 AND uses).
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+ %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; CHECK: qvflogical 1, 1, [[REG]], 1
+; CHECK: blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+ %q = load <4 x i1>* @Q, align 16
+ %v = and <4 x i1> %a, %q
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+ store <4 x i1> %a, <4 x i1>* @R
+ ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ %s = extractelement <4 x i1> %a, i32 3
+ %q = and i1 %r, %s
+ ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+ %r = extractelement <3 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+ %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+ %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
+ ret <3 x double> %r
+
+; CHECK-LABEL: @test9
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
--- /dev/null
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @foo(<4 x double> %v, <4 x double>* %p) {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+}
+
+; CHECK: @foo
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: blr
+
+define void @bar(<4 x double> %v, <4 x double>* %p) {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 32
+ ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfdx
+
--- /dev/null
+; RUN: llc < %s -mcpu=a2q | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>* %a, align 32
+ ret <4 x double> %r
+; CHECK: qvlfdx
+; CHECK: blr
+}
+
+define <4 x double> @bar(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>* %a, i32 16
+ %s = load <4 x double>* %b, align 32
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+; CHECK: qvlpcldx
+; CHECK: qvlfdx
+; CHECK: qvfperm
+; CHECK: blr
+}
+
+define <4 x double> @bar1(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>* %a, i32 16
+ %s = load <4 x double>* %b, align 8
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar2(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>* %a, i32 1
+ %s = load <4 x double>* %b, align 32
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar3(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>* %a, i32 1
+ %s = load <4 x double>* %b, align 8
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar4(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>* %a, i32 1
+ %s = load <4 x double>* %b, align 8
+ %c = getelementptr <4 x double>* %b, i32 1
+ %t = load <4 x double>* %c, align 8
+ %u = fadd <4 x double> %r, %s
+ %v = fadd <4 x double> %u, %t
+ ret <4 x double> %v
+}
+
br i1 false, label %loop2_start, label %if.end5
; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc
-; CHECK: xxlor
loop2_start: ; preds = %loop2_start, %entry
br i1 undef, label %loop2_start, label %if.then.i31
--- /dev/null
+# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s
+
+# CHECK: qvfabs 3, 5
+0x10 0x60 0x2a 0x10
+
+# CHECK: qvfadd 3, 4, 5
+0x10 0x64 0x28 0x2a
+
+# CHECK: qvfadds 3, 4, 5
+0x00 0x64 0x28 0x2a
+
+# FIXME: decode as qvfandc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 4
+0x10 0x64 0x2a 0x08
+
+# FIXME: decode as qvfand 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 1
+0x10 0x64 0x28 0x88
+
+# CHECK: qvfcfid 3, 5
+0x10 0x60 0x2e 0x9c
+
+# CHECK: qvfcfids 3, 5
+0x00 0x60 0x2e 0x9c
+
+# CHECK: qvfcfidu 3, 5
+0x10 0x60 0x2f 0x9c
+
+# CHECK: qvfcfidus 3, 5
+0x00 0x60 0x2f 0x9c
+
+# FIXME: decode as qvfclr 3
+# CHECK: qvflogical 3, 3, 3, 0
+0x10 0x63 0x18 0x08
+
+# CHECK: qvfcpsgn 3, 4, 5
+0x10 0x64 0x28 0x10
+
+# FIXME: decode as qvfctfb 3, 4
+# CHECK: qvflogical 3, 4, 4, 5
+0x10 0x64 0x22 0x88
+
+# CHECK: qvfctid 3, 5
+0x10 0x60 0x2e 0x5c
+
+# CHECK: qvfctidu 3, 5
+0x10 0x60 0x2f 0x5c
+
+# CHECK: qvfctiduz 3, 5
+0x10 0x60 0x2f 0x5e
+
+# CHECK: qvfctidz 3, 5
+0x10 0x60 0x2e 0x5e
+
+# CHECK: qvfctiw 3, 5
+0x10 0x60 0x28 0x1c
+
+# CHECK: qvfctiwu 3, 5
+0x10 0x60 0x29 0x1c
+
+# CHECK: qvfctiwuz 3, 5
+0x10 0x60 0x29 0x1e
+
+# CHECK: qvfctiwz 3, 5
+0x10 0x60 0x28 0x1e
+
+# FIXME: decode as qvfequ 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 9
+0x10 0x64 0x2c 0x88
+
+# CHECK: qvflogical 3, 4, 5, 12
+0x10 0x64 0x2e 0x08
+
+# CHECK: qvfmadd 3, 4, 6, 5
+0x10 0x64 0x29 0xba
+
+# CHECK: qvfmadds 3, 4, 6, 5
+0x00 0x64 0x29 0xba
+
+# CHECK: qvfmr 3, 5
+0x10 0x60 0x28 0x90
+
+# CHECK: qvfmsub 3, 4, 6, 5
+0x10 0x64 0x29 0xb8
+
+# CHECK: qvfmsubs 3, 4, 6, 5
+0x00 0x64 0x29 0xb8
+
+# CHECK: qvfmul 3, 4, 6
+0x10 0x64 0x01 0xb2
+
+# CHECK: qvfmuls 3, 4, 6
+0x00 0x64 0x01 0xb2
+
+# CHECK: qvfnabs 3, 5
+0x10 0x60 0x29 0x10
+
+# FIXME: decode as qvfnand 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 14
+0x10 0x64 0x2f 0x08
+
+# CHECK: qvfneg 3, 5
+0x10 0x60 0x28 0x50
+
+# CHECK: qvfnmadd 3, 4, 6, 5
+0x10 0x64 0x29 0xbe
+
+# CHECK: qvfnmadds 3, 4, 6, 5
+0x00 0x64 0x29 0xbe
+
+# CHECK: qvfnmsub 3, 4, 6, 5
+0x10 0x64 0x29 0xbc
+
+# CHECK: qvfnmsubs 3, 4, 6, 5
+0x00 0x64 0x29 0xbc
+
+# FIXME: decode as qvfnor 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 8
+0x10 0x64 0x2c 0x08
+
+# FIXME: decode as qvfnot 3, 4
+# CHECK: qvflogical 3, 4, 4, 10
+0x10 0x64 0x25 0x08
+
+# FIXME: decode as qvforc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 13
+0x10 0x64 0x2e 0x88
+
+# FIXME: decode as qvfor 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 7
+0x10 0x64 0x2b 0x88
+
+# CHECK: qvfperm 3, 4, 5, 6
+0x10 0x64 0x29 0x8c
+
+# CHECK: qvfre 3, 5
+0x10 0x60 0x28 0x30
+
+# CHECK: qvfres 3, 5
+0x00 0x60 0x28 0x30
+
+# CHECK: qvfrim 3, 5
+0x10 0x60 0x2b 0xd0
+
+# CHECK: qvfrin 3, 5
+0x10 0x60 0x2b 0x10
+
+# CHECK: qvfrip 3, 5
+0x10 0x60 0x2b 0x90
+
+# CHECK: qvfriz 3, 5
+0x10 0x60 0x2b 0x50
+
+# CHECK: qvfrsp 3, 5
+0x10 0x60 0x28 0x18
+
+# CHECK: qvfrsqrte 3, 5
+0x10 0x60 0x28 0x34
+
+# CHECK: qvfrsqrtes 3, 5
+0x00 0x60 0x28 0x34
+
+# CHECK: qvfsel 3, 4, 6, 5
+0x10 0x64 0x29 0xae
+
+# FIXME: decode as qvfset 3
+# CHECK: qvflogical 3, 3, 3, 15
+0x10 0x63 0x1f 0x88
+
+# CHECK: qvfsub 3, 4, 5
+0x10 0x64 0x28 0x28
+
+# CHECK: qvfsubs 3, 4, 5
+0x00 0x64 0x28 0x28
+
+# CHECK: qvfxmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x92
+
+# CHECK: qvfxmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x92
+
+# CHECK: qvfxmul 3, 4, 6
+0x10 0x64 0x01 0xa2
+
+# CHECK: qvfxmuls 3, 4, 6
+0x00 0x64 0x01 0xa2
+
+# FIXME: decode as qvfxor 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 6
+0x10 0x64 0x2b 0x08
+
+# CHECK: qvfxxcpnmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x86
+
+# CHECK: qvfxxcpnmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x86
+
+# CHECK: qvfxxmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x82
+
+# CHECK: qvfxxmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x82
+
+# CHECK: qvfxxnpmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x96
+
+# CHECK: qvfxxnpmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x96
+
+# CHECK: qvlfcduxa 3, 9, 11
+0x7c 0x69 0x58 0xcf
+
+# CHECK: qvlfcdux 3, 9, 11
+0x7c 0x69 0x58 0xce
+
+# CHECK: qvlfcdxa 3, 10, 11
+0x7c 0x6a 0x58 0x8f
+
+# CHECK: qvlfcdx 3, 10, 11
+0x7c 0x6a 0x58 0x8e
+
+# CHECK: qvlfcsuxa 3, 9, 11
+0x7c 0x69 0x58 0x4f
+
+# CHECK: qvlfcsux 3, 9, 11
+0x7c 0x69 0x58 0x4e
+
+# CHECK: qvlfcsxa 3, 10, 11
+0x7c 0x6a 0x58 0x0f
+
+# CHECK: qvlfcsx 3, 10, 11
+0x7c 0x6a 0x58 0x0e
+
+# CHECK: qvlfduxa 3, 9, 11
+0x7c 0x69 0x5c 0xcf
+
+# CHECK: qvlfdux 3, 9, 11
+0x7c 0x69 0x5c 0xce
+
+# CHECK: qvlfdxa 3, 10, 11
+0x7c 0x6a 0x5c 0x8f
+
+# CHECK: qvlfdx 3, 10, 11
+0x7c 0x6a 0x5c 0x8e
+
+# CHECK: qvlfiwaxa 3, 10, 11
+0x7c 0x6a 0x5e 0xcf
+
+# CHECK: qvlfiwax 3, 10, 11
+0x7c 0x6a 0x5e 0xce
+
+# CHECK: qvlfiwzxa 3, 10, 11
+0x7c 0x6a 0x5e 0x8f
+
+# CHECK: qvlfiwzx 3, 10, 11
+0x7c 0x6a 0x5e 0x8e
+
+# CHECK: qvlfsuxa 3, 9, 11
+0x7c 0x69 0x5c 0x4f
+
+# CHECK: qvlfsux 3, 9, 11
+0x7c 0x69 0x5c 0x4e
+
+# CHECK: qvlfsxa 3, 10, 11
+0x7c 0x6a 0x5c 0x0f
+
+# CHECK: qvlfsx 3, 10, 11
+0x7c 0x6a 0x5c 0x0e
+
+# CHECK: qvlpcldx 3, 10, 11
+0x7c 0x6a 0x5c 0x8c
+
+# CHECK: qvlpclsx 3, 10, 11
+0x7c 0x6a 0x5c 0x0c
+
+# CHECK: qvlpcrdx 3, 10, 11
+0x7c 0x6a 0x58 0x8c
+
+# CHECK: qvlpcrsx 3, 10, 11
+0x7c 0x6a 0x58 0x0c
+
+# CHECK: qvstfcduxa 2, 9, 11
+0x7c 0x49 0x59 0xcf
+
+# CHECK: qvstfcduxia 2, 9, 11
+0x7c 0x49 0x59 0xcb
+
+# CHECK: qvstfcduxi 2, 9, 11
+0x7c 0x49 0x59 0xca
+
+# CHECK: qvstfcdux 2, 9, 11
+0x7c 0x49 0x59 0xce
+
+# CHECK: qvstfcdxa 2, 10, 11
+0x7c 0x4a 0x59 0x8f
+
+# CHECK: qvstfcdxia 2, 10, 11
+0x7c 0x4a 0x59 0x8b
+
+# CHECK: qvstfcdxi 2, 10, 11
+0x7c 0x4a 0x59 0x8a
+
+# CHECK: qvstfcdx 2, 10, 11
+0x7c 0x4a 0x59 0x8e
+
+# CHECK: qvstfcsuxa 2, 9, 11
+0x7c 0x49 0x59 0x4f
+
+# CHECK: qvstfcsuxia 2, 9, 11
+0x7c 0x49 0x59 0x4b
+
+# CHECK: qvstfcsuxi 2, 9, 11
+0x7c 0x49 0x59 0x4a
+
+# CHECK: qvstfcsux 2, 9, 11
+0x7c 0x49 0x59 0x4e
+
+# CHECK: qvstfcsxa 2, 10, 11
+0x7c 0x4a 0x59 0x0f
+
+# CHECK: qvstfcsxia 2, 10, 11
+0x7c 0x4a 0x59 0x0b
+
+# CHECK: qvstfcsxi 2, 10, 11
+0x7c 0x4a 0x59 0x0a
+
+# CHECK: qvstfcsx 2, 10, 11
+0x7c 0x4a 0x59 0x0e
+
+# CHECK: qvstfduxa 2, 9, 11
+0x7c 0x49 0x5d 0xcf
+
+# CHECK: qvstfduxia 2, 9, 11
+0x7c 0x49 0x5d 0xcb
+
+# CHECK: qvstfduxi 2, 9, 11
+0x7c 0x49 0x5d 0xca
+
+# CHECK: qvstfdux 2, 9, 11
+0x7c 0x49 0x5d 0xce
+
+# CHECK: qvstfdxa 2, 10, 11
+0x7c 0x4a 0x5d 0x8f
+
+# CHECK: qvstfdxia 2, 10, 11
+0x7c 0x4a 0x5d 0x8b
+
+# CHECK: qvstfdxi 2, 10, 11
+0x7c 0x4a 0x5d 0x8a
+
+# CHECK: qvstfdx 2, 10, 11
+0x7c 0x4a 0x5d 0x8e
+
+# CHECK: qvstfiwxa 2, 10, 11
+0x7c 0x4a 0x5f 0x8f
+
+# CHECK: qvstfiwx 2, 10, 11
+0x7c 0x4a 0x5f 0x8e
+
+# CHECK: qvstfsuxa 2, 9, 11
+0x7c 0x49 0x5d 0x4f
+
+# CHECK: qvstfsuxia 2, 9, 11
+0x7c 0x49 0x5d 0x4b
+
+# CHECK: qvstfsuxi 2, 9, 11
+0x7c 0x49 0x5d 0x4a
+
+# CHECK: qvstfsux 2, 9, 11
+0x7c 0x49 0x5d 0x4e
+
+# CHECK: qvstfsxa 2, 10, 11
+0x7c 0x4a 0x5d 0x0f
+
+# CHECK: qvstfsxia 2, 10, 11
+0x7c 0x4a 0x5d 0x0b
+
+# CHECK: qvstfsxi 2, 10, 11
+0x7c 0x4a 0x5d 0x0a
+
+# CHECK: qvstfsx 2, 10, 11
+0x7c 0x4a 0x5d 0x0e
+
--- /dev/null
+# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s
+
+# FIXME: print qvflogical aliases.
+
+# qvf* instructions.
+#
+# Each pair below is one expectation line followed by the assembler input it
+# applies to. The expectation pins both the printed form of the instruction and
+# its 4-byte encoding, as produced by the llvm-mc invocation at the top of this
+# test. Expectations are matched in file order, so pairs must stay together.
+#
+# Inputs written with an alias mnemonic (qvfandc, qvfand, qvfclr, qvfctfb,
+# qvfequ, qvfnand, qvfnor, qvfnot, qvforc, qvfor, qvfset, qvfxor) are expected
+# to be printed as qvflogical with a fourth immediate operand.
+#
+# For every mnemonic pair X / Xs in this group (e.g. qvfadd / qvfadds) the
+# expected encodings differ only in the first byte: 0x10 for X, 0x00 for Xs.
+# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10]
+ qvfabs 3, 5
+# CHECK: qvfadd 3, 4, 5 # encoding: [0x10,0x64,0x28,0x2a]
+ qvfadd 3, 4, 5
+# CHECK: qvfadds 3, 4, 5 # encoding: [0x00,0x64,0x28,0x2a]
+ qvfadds 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 4 # encoding: [0x10,0x64,0x2a,0x08]
+ qvfandc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 1 # encoding: [0x10,0x64,0x28,0x88]
+ qvfand 3, 4, 5
+# CHECK: qvfcfid 3, 5 # encoding: [0x10,0x60,0x2e,0x9c]
+ qvfcfid 3, 5
+# CHECK: qvfcfids 3, 5 # encoding: [0x00,0x60,0x2e,0x9c]
+ qvfcfids 3, 5
+# CHECK: qvfcfidu 3, 5 # encoding: [0x10,0x60,0x2f,0x9c]
+ qvfcfidu 3, 5
+# CHECK: qvfcfidus 3, 5 # encoding: [0x00,0x60,0x2f,0x9c]
+ qvfcfidus 3, 5
+# CHECK: qvflogical 3, 3, 3, 0 # encoding: [0x10,0x63,0x18,0x08]
+ qvfclr 3
+# CHECK: qvfcpsgn 3, 4, 5 # encoding: [0x10,0x64,0x28,0x10]
+ qvfcpsgn 3, 4, 5
+# CHECK: qvflogical 3, 4, 4, 5 # encoding: [0x10,0x64,0x22,0x88]
+ qvfctfb 3, 4
+# CHECK: qvfctid 3, 5 # encoding: [0x10,0x60,0x2e,0x5c]
+ qvfctid 3, 5
+# CHECK: qvfctidu 3, 5 # encoding: [0x10,0x60,0x2f,0x5c]
+ qvfctidu 3, 5
+# CHECK: qvfctiduz 3, 5 # encoding: [0x10,0x60,0x2f,0x5e]
+ qvfctiduz 3, 5
+# CHECK: qvfctidz 3, 5 # encoding: [0x10,0x60,0x2e,0x5e]
+ qvfctidz 3, 5
+# CHECK: qvfctiw 3, 5 # encoding: [0x10,0x60,0x28,0x1c]
+ qvfctiw 3, 5
+# CHECK: qvfctiwu 3, 5 # encoding: [0x10,0x60,0x29,0x1c]
+ qvfctiwu 3, 5
+# CHECK: qvfctiwuz 3, 5 # encoding: [0x10,0x60,0x29,0x1e]
+ qvfctiwuz 3, 5
+# CHECK: qvfctiwz 3, 5 # encoding: [0x10,0x60,0x28,0x1e]
+ qvfctiwz 3, 5
+# CHECK: qvflogical 3, 4, 5, 9 # encoding: [0x10,0x64,0x2c,0x88]
+ qvfequ 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 12 # encoding: [0x10,0x64,0x2e,0x08]
+ qvflogical 3, 4, 5, 12
+# CHECK: qvfmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xba]
+ qvfmadd 3, 4, 6, 5
+# CHECK: qvfmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xba]
+ qvfmadds 3, 4, 6, 5
+# CHECK: qvfmr 3, 5 # encoding: [0x10,0x60,0x28,0x90]
+ qvfmr 3, 5
+# CHECK: qvfmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xb8]
+ qvfmsub 3, 4, 6, 5
+# CHECK: qvfmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xb8]
+ qvfmsubs 3, 4, 6, 5
+# CHECK: qvfmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xb2]
+ qvfmul 3, 4, 6
+# CHECK: qvfmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xb2]
+ qvfmuls 3, 4, 6
+# CHECK: qvfnabs 3, 5 # encoding: [0x10,0x60,0x29,0x10]
+ qvfnabs 3, 5
+# CHECK: qvflogical 3, 4, 5, 14 # encoding: [0x10,0x64,0x2f,0x08]
+ qvfnand 3, 4, 5
+# CHECK: qvfneg 3, 5 # encoding: [0x10,0x60,0x28,0x50]
+ qvfneg 3, 5
+# CHECK: qvfnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbe]
+ qvfnmadd 3, 4, 6, 5
+# CHECK: qvfnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbe]
+ qvfnmadds 3, 4, 6, 5
+# CHECK: qvfnmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbc]
+ qvfnmsub 3, 4, 6, 5
+# CHECK: qvfnmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbc]
+ qvfnmsubs 3, 4, 6, 5
+# CHECK: qvflogical 3, 4, 5, 8 # encoding: [0x10,0x64,0x2c,0x08]
+ qvfnor 3, 4, 5
+# CHECK: qvflogical 3, 4, 4, 10 # encoding: [0x10,0x64,0x25,0x08]
+ qvfnot 3, 4
+# CHECK: qvflogical 3, 4, 5, 13 # encoding: [0x10,0x64,0x2e,0x88]
+ qvforc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 7 # encoding: [0x10,0x64,0x2b,0x88]
+ qvfor 3, 4, 5
+# CHECK: qvfperm 3, 4, 5, 6 # encoding: [0x10,0x64,0x29,0x8c]
+ qvfperm 3, 4, 5, 6
+# CHECK: qvfre 3, 5 # encoding: [0x10,0x60,0x28,0x30]
+ qvfre 3, 5
+# CHECK: qvfres 3, 5 # encoding: [0x00,0x60,0x28,0x30]
+ qvfres 3, 5
+# CHECK: qvfrim 3, 5 # encoding: [0x10,0x60,0x2b,0xd0]
+ qvfrim 3, 5
+# CHECK: qvfrin 3, 5 # encoding: [0x10,0x60,0x2b,0x10]
+ qvfrin 3, 5
+# CHECK: qvfrip 3, 5 # encoding: [0x10,0x60,0x2b,0x90]
+ qvfrip 3, 5
+# CHECK: qvfriz 3, 5 # encoding: [0x10,0x60,0x2b,0x50]
+ qvfriz 3, 5
+# CHECK: qvfrsp 3, 5 # encoding: [0x10,0x60,0x28,0x18]
+ qvfrsp 3, 5
+# CHECK: qvfrsqrte 3, 5 # encoding: [0x10,0x60,0x28,0x34]
+ qvfrsqrte 3, 5
+# CHECK: qvfrsqrtes 3, 5 # encoding: [0x00,0x60,0x28,0x34]
+ qvfrsqrtes 3, 5
+# CHECK: qvfsel 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xae]
+ qvfsel 3, 4, 6, 5
+# CHECK: qvflogical 3, 3, 3, 15 # encoding: [0x10,0x63,0x1f,0x88]
+ qvfset 3
+# CHECK: qvfsub 3, 4, 5 # encoding: [0x10,0x64,0x28,0x28]
+ qvfsub 3, 4, 5
+# CHECK: qvfsubs 3, 4, 5 # encoding: [0x00,0x64,0x28,0x28]
+ qvfsubs 3, 4, 5
+# CHECK: qvfxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x92]
+ qvfxmadd 3, 4, 6, 5
+# CHECK: qvfxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x92]
+ qvfxmadds 3, 4, 6, 5
+# CHECK: qvfxmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xa2]
+ qvfxmul 3, 4, 6
+# CHECK: qvfxmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xa2]
+ qvfxmuls 3, 4, 6
+# CHECK: qvflogical 3, 4, 5, 6 # encoding: [0x10,0x64,0x2b,0x08]
+ qvfxor 3, 4, 5
+# CHECK: qvfxxcpnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x86]
+ qvfxxcpnmadd 3, 4, 6, 5
+# CHECK: qvfxxcpnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x86]
+ qvfxxcpnmadds 3, 4, 6, 5
+# CHECK: qvfxxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x82]
+ qvfxxmadd 3, 4, 6, 5
+# CHECK: qvfxxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x82]
+ qvfxxmadds 3, 4, 6, 5
+# CHECK: qvfxxnpmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x96]
+ qvfxxnpmadd 3, 4, 6, 5
+# CHECK: qvfxxnpmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x96]
+ qvfxxnpmadds 3, 4, 6, 5
+# qvl* instructions (presumably the QPX load family -- the mnemonic meaning is
+# not established by this file; confirm against the instruction definitions).
+#
+# Every instruction here takes three operands, and each expectation pins the
+# printed form plus the 4-byte encoding. Where a mnemonic has an 'a'-suffixed
+# variant (e.g. qvlfdx / qvlfdxa), the two expected encodings differ only in
+# the lowest bit of the final byte, which is set for the 'a' form.
+# CHECK: qvlfcduxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xcf]
+ qvlfcduxa 3, 9, 11
+# CHECK: qvlfcdux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xce]
+ qvlfcdux 3, 9, 11
+# CHECK: qvlfcdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8f]
+ qvlfcdxa 3, 10, 11
+# CHECK: qvlfcdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8e]
+ qvlfcdx 3, 10, 11
+# CHECK: qvlfcsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4f]
+ qvlfcsuxa 3, 9, 11
+# CHECK: qvlfcsux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4e]
+ qvlfcsux 3, 9, 11
+# CHECK: qvlfcsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0f]
+ qvlfcsxa 3, 10, 11
+# CHECK: qvlfcsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0e]
+ qvlfcsx 3, 10, 11
+# CHECK: qvlfduxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xcf]
+ qvlfduxa 3, 9, 11
+# CHECK: qvlfdux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xce]
+ qvlfdux 3, 9, 11
+# CHECK: qvlfdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8f]
+ qvlfdxa 3, 10, 11
+# CHECK: qvlfdx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8e]
+ qvlfdx 3, 10, 11
+# CHECK: qvlfiwaxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xcf]
+ qvlfiwaxa 3, 10, 11
+# CHECK: qvlfiwax 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xce]
+ qvlfiwax 3, 10, 11
+# CHECK: qvlfiwzxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8f]
+ qvlfiwzxa 3, 10, 11
+# CHECK: qvlfiwzx 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8e]
+ qvlfiwzx 3, 10, 11
+# CHECK: qvlfsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4f]
+ qvlfsuxa 3, 9, 11
+# CHECK: qvlfsux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4e]
+ qvlfsux 3, 9, 11
+# CHECK: qvlfsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0f]
+ qvlfsxa 3, 10, 11
+# CHECK: qvlfsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0e]
+ qvlfsx 3, 10, 11
+# CHECK: qvlpcldx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8c]
+ qvlpcldx 3, 10, 11
+# CHECK: qvlpclsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0c]
+ qvlpclsx 3, 10, 11
+# CHECK: qvlpcrdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8c]
+ qvlpcrdx 3, 10, 11
+# CHECK: qvlpcrsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0c]
+ qvlpcrsx 3, 10, 11
+# qvst* instructions (presumably the QPX store family -- the mnemonic meaning
+# is not established by this file; confirm against the instruction definitions).
+#
+# Every instruction here takes three operands, and each expectation pins the
+# printed form plus the 4-byte encoding. Within a family the suffixes only
+# change the final encoding byte:
+#   - an 'a' suffix sets the lowest bit (e.g. qvstfdx 0x8e vs qvstfdxa 0x8f);
+#   - an 'i' suffix clears bit 0x04 (e.g. qvstfdx 0x8e vs qvstfdxi 0x8a).
+# CHECK: qvstfcduxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcf]
+ qvstfcduxa 2, 9, 11
+# CHECK: qvstfcduxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcb]
+ qvstfcduxia 2, 9, 11
+# CHECK: qvstfcduxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xca]
+ qvstfcduxi 2, 9, 11
+# CHECK: qvstfcdux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xce]
+ qvstfcdux 2, 9, 11
+# CHECK: qvstfcdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8f]
+ qvstfcdxa 2, 10, 11
+# CHECK: qvstfcdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8b]
+ qvstfcdxia 2, 10, 11
+# CHECK: qvstfcdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8a]
+ qvstfcdxi 2, 10, 11
+# CHECK: qvstfcdx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8e]
+ qvstfcdx 2, 10, 11
+# CHECK: qvstfcsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4f]
+ qvstfcsuxa 2, 9, 11
+# CHECK: qvstfcsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4b]
+ qvstfcsuxia 2, 9, 11
+# CHECK: qvstfcsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4a]
+ qvstfcsuxi 2, 9, 11
+# CHECK: qvstfcsux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4e]
+ qvstfcsux 2, 9, 11
+# CHECK: qvstfcsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0f]
+ qvstfcsxa 2, 10, 11
+# CHECK: qvstfcsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0b]
+ qvstfcsxia 2, 10, 11
+# CHECK: qvstfcsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0a]
+ qvstfcsxi 2, 10, 11
+# CHECK: qvstfcsx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0e]
+ qvstfcsx 2, 10, 11
+# CHECK: qvstfduxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcf]
+ qvstfduxa 2, 9, 11
+# CHECK: qvstfduxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcb]
+ qvstfduxia 2, 9, 11
+# CHECK: qvstfduxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xca]
+ qvstfduxi 2, 9, 11
+# CHECK: qvstfdux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xce]
+ qvstfdux 2, 9, 11
+# CHECK: qvstfdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8f]
+ qvstfdxa 2, 10, 11
+# CHECK: qvstfdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8b]
+ qvstfdxia 2, 10, 11
+# CHECK: qvstfdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8a]
+ qvstfdxi 2, 10, 11
+# CHECK: qvstfdx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8e]
+ qvstfdx 2, 10, 11
+# CHECK: qvstfiwxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8f]
+ qvstfiwxa 2, 10, 11
+# CHECK: qvstfiwx 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8e]
+ qvstfiwx 2, 10, 11
+# CHECK: qvstfsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4f]
+ qvstfsuxa 2, 9, 11
+# CHECK: qvstfsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4b]
+ qvstfsuxia 2, 9, 11
+# CHECK: qvstfsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4a]
+ qvstfsuxi 2, 9, 11
+# CHECK: qvstfsux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4e]
+ qvstfsux 2, 9, 11
+# CHECK: qvstfsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0f]
+ qvstfsxa 2, 10, 11
+# CHECK: qvstfsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0b]
+ qvstfsxia 2, 10, 11
+# CHECK: qvstfsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0a]
+ qvstfsxi 2, 10, 11
+# CHECK: qvstfsx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0e]
+ qvstfsx 2, 10, 11
+