[PowerPC] Add support for the QPX vector instruction set

author Hal Finkel <hfinkel@anl.gov>

Wed, 25 Feb 2015 01:06:45 +0000 (01:06 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Wed, 25 Feb 2015 01:06:45 +0000 (01:06 +0000)
author Hal Finkel <hfinkel@anl.gov>
Wed, 25 Feb 2015 01:06:45 +0000 (01:06 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Wed, 25 Feb 2015 01:06:45 +0000 (01:06 +0000)
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td

index 5cdabdeadaea99f8749688b57ba2e409c5a30803..110d55d562a29d063c2592248a3e9f61ed973ebd 100644 (file)
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -542,3 +542,180 @@ def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">;
  def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">;
  def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">;
  }
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsics.
+//
+
+let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
+  /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics.
+  class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+                              list<LLVMType> param_types,
+                              list<IntrinsicProperty> properties>
+    : GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>,
+      Intrinsic<ret_types, param_types, properties>;
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsic Class Definitions.
+//
+
+/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64
+/// vector and returns one.  These intrinsics have no side effects.
+class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+
+/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64
+/// vectors and returns one.  These intrinsics have no side effects.
+class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f64_ty],
+                          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer
+/// and returns a v4f64.  Declared IntrReadArgMem.
+class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
+
+/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer
+/// and returns a v4f64 permutation.  Declared IntrNoMem, unlike the loads.
+class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>;
+
+/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that stores a v4f64.  It
+/// takes the value and then the pointer, and produces no result.
+class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix>
+  : PowerPC_QPX_Intrinsic<GCCIntSuffix,
+                          [], [llvm_v4f64_ty, llvm_ptr_ty],
+                          [IntrReadWriteArgMem]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC QPX Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
+  // Add Instructions
+  def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">;
+  def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">;
+  def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">;
+  def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">;
+
+  // Estimate Instructions
+  def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">;
+  def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">;
+  def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">;
+  def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">;
+
+  // Multiply Instructions
+  def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">;
+  def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">;
+  def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">;
+  def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">;
+
+  // Multiply-add instructions
+  def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">;
+  def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">;
+  def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">;
+  def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">;
+  def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">;
+  def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">;
+  def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">;
+  def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">;
+  def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">;
+  def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">;
+  def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">;
+  def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">;
+  def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">;
+  def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">;
+  def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">;
+  def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">;
+
+  // Select Instruction
+  def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">;
+
+  // Permute Instruction
+  def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">;
+
+  // Convert and Round Instructions
+  def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">;
+  def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">;
+  def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">;
+  def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">;
+  def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">;
+  def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">;
+  def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">;
+  def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">;
+  def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">;
+  def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">;
+  def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">;
+  def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">;
+  def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">;
+  def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">;
+  def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">;
+  def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">;
+  def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">;
+
+  // Move Instructions
+  def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">;
+  def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">;
+  def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">;
+  def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">;
+
+  // Compare Instructions
+  def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">;
+  def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">;
+  def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">;
+  def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">;
+
+  // Load instructions
+  def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">;
+  def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">;
+  def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">;
+  def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">;
+
+  def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">;
+  def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">;
+  def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">;
+  def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">;
+  def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">;
+  def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">;
+  def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">;
+  def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">;
+
+  def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">;
+  def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">;
+  def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">;
+  def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">;
+
+  // Store instructions
+  def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">;
+  def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">;
+  def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">;
+  def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">;
+
+  def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">;
+  def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">;
+  def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">;
+  def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">;
+  def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">;
+  def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">;
+
+  // Logical and permutation formation
+  def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical",
+                          [llvm_v4f64_ty],
+                          [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty],
+                          [IntrNoMem]>;
+  def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci",
+                          [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>;
+}
+
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp

index cd36e58b78d3eccd715d17df6a305715c51c5567..bf00e7397be842ba2acd9afce686afd675316c1b 100644 (file)
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -132,6 +132,16 @@ static const MCPhysReg VSFRegs[64] = {
    PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
    PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
  };
+static const MCPhysReg QFRegs[32] = { // QPX QF0-QF31, indexed by number
+  PPC::QF0,  PPC::QF1,  PPC::QF2,  PPC::QF3,
+  PPC::QF4,  PPC::QF5,  PPC::QF6,  PPC::QF7,
+  PPC::QF8,  PPC::QF9,  PPC::QF10, PPC::QF11,
+  PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+  PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+  PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+  PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+  PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
  static const MCPhysReg CRBITRegs[32] = {
    PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
    PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
@@ -429,6 +439,7 @@ public:
    bool isU8ImmX8() const { return Kind == Immediate &&
                                    isUInt<8>(getImm()) &&
                                    (getImm() & 7) == 0; }
+  bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); }
    bool isU16Imm() const {
      switch (Kind) {
        case Expression:
@@ -564,6 +575,21 @@ public:
      Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
    }
  
+  void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); // number -> QFn
+  }
+
+  void addRegQSRCOperands(MCInst &Inst, unsigned N) const {
+    // QSRC operands are drawn from the same QFRegs table as QFRC operands.
+    addRegQFRCOperands(Inst, N);
+  }
+
+  void addRegQBRCOperands(MCInst &Inst, unsigned N) const {
+    // QBRC operands are drawn from the same QFRegs table as QFRC operands.
+    addRegQFRCOperands(Inst, N);
+  }
+
    void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
      assert(N == 1 && "Invalid number of operands!");
      Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp

index 5251b60f34800a4e56ac0718e188201a2291946c..0ed072393273efe20b8a648d187360ddddbadc6d 100644 (file)
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -164,6 +164,17 @@ static const unsigned G8Regs[] = {
    PPC::X28, PPC::X29, PPC::X30, PPC::X31
  };
  
+static const unsigned QFRegs[] = {
+  PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
+  PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
+  PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11,
+  PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15,
+  PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19,
+  PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23,
+  PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27,
+  PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31
+};
+
  template <std::size_t N>
  static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
                                          const unsigned (&Regs)[N]) {
@@ -235,6 +246,15 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
  #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
  #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
  
+static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+                                            uint64_t Address,
+                                            const void *Decoder) {
+  return decodeRegisterClass(Inst, RegNo, QFRegs); // Address, Decoder unused
+}
+
+#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass
+#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass
+
  template<unsigned N>
  static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
                                        int64_t Address, const void *Decoder) {
@@ -335,6 +355,15 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
    uint32_t Inst =
        (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
  
+  if ((STI.getFeatureBits() & PPC::FeatureQPX) != 0) {
+    DecodeStatus result =
+      decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
+    if (result != MCDisassembler::Fail)
+      return result;
+
+    MI.clear();
+  }
+
    return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
  }
  
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp

index 670c40a2a3bde9951fa159a8bae71f0a1307cff2..c287fbe7c5b88f4c3e9ef09cf38ebad550574bbc 100644 (file)
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -34,7 +34,20 @@ FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
  #include "PPCGenAsmWriter.inc"
  
  void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
-  OS << getRegisterName(RegNo);
+  const char *RegName = getRegisterName(RegNo);
+  if (RegName[0] == 'q' /* QPX */) {
+    // The system toolchain on the BG/Q does not understand QPX register names
+    // in .cfi_* directives, so print the name of the floating-point
+    // subregister instead.
+    std::string RN(RegName);
+
+    RN[0] = 'f';
+    OS << RN;
+
+    return;
+  }
+
+  OS << RegName;
  }
  
  void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -236,6 +249,13 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
    O << (unsigned int)Value;
  }
  
+void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNo).getImm(); // full width: no wraparound
+  assert(Value >= 0 && Value <= 4095 && "Invalid u12imm argument!");
+  O << (unsigned short)Value;
+}
+
  void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
                                          raw_ostream &O) {
    if (MI->getOperand(OpNo).isImm())
@@ -338,6 +358,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
    switch (RegName[0]) {
    case 'r':
    case 'f':
+  case 'q': // for QPX
    case 'v':
      if (RegName[1] == 's')
        return RegName + 2;
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h

index b21aa22daa1ca78a961fcb0e58f86c107c12b2cf..6ead19b33fed8ac954cfd67bb26728786a9f48e1 100644 (file)
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -48,6 +48,7 @@ public:
    void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
    void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp

index 480b790a99b060a8b82e6a56f4add3a51359c0ab..13272908b12ec16570ac77ec8747c5435fde347d 100644 (file)
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -151,6 +151,7 @@ static const char *stripRegisterPrefix(const char *RegName) {
    switch (RegName[0]) {
      case 'r':
      case 'f':
+    case 'q': // for QPX
      case 'v':
        if (RegName[1] == 's')
          return RegName + 2;
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td

index 3eaec6ba54dfbcb0b3fd45e09baf74312cd70728..045fca3c747729b6607fd34fb69861c849dd3259 100644 (file)
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -55,13 +55,17 @@ def RetCC_PPC : CallingConv<[
    // only the ELFv2 ABI fully utilizes all these registers.
    CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
    CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
-  
+
+  // QPX vectors are returned in QF1 and QF2.
+  CCIfType<[v4f64, v4f32, v4i1],
+           CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+ 
    // Vector types returned as "direct" go into V2 .. V9; note that only the
    // ELFv2 ABI fully utilizes all these registers.
-  CCIfType<[v16i8, v8i16, v4i32, v4f32],
-           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
-  CCIfType<[v2f64, v2i64],
-           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+  CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
  ]>;
  
  // No explicit register is specified for the AnyReg calling convention. The
@@ -108,10 +112,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
    CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
    CCIfType<[f32],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
    CCIfType<[f64],  CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
-  CCIfType<[v16i8, v8i16, v4i32, v4f32],
-           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
-  CCIfType<[v2f64, v2i64],
-           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+  CCIfType<[v4f64, v4f32, v4i1],
+           CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
+  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
+  CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
+           CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
  ]>;
  
  //===----------------------------------------------------------------------===//
@@ -144,6 +150,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
    // alignment and size as doubles.
    CCIfType<[f32,f64], CCAssignToStack<8, 8>>,  
  
+  // QPX vectors that are stored in double precision need 32-byte alignment.
+  CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>,
+
    // Vectors get 16-byte stack slots that are 16-byte aligned.
    CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
  ]>;
@@ -158,12 +167,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[
  // In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
  // put vector arguments in vector registers before putting them on the stack.
  def CC_PPC32_SVR4 : CallingConv<[
+  // QPX vectors mirror the scalar FP convention.
+  CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()",
+    CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
+
    // The first 12 Vector arguments are passed in AltiVec registers.
-  CCIfType<[v16i8, v8i16, v4i32, v4f32],
-           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
-  CCIfType<[v2f64, v2i64],
+  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9,
+                          V10, V11, V12, V13]>>>,
+  CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
             CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
-                          VSH10, VSH11, VSH12, VSH13]>>,
+                          VSH10, VSH11, VSH12, VSH13]>>>,
             
    CCDelegateTo<CC_PPC32_SVR4_Common>
  ]>;  
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp

index 10429db9b90d7cd748ecf03bf95cd3cd5811d64b..f997fea4d93e6f76f2a005726fc07d588c858cfe 100644 (file)
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -83,7 +83,7 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  
  PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
      : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
-                          (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+                          STI.getPlatformStackAlignment(), 0),
        Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
        TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
        FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

index 2418ca6b19aac0c05ec11342f96c90f6c61b25b3..0d553d32f31d6a5f53a3b45757f055598a9028fd 100644 (file)
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2293,6 +2293,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
    // Altivec Vector compare instructions do not set any CR register by default and
    // vector compare operations return the same type as the operands.
    if (LHS.getValueType().isVector()) {
+    if (PPCSubTarget->hasQPX())
+      return nullptr;
+
      EVT VecVT = LHS.getValueType();
      bool Swap, Negate;
      unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
@@ -2468,6 +2471,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
          assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
          switch (LoadedVT.getSimpleVT().SimpleTy) {
            default: llvm_unreachable("Invalid PPC load type!");
+          case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
+          case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
            case MVT::f64: Opcode = PPC::LFDUX; break;
            case MVT::f32: Opcode = PPC::LFSUX; break;
            case MVT::i32: Opcode = PPC::LWZUX; break;
@@ -2711,6 +2716,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
          SelectCCOp = PPC::SELECT_CC_VSFRC;
        else
          SelectCCOp = PPC::SELECT_CC_F8;
+    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
+      SelectCCOp = PPC::SELECT_CC_QFRC;
+    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
+      SelectCCOp = PPC::SELECT_CC_QSRC;
+    else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
+      SelectCCOp = PPC::SELECT_CC_QBRC;
      else if (N->getValueType(0) == MVT::v2f64 ||
               N->getValueType(0) == MVT::v2i64)
        SelectCCOp = PPC::SELECT_CC_VSRC;
@@ -3406,6 +3417,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
        case PPC::SELECT_I8:
        case PPC::SELECT_F4:
        case PPC::SELECT_F8:
+      case PPC::SELECT_QFRC:
+      case PPC::SELECT_QSRC:
+      case PPC::SELECT_QBRC:
        case PPC::SELECT_VRRC:
        case PPC::SELECT_VSFRC:
        case PPC::SELECT_VSRC: {
@@ -3713,6 +3727,9 @@ void PPCDAGToDAGISel::PeepholeCROps() {
        case PPC::SELECT_I8:
        case PPC::SELECT_F4:
        case PPC::SELECT_F8:
+      case PPC::SELECT_QFRC:
+      case PPC::SELECT_QSRC:
+      case PPC::SELECT_QBRC:
        case PPC::SELECT_VRRC:
        case PPC::SELECT_VSFRC:
        case PPC::SELECT_VSRC:
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 7346dff8602620804aaf40b2d73d1405084856f6..bb0eb3995299e7ad643bfbdfc4a3bb5aab920153 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -610,6 +610,162 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
        addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
    }
  
+  if (Subtarget.hasQPX()) {
+    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+    setOperationAction(ISD::FREM, MVT::v4f64, Expand);
+
+    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
+    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);
+
+    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
+    setOperationAction(ISD::STORE , MVT::v4f64, Custom);
+
+    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
+    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);
+
+    if (!Subtarget.useCRBits())
+      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
+    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
+    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
+    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+
+    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
+    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);
+
+    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
+    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);
+
+    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
+    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
+    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
+    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
+    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
+    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
+    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
+    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
+    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
+    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
+    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);
+
+    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);
+
+    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
+    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);
+
+    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);
+
+    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
+    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
+    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
+    setOperationAction(ISD::FREM, MVT::v4f32, Expand);
+
+    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
+    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);
+
+    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
+    setOperationAction(ISD::STORE , MVT::v4f32, Custom);
+
+    if (!Subtarget.useCRBits())
+      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
+    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
+    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
+    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);
+
+    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
+    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
+    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
+    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
+    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
+    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
+    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
+    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);
+
+    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
+    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
+    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);
+
+    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);
+
+    setOperationAction(ISD::AND , MVT::v4i1, Legal);
+    setOperationAction(ISD::OR , MVT::v4i1, Legal);
+    setOperationAction(ISD::XOR , MVT::v4i1, Legal);
+
+    if (!Subtarget.useCRBits())
+      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
+    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);
+
+    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
+    setOperationAction(ISD::STORE , MVT::v4i1, Custom);
+
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
+    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
+    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
+    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
+
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
+
+    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);
+
+    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
+    setOperationAction(ISD::FCEIL,  MVT::v4f64, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
+    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);
+
+    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::FCEIL,  MVT::v4f32, Legal);
+    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
+    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
+
+    // These need to set FE_INEXACT, and so cannot be vectorized here.
+    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
+    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
+
+    if (TM.Options.UnsafeFPMath) {
+      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+
+      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+    } else {
+      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
+      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);
+
+      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
+      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+    }
+  }
+
    if (Subtarget.has64BitSupport())
      setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
  
@@ -621,8 +777,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
    }
  
    setBooleanContents(ZeroOrOneBooleanContent);
-  // Altivec instructions set fields to all zeros or all ones.
-  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+  if (Subtarget.hasAltivec()) {
+    // Altivec instructions set fields to all zeros or all ones.
+    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+  }
  
    if (!isPPC64) {
      // These libcalls are not available in 32-bit.
@@ -851,12 +1010,22 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
    case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
    case PPCISD::SC:              return "PPCISD::SC";
+  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
+  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
+  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
+  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
+  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
+  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
    }
  }
  
+/// getSetCCResultType - Return the result type for a SETCC whose operands have
+/// type VT. Scalar compares yield i1 when the subtarget uses CR bits and i32
+/// otherwise. With QPX, vector compares yield a vector of i1 with as many
+/// elements as VT; all other vector compares yield VT with integer elements.
-EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
    if (!VT.isVector())
      return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+
+  if (Subtarget.hasQPX())
+    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
+
    return VT.changeVectorElementTypeToInteger();
  }
  
@@ -1242,6 +1411,36 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
    return SDValue();
  }
  
+/// isQVALIGNIShuffleMask - Recognize a shuffle mask that qvaligni can
+/// implement. Returns the shift amount on a match and -1 otherwise.
+int PPC::isQVALIGNIShuffleMask(SDNode *N) {
+  // Only v4f64, v4f32 and v4i1 shuffles are candidates.
+  EVT VT = N->getValueType(0);
+  if (!(VT == MVT::v4f64 || VT == MVT::v4f32 || VT == MVT::v4i1))
+    return -1;
+
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+  // Skip the leading undef entries (negative mask values).
+  unsigned First = 0;
+  while (First != 4 && SVOp->getMaskElt(First) < 0)
+    ++First;
+
+  // A mask made up entirely of undefs is not matched.
+  if (First == 4)
+    return -1;
+
+  // The first defined entry fixes the shift amount. Reject it when it is
+  // smaller than its own position, since the shift would then be negative.
+  unsigned FirstElt = SVOp->getMaskElt(First);
+  if (FirstElt < First)
+    return -1;
+  unsigned ShiftAmt = FirstElt - First;
+
+  // Every later entry must be undef or continue the consecutive run.
+  for (unsigned Pos = First + 1; Pos != 4; ++Pos)
+    if (!isConstantOrUndef(SVOp->getMaskElt(Pos), ShiftAmt+Pos))
+      return -1;
+
+  return ShiftAmt;
+}
+
  //===----------------------------------------------------------------------===//
  //  Addressing Mode Selection
  //===----------------------------------------------------------------------===//
@@ -1501,9 +1700,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
    } else
      return false;
  
-  // PowerPC doesn't have preinc load/store instructions for vectors.
-  if (VT.isVector())
-    return false;
+  // PowerPC doesn't have preinc load/store instructions for vectors (except
+  // for QPX, which does have preinc r+r forms).
+  if (VT.isVector()) {
+    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
+      return false;
+    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
+      AM = ISD::PRE_INC;
+      return true;
+    }
+  }
  
    if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
  
@@ -2240,6 +2446,17 @@ static const MCPhysReg *GetFPR() {
    return FPR;
  }
  
+/// GetQFPR - Return a pointer to the table of QPX registers (QF1 through
+/// QF13) that should be allocated for arguments.
+static const MCPhysReg *GetQFPR() {
+  static const MCPhysReg QPXArgRegs[] = {
+    PPC::QF1,  PPC::QF2,  PPC::QF3,  PPC::QF4,  PPC::QF5,
+    PPC::QF6,  PPC::QF7,  PPC::QF8,  PPC::QF9,  PPC::QF10,
+    PPC::QF11, PPC::QF12, PPC::QF13
+  };
+  return QPXArgRegs;
+}
+
  /// CalculateStackSlotSize - Calculates the size reserved for this argument on
  /// the stack.
  static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
@@ -2268,6 +2485,10 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
      Align = 16;
+  // QPX vector types stored in double-precision are padded to a 32 byte
+  // boundary.
+  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
+    Align = 32;
  
    // ByVal parameters are aligned as requested.
    if (Flags.isByVal()) {
@@ -2306,7 +2527,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                     unsigned ParamAreaSize,
                                     unsigned &ArgOffset,
                                     unsigned &AvailableFPRs,
-                                   unsigned &AvailableVRs) {
+                                   unsigned &AvailableVRs, bool HasQPX) {
    bool UseMemory = false;
  
    // Respect alignment of argument on the stack.
@@ -2330,7 +2551,11 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
    // However, if the argument is actually passed in an FPR or a VR,
    // we don't use memory after all.
    if (!Flags.isByVal()) {
-    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
+        // QPX registers overlap with the scalar FP registers.
+        (HasQPX && (ArgVT == MVT::v4f32 ||
+                    ArgVT == MVT::v4f64 ||
+                    ArgVT == MVT::v4i1)))
        if (AvailableFPRs > 0) {
          --AvailableFPRs;
          return false;
@@ -2464,13 +2689,21 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
          case MVT::v16i8:
          case MVT::v8i16:
          case MVT::v4i32:
-        case MVT::v4f32:
            RC = &PPC::VRRCRegClass;
            break;
+        case MVT::v4f32:
+          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
+          break;
          case MVT::v2f64:
          case MVT::v2i64:
            RC = &PPC::VSHRCRegClass;
            break;
+        case MVT::v4f64:
+          RC = &PPC::QFRCRegClass;
+          break;
+        case MVT::v4i1:
+          RC = &PPC::QBRCRegClass;
+          break;
        }
  
        // Transform the arguments stored in physical registers into virtual ones.
@@ -2658,9 +2891,12 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
    };
  
+  static const MCPhysReg *QFPR = GetQFPR();
+
    const unsigned Num_GPR_Regs = array_lengthof(GPR);
    const unsigned Num_FPR_Regs = 13;
    const unsigned Num_VR_Regs  = array_lengthof(VR);
+  const unsigned Num_QFPR_Regs = Num_FPR_Regs;
  
    // Do a first pass over the arguments to determine whether the ABI
    // guarantees that our caller has allocated the parameter save area
@@ -2676,7 +2912,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    for (unsigned i = 0, e = Ins.size(); i != e; ++i)
      if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
-                               NumBytes, AvailableFPRs, AvailableVRs))
+                               NumBytes, AvailableFPRs, AvailableVRs,
+                               Subtarget.hasQPX()))
        HasParameterArea = true;
  
    // Add DAG nodes to load the arguments or copy them out of registers.  On
@@ -2685,6 +2922,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
  
    unsigned ArgOffset = LinkageSize;
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+  unsigned &QFPR_idx = FPR_idx;
    SmallVector<SDValue, 8> MemOps;
    Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
    unsigned CurArgIdx = 0;
@@ -2908,6 +3146,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      case MVT::v16i8:
      case MVT::v2f64:
      case MVT::v2i64:
+      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
@@ -2926,6 +3165,36 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
+      } // not QPX
+
+      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
+             "Invalid QPX parameter type");
+      /* fall through */
+
+    case MVT::v4f64:
+    case MVT::v4i1:
+      // QPX vectors are treated like their scalar floating-point subregisters
+      // (except that they're larger).
+      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
+      if (QFPR_idx != Num_QFPR_Regs) {
+        const TargetRegisterClass *RC;
+        switch (ObjectVT.getSimpleVT().SimpleTy) {
+        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
+        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
+        default:         RC = &PPC::QBRCRegClass; break;
+        }
+
+        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
+        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+        ++QFPR_idx;
+      } else {
+        if (CallConv == CallingConv::Fast)
+          ComputeArgOffset();
+        needsLoad = true;
+      }
+      if (CallConv != CallingConv::Fast || needsLoad)
+        ArgOffset += Sz;
+      break;
      }
  
      // We need to load the argument to a virtual register if we determined
@@ -4306,6 +4575,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
    unsigned NumBytes = LinkageSize;
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+  unsigned &QFPR_idx = FPR_idx;
  
    static const MCPhysReg GPR[] = {
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -4322,9 +4592,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
    };
  
+  static const MCPhysReg *QFPR = GetQFPR();
+
    const unsigned NumGPRs = array_lengthof(GPR);
    const unsigned NumFPRs = 13;
    const unsigned NumVRs  = array_lengthof(VR);
+  const unsigned NumQFPRs = NumFPRs;
  
    // When using the fast calling convention, we don't provide backing for
    // arguments that will be in registers.
@@ -4348,12 +4621,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
            if (++NumGPRsUsed <= NumGPRs)
              continue;
            break;
-        case MVT::f32:
-        case MVT::f64:
-          if (++NumFPRsUsed <= NumFPRs)
-            continue;
-          break;
-        case MVT::v4f32:
          case MVT::v4i32:
          case MVT::v8i16:
          case MVT::v16i8:
@@ -4362,6 +4629,24 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
            if (++NumVRsUsed <= NumVRs)
              continue;
            break;
+        case MVT::v4f32:
+         // When using QPX, this is handled like a FP register, otherwise, it
+         // is an Altivec register.
+          if (Subtarget.hasQPX()) {
+            if (++NumFPRsUsed <= NumFPRs)
+              continue;
+          } else {
+            if (++NumVRsUsed <= NumVRs)
+              continue;
+          }
+          break;
+        case MVT::f32:
+        case MVT::f64:
+        case MVT::v4f64: // QPX
+        case MVT::v4i1:  // QPX
+          if (++NumFPRsUsed <= NumFPRs)
+            continue;
+          break;
          }
      }
  
@@ -4703,6 +4988,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      case MVT::v16i8:
      case MVT::v2f64:
      case MVT::v2i64:
+      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
@@ -4766,6 +5052,60 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        if (CallConv != CallingConv::Fast)
          ArgOffset += 16;
        break;
+      } // not QPX
+
+      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
+             "Invalid QPX parameter type");
+
+      /* fall through */
+    case MVT::v4f64:
+    case MVT::v4i1: {
+      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
+      if (isVarArg) {
+        // We could elide this store in the case where the object fits
+        // entirely in R registers.  Maybe later.
+        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+                                     MachinePointerInfo(), false, false, 0);
+        MemOpChains.push_back(Store);
+        if (QFPR_idx != NumQFPRs) {
+          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
+                                     Store, PtrOff, MachinePointerInfo(),
+                                     false, false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
+        }
+        ArgOffset += (IsF32 ? 16 : 32);
+        for (unsigned i=0; i<(IsF32 ? 16 : 32); i+=PtrByteSize) {
+          if (GPR_idx == NumGPRs)
+            break;
+          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+                                  DAG.getConstant(i, PtrVT));
+          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+                                     false, false, false, 0);
+          MemOpChains.push_back(Load.getValue(1));
+          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+        }
+        break;
+      }
+
+      // Non-varargs QPX params go into registers or on the stack.
+      if (QFPR_idx != NumQFPRs) {
+        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
+      } else {
+        if (CallConv == CallingConv::Fast)
+          ComputePtrOff();
+
+        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+                         true, isTailCall, true, MemOpChains,
+                         TailCallArguments, dl);
+        if (CallConv == CallingConv::Fast)
+          ArgOffset += (IsF32 ? 16 : 32);
+      }
+
+      if (CallConv != CallingConv::Fast)
+        ArgOffset += (IsF32 ? 16 : 32);
+      break;
+      }
      }
    }
  
@@ -5384,6 +5724,9 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
  }
  
  SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorLoad(Op, DAG);
+
    assert(Op.getValueType() == MVT::i1 &&
           "Custom lowering only for i1 loads");
  
@@ -5405,6 +5748,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  }
  
  SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getOperand(1).getValueType().isVector())
+    return LowerVectorStore(Op, DAG);
+
    assert(Op.getOperand(1).getValueType() == MVT::i1 &&
           "Custom lowering only for i1 stores");
  
@@ -5674,6 +6020,29 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
  SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
    SDLoc dl(Op);
+
+  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
+    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
+      return SDValue();
+
+    SDValue Value = Op.getOperand(0);
+    // The values are now known to be -1 (false) or 1 (true). To convert this
+    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+  
+    SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
+    FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+                          FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+  
+    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+    if (Op.getValueType() != MVT::v4f64)
+      Value = DAG.getNode(ISD::FP_ROUND, dl,
+                          Op.getValueType(), Value, DAG.getIntPtrConstant(1));
+    return Value;
+  }
+
    // Don't handle ppc_fp128 here; let it be lowered to a libcall.
    if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
      return SDValue();
@@ -6125,6 +6494,127 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
    assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
  
+  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
+    // We first build an i32 vector, load it into a QPX register,
+    // then convert it to a floating-point vector and compare it
+    // to a zero vector to get the boolean result.
+    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+    EVT PtrVT = getPointerTy();
+    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+    assert(BVN->getNumOperands() == 4 &&
+      "BUILD_VECTOR for v4i1 does not have 4 operands");
+
+    bool IsConst = true;
+    for (unsigned i = 0; i < 4; ++i) {
+      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
+        IsConst = false;
+        break;
+      }
+    }
+
+    if (IsConst) {
+      Constant *One =
+        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
+      Constant *NegOne =
+        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
+
+      SmallVector<Constant*, 4> CV(4, NegOne);
+      for (unsigned i = 0; i < 4; ++i) {
+        if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
+          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
+        else if (cast<ConstantSDNode>(BVN->getOperand(i))->
+                   getConstantIntValue()->isZero())
+          continue;
+        else
+          CV[i] = One;
+      }
+
+      Constant *CP = ConstantVector::get(CV);
+      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
+                      16 /* alignment */);
+ 
+      SmallVector<SDValue, 2> Ops;
+      Ops.push_back(DAG.getEntryNode());
+      Ops.push_back(CPIdx);
+
+      SmallVector<EVT, 2> ValueVTs;
+      ValueVTs.push_back(MVT::v4i1);
+      ValueVTs.push_back(MVT::Other); // chain
+      SDVTList VTs = DAG.getVTList(ValueVTs);
+
+      return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
+        dl, VTs, Ops, MVT::v4f32,
+        MachinePointerInfo::getConstantPool());
+    }
+
+    SmallVector<SDValue, 4> Stores;
+    for (unsigned i = 0; i < 4; ++i) {
+      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+      unsigned Offset = 4*i;
+      SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
+      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
+      if (StoreSize > 4) {
+        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+                                           BVN->getOperand(i), Idx,
+                                           PtrInfo.getWithOffset(Offset),
+                                           MVT::i32, false, false, 0));
+      } else {
+        SDValue StoreValue = BVN->getOperand(i);
+        if (StoreSize < 4)
+          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
+
+        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+                                      StoreValue, Idx,
+                                      PtrInfo.getWithOffset(Offset),
+                                      false, false, 0));
+      }
+    }
+
+    SDValue StoreChain;
+    if (!Stores.empty())
+      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+    else
+      StoreChain = DAG.getEntryNode();
+
+    // Now load from v4i32 into the QPX register; this will extend it to
+    // v4i64 but not yet convert it to a floating point. Nevertheless, this
+    // is typed as v4f64 because the QPX register integer states are not
+    // explicitly represented.
+
+    SmallVector<SDValue, 2> Ops;
+    Ops.push_back(StoreChain);
+    Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, MVT::i32));
+    Ops.push_back(FIdx);
+
+    SmallVector<EVT, 2> ValueVTs;
+    ValueVTs.push_back(MVT::v4f64);
+    ValueVTs.push_back(MVT::Other); // chain
+    SDVTList VTs = DAG.getVTList(ValueVTs);
+
+    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
+      dl, VTs, Ops, MVT::v4i32, PtrInfo);
+    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, MVT::i32),
+      LoadedVect);
+
+    SDValue FPZeros = DAG.getConstantFP(0.0, MVT::f64);
+    FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+                          FPZeros, FPZeros, FPZeros, FPZeros);
+
+    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
+  }
+
+  // All other QPX vectors are handled by generic code.
+  if (Subtarget.hasQPX())
+    return SDValue();
+
    // Check if this is a splat of a constant value.
    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
@@ -6383,6 +6873,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
    EVT VT = Op.getValueType();
    bool isLittleEndian = Subtarget.isLittleEndian();
  
+  if (Subtarget.hasQPX()) {
+    if (VT.getVectorNumElements() != 4)
+      return SDValue();
+
+    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
+    if (AlignIdx != -1) {
+      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
+                         DAG.getConstant(AlignIdx, MVT::i32));
+    } else if (SVOp->isSplat()) {
+      int SplatIdx = SVOp->getSplatIndex();
+      if (SplatIdx >= 4) {
+        std::swap(V1, V2);
+        SplatIdx -= 4;
+      }
+
+      // FIXME: If SplatIdx == 0 and the input came from a load, then there is
+      // nothing to do.
+
+      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
+                         DAG.getConstant(SplatIdx, MVT::i32));
+    }
+
+    // Lower this into a qvgpci/qvfperm pair.
+
+    // Compute the qvgpci literal
+    unsigned idx = 0;
+    for (unsigned i = 0; i < 4; ++i) {
+      int m = SVOp->getMaskElt(i);
+      unsigned mm = m >= 0 ? (unsigned) m : i;
+      idx |= mm << (3-i)*3;
+    }
+
+    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
+                             DAG.getConstant(idx, MVT::i32));
+    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
+  }
+
    // Cases that are handled by instructions that take permute immediates
    // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
    // selected by the instruction selector.
@@ -6665,6 +7194,302 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                       false, false, false, 0);
  }
  
+/// LowerEXTRACT_VECTOR_ELT - Custom lowering for extracting one element from a
+/// QPX v4i1 vector (the only vector operand type accepted; see the assert).
+///
+/// The boolean vector is converted to floating point, rescaled to 0/1,
+/// converted to integer words and stored to a 16-byte stack object; the
+/// requested element is then reloaded from that object as an i32. The result
+/// is that i32, or its truncation to i1 when the subtarget uses CR bits.
+SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  SDNode *N = Op.getNode();
+
+  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
+         "Unknown extract_vector_elt type");
+
+  SDValue Value = N->getOperand(0);
+
+  // The first part of this is like the store lowering except that we don't
+  // need to track the chain.
+
+  // The values are now known to be -1 (false) or 1 (true). To convert this
+  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+  // understand how to form the extending load.
+  SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
+  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); 
+
+  // Now convert to an integer and store.
+  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32),
+    Value);
+
+  // Temporary stack object (16 bytes, 16-byte aligned) that receives the four
+  // integer words.
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+  EVT PtrVT = getPointerTy();
+  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+  // The store is chained on the entry node rather than on any incoming chain.
+  // Operands of the qvstfiw intrinsic node: chain, intrinsic ID, value,
+  // address.
+  SDValue StoreChain = DAG.getEntryNode();
+  SmallVector<SDValue, 2> Ops;
+  Ops.push_back(StoreChain);
+  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32));
+  Ops.push_back(Value);
+  Ops.push_back(FIdx);
+
+  SmallVector<EVT, 2> ValueVTs;
+  ValueVTs.push_back(MVT::Other); // chain
+  SDVTList VTs = DAG.getVTList(ValueVTs);
+
+  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+    dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+  // Extract the value requested. Each element occupies one 4-byte word of the
+  // stack object. The index operand is cast<> to ConstantSDNode, so a
+  // non-constant index is not handled here.
+  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
+  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+  // The load is chained on the store so that it observes the stored words.
+  SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+                               PtrInfo.getWithOffset(Offset),
+                               false, false, false, 0);
+
+  // Without CR bits the i32 is returned directly; otherwise it is truncated
+  // to i1.
+  if (!Subtarget.useCRBits())
+    return IntVal;
+
+  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
+}
+
+/// Custom lowering for QPX vector loads.
+///
+/// v4f64 and v4f32 loads whose alignment is at least the store size of the
+/// memory type are returned unchanged. Under-aligned v4f64/v4f32 loads are
+/// split into four scalar loads that are recombined with BUILD_VECTOR. A v4i1
+/// load reads four consecutive bytes (extending each to i32) and rebuilds the
+/// vector through the v4i1 BUILD_VECTOR lowering.
+///
+/// Returns the merged values { vector, chain }, or for an indexed
+/// v4f64/v4f32 load { vector, result 1 of the first element load, chain }.
+SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+  SDValue LoadChain = LN->getChain();
+  SDValue BasePtr = LN->getBasePtr();
+
+  if (Op.getValueType() == MVT::v4f64 ||
+      Op.getValueType() == MVT::v4f32) {
+    EVT MemVT = LN->getMemoryVT();
+    unsigned Alignment = LN->getAlignment();
+
+    // If this load is properly aligned, then it is legal.
+    if (Alignment >= MemVT.getStoreSize())
+      return Op;
+
+    // Otherwise load the four elements one at a time. Stride is the size in
+    // bytes of one element in memory.
+    EVT ScalarVT = Op.getValueType().getScalarType(),
+        ScalarMemVT = MemVT.getScalarType();
+    unsigned Stride = ScalarMemVT.getStoreSize();
+
+    SmallVector<SDValue, 8> Vals, LoadChains;
+    for (unsigned Idx = 0; Idx < 4; ++Idx) {
+      SDValue Load;
+      // Use an extending load when the in-register element type differs from
+      // the in-memory element type. Each element's alignment is the MinAlign
+      // of the original alignment and the element's byte offset.
+      if (ScalarVT != ScalarMemVT)
+        Load =
+          DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
+                         BasePtr,
+                         LN->getPointerInfo().getWithOffset(Idx*Stride),
+                         ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
+                         LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+                         LN->getAAInfo());
+      else
+        Load =
+          DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
+                       LN->getPointerInfo().getWithOffset(Idx*Stride),
+                       LN->isVolatile(), LN->isNonTemporal(),
+                       LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
+                       LN->getAAInfo());
+
+      // For an indexed (pre-increment) vector load, only the first element
+      // load is turned into an indexed load.
+      if (Idx == 0 && LN->isIndexed()) {
+        assert(LN->getAddressingMode() == ISD::PRE_INC &&
+               "Unknown addressing mode on vector load");
+        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
+                                  LN->getAddressingMode());
+      }
+
+      // NOTE(review): result 1 is collected here as the chain of every element
+      // load, but for the indexed first load the same result is returned
+      // below as the second merged value -- confirm which result of an
+      // indexed load carries the chain.
+      Vals.push_back(Load);
+      LoadChains.push_back(Load.getValue(1));
+
+      // Advance to the next element.
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getConstant(Stride, BasePtr.getValueType()));
+    }
+
+    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+    SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                                   Op.getValueType(), Vals);
+
+    if (LN->isIndexed()) {
+      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
+      return DAG.getMergeValues(RetOps, dl);
+    }
+
+    SDValue RetOps[] = { Value, TF };
+    return DAG.getMergeValues(RetOps, dl);
+  }
+
+  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
+  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
+
+  // To lower v4i1 from a byte array, we load the byte elements of the
+  // vector and then reuse the BUILD_VECTOR logic.
+
+  SmallVector<SDValue, 4> VectElmts, VectElmtChains;
+  for (unsigned i = 0; i < 4; ++i) {
+    // Element i lives in the byte at BasePtr + i.
+    SDValue Idx = DAG.getConstant(i, BasePtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+    VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
+                        dl, MVT::i32, LoadChain, Idx,
+                        LN->getPointerInfo().getWithOffset(i),
+                        MVT::i8 /* memory type */,
+                        LN->isVolatile(), LN->isNonTemporal(),
+                        LN->isInvariant(),
+                        1 /* alignment */, LN->getAAInfo()));
+    VectElmtChains.push_back(VectElmts[i].getValue(1));
+  }
+
+  // Join the chains of the four byte loads and build the v4i1 value from the
+  // loaded i32 elements.
+  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
+  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
+
+  SDValue RVals[] = { Value, LoadChain };
+  return DAG.getMergeValues(RVals, dl);
+}
+
+/// Custom lowering for QPX vector stores.
+///
+/// v4f64 and v4f32 stores whose alignment is at least the store size of the
+/// memory type are returned unchanged. Under-aligned v4f64/v4f32 stores are
+/// split into four scalar stores of the extracted elements. A v4i1 store
+/// converts the boolean vector to 0/1 integer words in a stack temporary and
+/// then writes one byte per element to the destination.
+///
+/// Returns the resulting chain, or for an indexed v4f64/v4f32 store the
+/// merged values { chain, result 1 of the first element store }.
+SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
+  SDValue StoreChain = SN->getChain();
+  SDValue BasePtr = SN->getBasePtr();
+  SDValue Value = SN->getValue();
+
+  if (Value.getValueType() == MVT::v4f64 ||
+      Value.getValueType() == MVT::v4f32) {
+    EVT MemVT = SN->getMemoryVT();
+    unsigned Alignment = SN->getAlignment();
+
+    // If this store is properly aligned, then it is legal.
+    if (Alignment >= MemVT.getStoreSize())
+      return Op;
+
+    // Otherwise store the four elements one at a time. Stride is the size in
+    // bytes of one element in memory.
+    EVT ScalarVT = Value.getValueType().getScalarType(),
+        ScalarMemVT = MemVT.getScalarType();
+    unsigned Stride = ScalarMemVT.getStoreSize();
+
+    SmallVector<SDValue, 8> Stores;
+    for (unsigned Idx = 0; Idx < 4; ++Idx) {
+      SDValue Ex =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
+                    DAG.getConstant(Idx, getVectorIdxTy()));
+      SDValue Store;
+      // Use a truncating store when the in-register element type differs
+      // from the in-memory element type. Each element's alignment is the
+      // MinAlign of the original alignment and the element's byte offset.
+      if (ScalarVT != ScalarMemVT)
+        Store =
+          DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
+                            SN->getPointerInfo().getWithOffset(Idx*Stride),
+                            ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
+                            MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+      else
+        Store =
+          DAG.getStore(StoreChain, dl, Ex, BasePtr,
+                       SN->getPointerInfo().getWithOffset(Idx*Stride),
+                       SN->isVolatile(), SN->isNonTemporal(),
+                       MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
+
+      // For an indexed (pre-increment) vector store, only the first element
+      // store is turned into an indexed store.
+      if (Idx == 0 && SN->isIndexed()) {
+        assert(SN->getAddressingMode() == ISD::PRE_INC &&
+               "Unknown addressing mode on vector store");
+        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
+                                    SN->getAddressingMode());
+      }
+
+      // Advance to the next element.
+      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                            DAG.getConstant(Stride, BasePtr.getValueType()));
+      Stores.push_back(Store);
+    }
+
+    SDValue TF =  DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+    // NOTE(review): in the indexed case Stores[0] is the indexed store node;
+    // confirm that the result fed to the TokenFactor above and the
+    // getValue(1) returned here are the intended chain and updated-pointer
+    // results.
+    if (SN->isIndexed()) {
+      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
+      return DAG.getMergeValues(RetOps, dl);
+    }
+
+    return TF;
+  }
+
+  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
+  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
+
+  // The values are now known to be -1 (false) or 1 (true). To convert this
+  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
+  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
+  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
+
+  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
+  // understand how to form the extending load.
+  SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64);
+  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
+                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);
+
+  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+
+  // Now convert to an integer and store.
+  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
+    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32),
+    Value);
+
+  // Temporary stack object (16 bytes, 16-byte aligned) that receives the four
+  // integer words.
+  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+  EVT PtrVT = getPointerTy();
+  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+  // Operands of the qvstfiw intrinsic node: chain, intrinsic ID, value,
+  // address.
+  SmallVector<SDValue, 2> Ops;
+  Ops.push_back(StoreChain);
+  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32));
+  Ops.push_back(Value);
+  Ops.push_back(FIdx);
+
+  SmallVector<EVT, 2> ValueVTs;
+  ValueVTs.push_back(MVT::Other); // chain
+  SDVTList VTs = DAG.getVTList(ValueVTs);
+
+  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
+    dl, VTs, Ops, MVT::v4i32, PtrInfo);
+
+  // Move data into the byte array. First reload each 4-byte word from the
+  // stack temporary, chained on the intrinsic store above.
+  SmallVector<SDValue, 4> Loads, LoadChains;
+  for (unsigned i = 0; i < 4; ++i) {
+    unsigned Offset = 4*i;
+    SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+    Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+                                   PtrInfo.getWithOffset(Offset),
+                                   false, false, false, 0));
+    LoadChains.push_back(Loads[i].getValue(1));
+  }
+
+  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+  // Then write element i as a single byte at BasePtr + i. The volatile and
+  // non-temporal flags are passed in the same order as the getTruncStore call
+  // in the v4f64/v4f32 path above (volatile first, then non-temporal).
+  SmallVector<SDValue, 4> Stores;
+  for (unsigned i = 0; i < 4; ++i) {
+    SDValue Idx = DAG.getConstant(i, BasePtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+    Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
+                                       SN->getPointerInfo().getWithOffset(i),
+                                       MVT::i8 /* memory type */,
+                                       SN->isVolatile(), SN->isNonTemporal(),
+                                       1 /* alignment */, SN->getAAInfo()));
+  }
+
+  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+  return StoreChain;
+}
+
  SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
    SDLoc dl(Op);
    if (Op.getValueType() == MVT::v4i32) {
@@ -6787,6 +7612,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
    case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::MUL:                return LowerMUL(Op, DAG);
  
    // For counter-based loop handling.
@@ -7411,6 +8237,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
               MI->getOpcode() == PPC::SELECT_CC_I8 ||
               MI->getOpcode() == PPC::SELECT_CC_F4 ||
               MI->getOpcode() == PPC::SELECT_CC_F8 ||
+             MI->getOpcode() == PPC::SELECT_CC_QFRC ||
+             MI->getOpcode() == PPC::SELECT_CC_QSRC ||
+             MI->getOpcode() == PPC::SELECT_CC_QBRC ||
               MI->getOpcode() == PPC::SELECT_CC_VRRC ||
               MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
               MI->getOpcode() == PPC::SELECT_CC_VSRC ||
@@ -7418,6 +8247,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
               MI->getOpcode() == PPC::SELECT_I8 ||
               MI->getOpcode() == PPC::SELECT_F4 ||
               MI->getOpcode() == PPC::SELECT_F8 ||
+             MI->getOpcode() == PPC::SELECT_QFRC ||
+             MI->getOpcode() == PPC::SELECT_QSRC ||
+             MI->getOpcode() == PPC::SELECT_QBRC ||
               MI->getOpcode() == PPC::SELECT_VRRC ||
               MI->getOpcode() == PPC::SELECT_VSFRC ||
               MI->getOpcode() == PPC::SELECT_VSRC) {
@@ -7451,6 +8283,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
          MI->getOpcode() == PPC::SELECT_I8 ||
          MI->getOpcode() == PPC::SELECT_F4 ||
          MI->getOpcode() == PPC::SELECT_F8 ||
+        MI->getOpcode() == PPC::SELECT_QFRC ||
+        MI->getOpcode() == PPC::SELECT_QSRC ||
+        MI->getOpcode() == PPC::SELECT_QBRC ||
          MI->getOpcode() == PPC::SELECT_VRRC ||
          MI->getOpcode() == PPC::SELECT_VSFRC ||
          MI->getOpcode() == PPC::SELECT_VSRC) {
@@ -7866,7 +8701,9 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
        (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
      // Convergence is quadratic, so we essentially double the number of digits
      // correct after every iteration. For both FRE and FRSQRTE, the minimum
      // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7887,7 +8724,9 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
        (VT == MVT::f64 && Subtarget.hasFRE()) ||
        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
      // Convergence is quadratic, so we essentially double the number of digits
      // correct after every iteration. For both FRE and FRSQRTE, the minimum
      // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7973,6 +8812,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
      EVT VT;
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default: return false;
+    case Intrinsic::ppc_qpx_qvlfd:
+    case Intrinsic::ppc_qpx_qvlfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfs:
+    case Intrinsic::ppc_qpx_qvlfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcd:
+    case Intrinsic::ppc_qpx_qvlfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcs:
+    case Intrinsic::ppc_qpx_qvlfcsa:
+      VT = MVT::v2f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfiwa:
+    case Intrinsic::ppc_qpx_qvlfiwz:
      case Intrinsic::ppc_altivec_lvx:
      case Intrinsic::ppc_altivec_lvxl:
      case Intrinsic::ppc_vsx_lxvw4x:
@@ -7999,6 +8856,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
      EVT VT;
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default: return false;
+    case Intrinsic::ppc_qpx_qvstfd:
+    case Intrinsic::ppc_qpx_qvstfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfs:
+    case Intrinsic::ppc_qpx_qvstfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcd:
+    case Intrinsic::ppc_qpx_qvstfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcs:
+    case Intrinsic::ppc_qpx_qvstfcsa:
+      VT = MVT::v2f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfiw:
+    case Intrinsic::ppc_qpx_qvstfiwa:
      case Intrinsic::ppc_altivec_stvx:
      case Intrinsic::ppc_altivec_stvxl:
      case Intrinsic::ppc_vsx_stxvw4x:
@@ -8927,14 +9802,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          return expandVSXLoadForLE(N, DCI);
      }
  
-    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+    EVT MemVT = LD->getMemoryVT();
+    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
      unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
-    if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() &&
-        // P8 and later hardware should just use LOAD.
-        !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
-                                     VT == MVT::v4i32 || VT == MVT::v4f32) &&
+    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+    unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+    if (LD->isUnindexed() && VT.isVector() &&
+        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
+          // P8 and later hardware should just use LOAD.
+          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+          LD->getAlignment() >= ScalarABIAlignment)) &&
          LD->getAlignment() < ABIAlignment) {
-      // This is a type-legal unaligned Altivec load.
+      // This is a type-legal unaligned Altivec or QPX load.
        SDValue Chain = LD->getChain();
        SDValue Ptr = LD->getBasePtr();
        bool isLittleEndian = Subtarget.isLittleEndian();
@@ -8963,10 +9844,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // a different base address offset from this one by an aligned amount.
        // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
        // optimization later.
-      Intrinsic::ID Intr = (isLittleEndian ?
-                            Intrinsic::ppc_altivec_lvsr :
-                            Intrinsic::ppc_altivec_lvsl);
-      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
+      Intrinsic::ID Intr, IntrLD, IntrPerm;
+      MVT PermCntlTy, PermTy, LDTy;
+      if (Subtarget.hasAltivec()) {
+        Intr = isLittleEndian ?  Intrinsic::ppc_altivec_lvsr :
+                                 Intrinsic::ppc_altivec_lvsl;
+        IntrLD = Intrinsic::ppc_altivec_lvx;
+        IntrPerm = Intrinsic::ppc_altivec_vperm;
+        PermCntlTy = MVT::v16i8;
+        PermTy = MVT::v4i32;
+        LDTy = MVT::v4i32;
+      } else {
+        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+                                       Intrinsic::ppc_qpx_qvlpcls;
+        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+                                       Intrinsic::ppc_qpx_qvlfs;
+        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+        PermCntlTy = MVT::v4f64;
+        PermTy = MVT::v4f64;
+        LDTy = MemVT.getSimpleVT();
+      }
+
+      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
  
        // Create the new MMO for the new base load. It is like the original MMO,
        // but represents an area in memory almost twice the vector size centered
@@ -8975,18 +9874,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // original unaligned load.
        MachineFunction &MF = DAG.getMachineFunction();
        MachineMemOperand *BaseMMO =
-        MF.getMachineMemOperand(LD->getMemOperand(),
-                                -LD->getMemoryVT().getStoreSize()+1,
-                                2*LD->getMemoryVT().getStoreSize()-1);
+        MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+                                2*MemVT.getStoreSize()-1);
  
        // Create the new base load.
-      SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
-                                               getPointerTy());
+      SDValue LDXIntID = DAG.getTargetConstant(IntrLD, getPointerTy());
        SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
        SDValue BaseLoad =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
-                                DAG.getVTList(MVT::v4i32, MVT::Other),
-                                BaseLoadOps, MVT::v4i32, BaseMMO);
+                                DAG.getVTList(PermTy, MVT::Other),
+                                BaseLoadOps, LDTy, BaseMMO);
  
        // Note that the value of IncOffset (which is provided to the next
        // load's pointer info offset value, and thus used to calculate the
@@ -9010,12 +9907,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
  
        MachineMemOperand *ExtraMMO =
          MF.getMachineMemOperand(LD->getMemOperand(),
-                                1, 2*LD->getMemoryVT().getStoreSize()-1);
+                                1, 2*MemVT.getStoreSize()-1);
        SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
        SDValue ExtraLoad =
          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
-                                DAG.getVTList(MVT::v4i32, MVT::Other),
-                                ExtraLoadOps, MVT::v4i32, ExtraMMO);
+                                DAG.getVTList(PermTy, MVT::Other),
+                                ExtraLoadOps, LDTy, ExtraMMO);
  
        SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
          BaseLoad.getValue(1), ExtraLoad.getValue(1));
@@ -9027,14 +9924,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // and ExtraLoad here.
        SDValue Perm;
        if (isLittleEndian)
-        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+        Perm = BuildIntrinsicOp(IntrPerm,
                                  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
        else
-        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+        Perm = BuildIntrinsicOp(IntrPerm,
                                  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
  
-      if (VT != MVT::v4i32)
-        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+      if (VT != PermTy)
+        Perm = Subtarget.hasAltivec() ?
+                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
+                               DAG.getTargetConstant(1, MVT::i64));
+                               // second argument is 1 because this rounding
+                               // is always exact.
  
        // The output of the permutation is our loaded result, the TokenFactor is
        // our new chain.
@@ -9045,15 +9947,21 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      break;
      case ISD::INTRINSIC_WO_CHAIN: {
        bool isLittleEndian = Subtarget.isLittleEndian();
+      unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
        Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                             : Intrinsic::ppc_altivec_lvsl);
-      if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
-          N->getOperand(1)->getOpcode() == ISD::ADD) {
+      if ((IID == Intr ||
+           IID == Intrinsic::ppc_qpx_qvlpcld  ||
+           IID == Intrinsic::ppc_qpx_qvlpcls) &&
+        N->getOperand(1)->getOpcode() == ISD::ADD) {
          SDValue Add = N->getOperand(1);
  
+        int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+                   5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+
          if (DAG.MaskedValueIsZero(
                  Add->getOperand(1),
-                APInt::getAllOnesValue(4 /* 16 byte alignment */)
+                APInt::getAllOnesValue(Bits /* alignment */)
                      .zext(
                          Add.getValueType().getScalarType().getSizeInBits()))) {
            SDNode *BasePtr = Add->getOperand(0).getNode();
@@ -9061,8 +9969,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                      UE = BasePtr->use_end();
                 UI != UE; ++UI) {
              if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-                cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
-                    Intr) {
+                cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
                // We've found another LVSL/LVSR, and this address is an aligned
                // multiple of that one. The results will be the same, so use the
                // one we've just found instead.
@@ -9071,6 +9978,27 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
              }
            }
          }
+
+        if (isa<ConstantSDNode>(Add->getOperand(1))) {
+          SDNode *BasePtr = Add->getOperand(0).getNode();
+          for (SDNode::use_iterator UI = BasePtr->use_begin(),
+               UE = BasePtr->use_end(); UI != UE; ++UI) {
+            if (UI->getOpcode() == ISD::ADD &&
+                isa<ConstantSDNode>(UI->getOperand(1)) &&
+                (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
+                 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
+                (1 << Bits) == 0) {
+              SDNode *OtherAdd = *UI;
+              for (SDNode::use_iterator VI = OtherAdd->use_begin(),
+                   VE = OtherAdd->use_end(); VI != VE; ++VI) {
+                if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+                    cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
+                  return SDValue(*VI, 0);
+                }
+              }
+            }
+          }
+        }
        }
      }
  
@@ -9521,8 +10449,16 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
+      if (VT == MVT::v4f64 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QFRCRegClass);
+      if (VT == MVT::v4f32 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QSRCRegClass);
        break;
      case 'v':
+      if (VT == MVT::v4f64 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QFRCRegClass);
+      if (VT == MVT::v4f32 && Subtarget.hasQPX())
+        return std::make_pair(0U, &PPC::QSRCRegClass);
        return std::make_pair(0U, &PPC::VRRCRegClass);
      case 'y':   // crrc
        return std::make_pair(0U, &PPC::CRRCRegClass);
@@ -9642,7 +10578,9 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
  // by AM is legal for this target, for a load/store of the specified type.
  bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                Type *Ty) const {
-  // FIXME: PPC does not allow r+i addressing modes for vectors!
+  // PPC does not allow r+i addressing modes for vectors!
+  if (Ty->isVectorTy() && AM.BaseOffs != 0)
+    return false;
  
    // PPC allows a sign-extended 16-bit immediate field.
    if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
@@ -9773,6 +10711,12 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             unsigned Intrinsic) const {
  
    switch (Intrinsic) {
+  case Intrinsic::ppc_qpx_qvlfd:
+  case Intrinsic::ppc_qpx_qvlfs:
+  case Intrinsic::ppc_qpx_qvlfcd:
+  case Intrinsic::ppc_qpx_qvlfcs:
+  case Intrinsic::ppc_qpx_qvlfiwa:
+  case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_altivec_lvebx:
@@ -9794,6 +10738,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
      case Intrinsic::ppc_vsx_lxvd2x:
        VT = MVT::v2f64;
        break;
+    case Intrinsic::ppc_qpx_qvlfd:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfs:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcd:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcs:
+      VT = MVT::v2f32;
+      break;
      default:
        VT = MVT::v4i32;
        break;
@@ -9810,6 +10766,47 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
      Info.writeMem = false;
      return true;
    }
+  case Intrinsic::ppc_qpx_qvlfda:
+  case Intrinsic::ppc_qpx_qvlfsa:
+  case Intrinsic::ppc_qpx_qvlfcda:
+  case Intrinsic::ppc_qpx_qvlfcsa:
+  case Intrinsic::ppc_qpx_qvlfiwaa:
+  case Intrinsic::ppc_qpx_qvlfiwza: {
+    EVT VT;
+    switch (Intrinsic) {
+    case Intrinsic::ppc_qpx_qvlfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcsa:
+      VT = MVT::v2f32;
+      break;
+    default:
+      VT = MVT::v4i32;
+      break;
+    }
+
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = VT;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.size = VT.getStoreSize();
+    Info.align = 1;
+    Info.vol = false;
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::ppc_qpx_qvstfd:
+  case Intrinsic::ppc_qpx_qvstfs:
+  case Intrinsic::ppc_qpx_qvstfcd:
+  case Intrinsic::ppc_qpx_qvstfcs:
+  case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_altivec_stvebx:
@@ -9831,6 +10828,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
      case Intrinsic::ppc_vsx_stxvd2x:
        VT = MVT::v2f64;
        break;
+    case Intrinsic::ppc_qpx_qvstfd:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfs:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcd:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcs:
+      VT = MVT::v2f32;
+      break;
      default:
        VT = MVT::v4i32;
        break;
@@ -9847,6 +10856,41 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
      Info.writeMem = true;
      return true;
    }
+  case Intrinsic::ppc_qpx_qvstfda:
+  case Intrinsic::ppc_qpx_qvstfsa:
+  case Intrinsic::ppc_qpx_qvstfcda:
+  case Intrinsic::ppc_qpx_qvstfcsa:
+  case Intrinsic::ppc_qpx_qvstfiwa: {
+    EVT VT;
+    switch (Intrinsic) {
+    case Intrinsic::ppc_qpx_qvstfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcsa:
+      VT = MVT::v2f32;
+      break;
+    default:
+      VT = MVT::v4i32;
+      break;
+    }
+
+    Info.opc = ISD::INTRINSIC_VOID;
+    Info.memVT = VT;
+    Info.ptrVal = I.getArgOperand(1);
+    Info.offset = 0;
+    Info.size = VT.getStoreSize();
+    Info.align = 1;
+    Info.vol = false;
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
    default:
      break;
    }
@@ -10009,6 +11053,11 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    if (VT == MVT::v2i64)
      return false;
  
+  if (Subtarget.hasQPX()) {
+    if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
+      return true;
+  }
+
    return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
  }
  
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index 6e12d9c097ae1ece0c7b4a1a32c3cdb5edcd026b..47d9c68f5381308eecb52c740e6fed9af7ed14b0 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -283,6 +283,22 @@ namespace llvm {
        /// of outputs.
        XXSWAPD,
  
+      /// QVFPERM = This corresponds to the QPX qvfperm instruction.
+      QVFPERM,
+
+      /// QVGPCI = This corresponds to the QPX qvgpci instruction.
+      QVGPCI,
+
+      /// QVALIGNI = This corresponds to the QPX qvaligni instruction.
+      QVALIGNI,
+
+      /// QVESPLATI = This corresponds to the QPX qvesplati instruction.
+      QVESPLATI,
+
+      /// QBFLT = Access the underlying QPX floating-point boolean
+      /// representation.
+      QBFLT,
+
        /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
        /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
        /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
@@ -332,7 +348,11 @@ namespace llvm {
        /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
        /// Maps directly to an stxvd2x instruction that will be preceded by
        /// an xxswapd.
-      STXVD2X
+      STXVD2X,
+
+      /// QBRC, CHAIN = QVLFSb CHAIN, Ptr
+      /// The 4xf32 load used for v4i1 constants.
+      QVLFSb
      };
    }
  
@@ -381,6 +401,10 @@ namespace llvm {
      /// size, return the constant being splatted.  The ByteSize field indicates
      /// the number of bytes of each element [124] -> [bhw].
      SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+
+    /// If this is a qvaligni shuffle mask, return the shift
+    /// amount, otherwise return -1.
+    int isQVALIGNIShuffleMask(SDNode *N);
    }
  
    class PPCTargetLowering : public TargetLowering {
@@ -679,11 +703,15 @@ namespace llvm {
      SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
      SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  
+    SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
+
      SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                              CallingConv::ID CallConv, bool isVarArg,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td

index 0410b1c7590f3de6c1df274c7140c57f475af1c4..506a2d0c7aeb9141b4d7b68fa9e6678876ffe340 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -562,6 +562,47 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
    let Inst{31}    = 0;
  }
  
+// Used for QPX: X-form with 5-bit FRT, FRA and FRB fields and a 10-bit xo.
+class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> FRT;
+  bits<5> FRA;
+  bits<5> FRB;
+
+  let Pattern = pattern;
+  
+  let Inst{6-10}  = FRT;
+  let Inst{11-15} = FRA;
+  let Inst{16-20} = FRB;
+  let Inst{21-30} = xo;
+  let Inst{31}    = 0;    // final bit is always zero in this form
+}
+
+class XForm_19<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern> 
+  : XForm_18<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let FRA = 0;  // XForm_18 with the FRA field fixed to zero
+}
+
+class XForm_20<bits<6> opcode, bits<6> xo, dag OOL, dag IOL, string asmstr,
+               InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> FRT;
+  bits<5> FRA;
+  bits<5> FRB;
+  bits<4> tttt;  // 4-bit field encoded in Inst{21-24}
+
+  let Pattern = pattern;
+  
+  let Inst{6-10}  = FRT;
+  let Inst{11-15} = FRA;
+  let Inst{16-20} = FRB;
+  let Inst{21-24} = tttt;
+  let Inst{25-30} = xo;   // xo is only 6 bits wide here; tttt precedes it
+  let Inst{31}    = 0;
+}
+
  class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
                 InstrItinClass itin, list<dag> pattern> 
    : I<opcode, OOL, IOL, asmstr, itin> {
@@ -1215,6 +1256,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
    let Inst{31}    = 0;
  }
  
+// Used for QPX: AForm_1 with the FRA and FRC fields fixed to zero.
+class AForm_4a<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let FRA = 0;
+  let FRC = 0;
+}
+
  // 1.7.13 M-Form
  class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
                InstrItinClass itin, list<dag> pattern>
@@ -1439,6 +1488,49 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
    let Inst{22-31} = xo;
  }
  
+// Z23-Form (used by QPX): FRT, FRA, FRB, a 2-bit idx and an 8-bit xo.
+class Z23Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, 
+              InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> FRT;
+  bits<5> FRA;
+  bits<5> FRB;
+  bits<2> idx;   // 2-bit field encoded in Inst{21-22}
+
+  let Pattern = pattern;
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = FRT;
+  let Inst{11-15} = FRA;
+  let Inst{16-20} = FRB;
+  let Inst{21-22} = idx;
+  let Inst{23-30} = xo;
+  let Inst{31}    = RC;   // last bit carries the RC flag declared above
+}
+
+class Z23Form_2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : Z23Form_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let FRB = 0;  // Z23Form_1 with the FRB field fixed to zero
+}
+
+class Z23Form_3<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, 
+              InstrItinClass itin, list<dag> pattern>
+         : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> FRT;
+  bits<12> idx;  // 12-bit field spanning Inst{11-22}; no FRA/FRB in this form
+
+  let Pattern = pattern;
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = FRT;
+  let Inst{11-22} = idx;
+  let Inst{23-30} = xo;
+  let Inst{31}    = RC;
+}
+
  //===----------------------------------------------------------------------===//
  class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
      : I<0, OOL, IOL, asmstr, NoItinerary> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp

index d1c60a2e37c3a692c301129bfb29187c6efabc87..fe9474a5de0033f44c706259a876d9eca56e864a 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -181,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
    case PPC::RESTORE_CRBIT:
    case PPC::LVX:
    case PPC::LXVD2X:
+  case PPC::QVLFDX:
+  case PPC::QVLFSXs:
+  case PPC::QVLFDXb:
    case PPC::RESTORE_VRSAVE:
      // Check for the operands added by addFrameReference (the immediate is the
      // offset which defaults to 0).
@@ -207,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
    case PPC::SPILL_CRBIT:
    case PPC::STVX:
    case PPC::STXVD2X:
+  case PPC::QVSTFDX:
+  case PPC::QVSTFSXs:
+  case PPC::QVSTFDXb:
    case PPC::SPILL_VRSAVE:
      // Check for the operands added by addFrameReference (the immediate is the
      // offset which defaults to 0).
@@ -759,6 +765,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
      Opc = PPC::XXLOR;
    else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
      Opc = PPC::XXLORf;
+  else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::QVFMR;
+  else if (PPC::QSRCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::QVFMRs;
+  else if (PPC::QBRCRegClass.contains(DestReg, SrcReg))
+    Opc = PPC::QVFMRb;
    else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
      Opc = PPC::CROR;
    else
@@ -844,6 +856,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                                 getKillRegState(isKill)),
                                         FrameIdx));
      SpillsVRS = true;
+  } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    NonRI = true;
+  } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    NonRI = true;
+  } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    NonRI = true;
    } else {
      llvm_unreachable("Unknown regclass!");
    }
@@ -939,6 +969,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
                                                 DestReg),
                                         FrameIdx));
      SpillsVRS = true;
+  } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg),
+                                       FrameIdx));
+    NonRI = true;
+  } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg),
+                                       FrameIdx));
+    NonRI = true;
+  } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg),
+                                       FrameIdx));
+    NonRI = true;
    } else {
      llvm_unreachable("Unknown regclass!");
    }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td

index 4e3980dfc9b23b98abac2ca78aa42af220518d1b..c2c53355b6ebd67ed0c2c3ffae119b452ea07c22 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -61,6 +61,27 @@ def tocentry32 : Operand<iPTR> {
    let MIOperandInfo = (ops i32imm:$imm);
  }
  
+def SDT_PPCqvfperm   : SDTypeProfile<1, 3, [  // 1 result, 3 operands
+  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3>
+]>;
+def SDT_PPCqvgpci   : SDTypeProfile<1, 1, [  // 1 result, 1 operand
+  SDTCisVec<0>, SDTCisInt<1>  // vector result, integer operand
+]>;
+def SDT_PPCqvaligni   : SDTypeProfile<1, 3, [  // 1 result, 3 operands
+  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>
+]>;
+def SDT_PPCqvesplati   : SDTypeProfile<1, 2, [  // 1 result, 2 operands
+  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
+]>;
+
+def SDT_PPCqbflt : SDTypeProfile<1, 1, [  // 1 result, 1 operand
+  SDTCisVec<0>, SDTCisVec<1>  // vector result, vector operand
+]>;
+
+def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [  // 1 result, 1 operand
+  SDTCisVec<0>, SDTCisPtrTy<1>  // vector result, pointer operand
+]>;
+
  //===----------------------------------------------------------------------===//
  // PowerPC specific DAG Nodes.
  //
@@ -127,6 +148,16 @@ def PPCaddiDtprelL   : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
  
  def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
  
+def PPCqvfperm   : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>;
+def PPCqvgpci    : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>;
+def PPCqvaligni  : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>;
+def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>;
+
+def PPCqbflt     : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>;
+
+def PPCqvlfsb    : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb, // used by QVLFSXb
+                          [SDNPHasChain, SDNPMayLoad]>; // chained; may load
+
  def PPCcmpb     : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>;
  
  // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -464,6 +495,15 @@ def u6imm   : Operand<i32> {
    let ParserMatchClass = PPCU6ImmAsmOperand;
    let DecoderMethod = "decodeUImmOperand<6>";
  }
+def PPCU12ImmAsmOperand : AsmOperandClass { // parser class behind u12imm
+  let Name = "U12Imm"; let PredicateMethod = "isU12Imm";
+  let RenderMethod = "addImmOperands";
+}
+def u12imm  : Operand<i32> { // i32 operand; presumably a 12-bit unsigned field
+  let PrintMethod = "printU12ImmOperand";
+  let ParserMatchClass = PPCU12ImmAsmOperand;
+  let DecoderMethod = "decodeUImmOperand<12>";
+}
  def PPCS16ImmAsmOperand : AsmOperandClass {
    let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
    let RenderMethod = "addS16ImmOperands";
@@ -680,6 +720,10 @@ def IsPPC6xx  : Predicate<"PPCSubTarget->isPPC6xx()">;
  def IsE500  : Predicate<"PPCSubTarget->isE500()">;
  def HasSPE  : Predicate<"PPCSubTarget->HasSPE()">;
  def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
+
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; // option is set
+def NaNsFPMath   : Predicate<"!TM.Options.NoNaNsFPMath">; // exact negation
+
  //===----------------------------------------------------------------------===//
  // PowerPC Multiclass Definitions.
  
@@ -2643,6 +2687,7 @@ include "PPCInstrAltivec.td"
  include "PPCInstrSPE.td"
  include "PPCInstr64Bit.td"
  include "PPCInstrVSX.td"
+include "PPCInstrQPX.td"
  
  def crnot : OutPatFrag<(ops node:$in),
                         (CRNOR $in, $in)>;
diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td

new file mode 100644 (file)

index 0000000..c984d46
--- /dev/null
+++ b/lib/Target/PowerPC/PPCInstrQPX.td
@@ -0,0 +1,1192 @@
+//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file describes the QPX extension to the PowerPC instruction set.
+// Reference:
+// Book Q: QPX Architecture Definition. IBM (as updated in) 2011.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegQFRCAsmOperand : AsmOperandClass { // parser class behind qfrc
+  let Name = "RegQFRC"; let PredicateMethod = "isRegNumber";
+}
+def qfrc : RegisterOperand<QFRC> { // holds v4f64 values in the patterns below
+  let ParserMatchClass = PPCRegQFRCAsmOperand;
+}
+def PPCRegQSRCAsmOperand : AsmOperandClass { // parser class behind qsrc
+  let Name = "RegQSRC"; let PredicateMethod = "isRegNumber";
+}
+def qsrc : RegisterOperand<QSRC> { // holds v4f32 values in the patterns below
+  let ParserMatchClass = PPCRegQSRCAsmOperand;
+}
+def PPCRegQBRCAsmOperand : AsmOperandClass { // parser class behind qbrc
+  let Name = "RegQBRC"; let PredicateMethod = "isRegNumber";
+}
+def qbrc : RegisterOperand<QBRC> { // holds v4i1 values in the patterns below
+  let ParserMatchClass = PPCRegQBRCAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// QPXA1_Int - An AForm_1 intrinsic definition (asm prints $FRC before $FRB).
+class QPXA1_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+  : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+              !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_FPFused,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA1s_Int - An AForm_1 intrinsic definition (simple instrs, IIC_VecPerm).
+class QPXA1s_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+  : AForm_1<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+              !strconcat(opc, " $FRT, $FRA, $FRC, $FRB"), IIC_VecPerm,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+// QPXA2_Int - An AForm_2 intrinsic definition (v4f64 $FRA, $FRB -> $FRT).
+class QPXA2_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+  : AForm_2<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+              !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPGeneral,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXA3_Int - An AForm_3 intrinsic definition (v4f64 $FRA, $FRC -> $FRT).
+class QPXA3_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+  : AForm_3<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+              !strconcat(opc, " $FRT, $FRA, $FRC"), IIC_FPGeneral,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRC))]>;
+// QPXA4_Int - An AForm_4a intrinsic definition (v4f64 $FRB -> $FRT).
+class QPXA4_Int<bits<6> opcode, bits<5> xo, string opc, Intrinsic IntID>
+  : AForm_4a<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+              !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+// QPXX18_Int - An XForm_18 intrinsic definition (two v4f64 in, IIC_FPCompare).
+class QPXX18_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+  : XForm_18<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+              !strconcat(opc, " $FRT, $FRA, $FRB"), IIC_FPCompare,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRA, v4f64:$FRB))]>;
+// QPXX19_Int - An XForm_19 intrinsic definition (one v4f64 in, IIC_FPGeneral).
+class QPXX19_Int<bits<6> opcode, bits<10> xo, string opc, Intrinsic IntID>
+  : XForm_19<opcode, xo, (outs qfrc:$FRT), (ins qfrc:$FRB),
+              !strconcat(opc, " $FRT, $FRB"), IIC_FPGeneral,
+                       [(set v4f64:$FRT, (IntID v4f64:$FRB))]>;
+
+//===----------------------------------------------------------------------===//
+// Pattern Frags.
+
+def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4f32; // in-memory type
+}]>; // an extload matches only when it reads v4f32 from memory
+
+def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32; // in-memory type
+}]>; // a truncstore matches only when it writes v4f32 to memory
+def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                               (pre_truncst node:$val,
+                                            node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32; // in-memory type
+}]>; // same v4f32 memory-type filter, applied to pre_truncst
+
+def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{
+  return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
+}]>; // fround whose constant operand 1 is 0
+
+def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{
+  return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
+}]>; // fround whose constant operand 1 is 1
+
+let FastIselShouldIgnore = 1 in // u12 instrs are off-limits to FastIsel.
+  def u12 : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 0xFFF; }]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasQPX : Predicate<"PPCSubTarget->hasQPX()">; // guards the defs that follow
+let Predicates = [HasQPX] in {
+let DecoderNamespace = "QPX" in {
+let hasSideEffects = 0 in { // QPX instructions don't have side effects.
+let Uses = [RM] in {
+  // Add Instructions (v4f64 form, intrinsic-only form, v4f32 form)
+  let isCommutable = 1 in {
+    def QVFADD : AForm_2<4, 21,
+                        (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                        "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral,
+                        [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>;
+    let isCodeGenOnly = 1 in // same <0, 21> encoding and mnemonic as QVFADDSs
+      def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>;
+    def QVFADDSs : AForm_2<0, 21,
+                          (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                          "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral,
+                          [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>;
+  }
+  def QVFSUB : AForm_2<4, 20,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                      "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral,
+                      [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // same <0, 20> encoding and mnemonic as QVFSUBSs
+    def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>;
+  def QVFSUBSs : AForm_2<0, 20,
+                        (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                        "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral,
+                        [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>;
+
+  // Estimate Instructions (PPCfre / PPCfrsqrte nodes, plus intrinsic forms)
+  def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB),
+                       "qvfre $FRT, $FRB", IIC_FPGeneral,
+                       [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>;
+  def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFRES (same <0, 24> encoding)
+  def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB),
+                         "qvfres $FRT, $FRB", IIC_FPGeneral,
+                         [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>;
+
+  def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB),
+                           "qvfrsqrte $FRT, $FRB", IIC_FPGeneral,
+                           [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>;
+  def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFRSQRTES (same <0, 26> encoding)
+  def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB),
+                             "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral,
+                             [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>;
+
+  // Multiply Instructions (the second source operand is $FRC, not $FRB)
+  let isCommutable = 1 in {
+    def QVFMUL : AForm_3<4, 25,
+                        (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC),
+                        "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral,
+                        [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>;
+    let isCodeGenOnly = 1 in // same <0, 25> encoding and mnemonic as QVFMULSs
+      def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>;
+    def QVFMULSs : AForm_3<0, 25,
+                          (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC),
+                          "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral,
+                          [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>;
+  }
+  def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>;
+  def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>;
+
+  // Multiply-add instructions (fma patterns compute $FRA * $FRC + $FRB)
+  def QVFMADD : AForm_1<4, 29,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+                      "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                      [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // same <0, 29> encoding and mnemonic as QVFMADDSs
+    def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>;
+  def QVFMADDSs : AForm_1<0, 29,
+                        (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+                        "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                        [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>;
+  def QVFNMADD : AForm_1<4, 31,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+                      "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                      [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+                                                   v4f64:$FRB)))]>; // -(A*C+B)
+  let isCodeGenOnly = 1 in // same <0, 31> encoding and mnemonic as QVFNMADDSs
+    def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>;
+  def QVFNMADDSs : AForm_1<0, 31,
+                        (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+                        "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                        [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+                                                     v4f32:$FRB)))]>;
+  def QVFMSUB : AForm_1<4, 28,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+                      "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                      [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC,
+                                             (fneg v4f64:$FRB)))]>; // A*C-B
+  let isCodeGenOnly = 1 in // same <0, 28> encoding and mnemonic as QVFMSUBSs
+    def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>;
+  def QVFMSUBSs : AForm_1<0, 28,
+                      (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+                      "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                      [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC,
+                                             (fneg v4f32:$FRB)))]>;
+  def QVFNMSUB : AForm_1<4, 30,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+                      "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                      [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC,
+                                              (fneg v4f64:$FRB))))]>; // -(A*C-B)
+  let isCodeGenOnly = 1 in // same <0, 30> encoding and mnemonic as QVFNMSUBSs
+    def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>;
+  def QVFNMSUBSs : AForm_1<0, 30,
+                      (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+                      "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+                      [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC,
+                                              (fneg v4f32:$FRB))))]>;
+  def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>; // intrinsic
+  def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>; // ditto
+  def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>;
+  def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>;
+  def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>;
+  def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>;
+  def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>;
+  def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>;
+
+  // Select Instruction (intrinsic form, then vselect forms on a v4i1 mask)
+  let isCodeGenOnly = 1 in
+    def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>;
+  def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT),
+                        (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+                        "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+                        [(set v4f64:$FRT, (vselect v4i1:$FRA,
+                                                   v4f64:$FRC, v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 data, same <4, 23> encoding
+  def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT),
+                        (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC),
+                        "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+                        [(set v4f32:$FRT, (vselect v4i1:$FRA,
+                                                   v4f32:$FRC, v4f32:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4i1 data, same <4, 23> encoding
+  def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT),
+                        (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC),
+                        "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm,
+                        [(set v4i1:$FRT, (vselect v4i1:$FRA,
+                                                  v4i1:$FRC, v4i1:$FRB))]>;
+
+  // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
+  // instruction selection into a branch sequence.
+  let usesCustomInserter = 1 in { // applies to all six pseudos below
+    def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F,
+                                i32imm:$BROPC), "#SELECT_CC_QFRC",
+                                []>; // no pattern attached here
+    def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F,
+                                i32imm:$BROPC), "#SELECT_CC_QSRC",
+                                []>; // no pattern attached here
+    def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F,
+                                i32imm:$BROPC), "#SELECT_CC_QBRC",
+                                []>; // no pattern attached here
+
+    // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+    // register bit directly.
+    def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond,
+                            qfrc:$T, qfrc:$F), "#SELECT_QFRC",
+                            [(set v4f64:$dst,
+                                  (select i1:$cond, v4f64:$T, v4f64:$F))]>;
+    def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond,
+                            qsrc:$T, qsrc:$F), "#SELECT_QSRC",
+                            [(set v4f32:$dst,
+                                  (select i1:$cond, v4f32:$T, v4f32:$F))]>;
+    def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond,
+                            qbrc:$T, qbrc:$F), "#SELECT_QBRC",
+                            [(set v4i1:$dst,
+                                  (select i1:$cond, v4i1:$T, v4i1:$F))]>;
+  }
+
+  // Convert and Round Instructions (conversions are intrinsic-backed)
+  def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>;
+  let isCodeGenOnly = 1 in // pattern-less qbrc twin of QVFCTID
+    def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB),
+                            "qvfctid $FRT, $FRB", IIC_FPGeneral, []>;
+
+  def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>;
+  def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>;
+  def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>;
+  def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>;
+  def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>;
+  def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>;
+  def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>;
+  def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>;
+  let isCodeGenOnly = 1 in // pattern-less qbrc twin of QVFCFID
+    def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB),
+                            "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>;
+
+  def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>;
+  def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>;
+  def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>;
+
+  let isCodeGenOnly = 1 in // same <4, 12> encoding and mnemonic as QVFRSPs
+    def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>;
+  def QVFRSPs : XForm_19<4, 12,
+                      (outs qsrc:$FRT), (ins qfrc:$FRB), // v4f64 in, v4f32 out
+                      "qvfrsp $FRT, $FRB", IIC_FPGeneral,
+                      [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>;
+
+  def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB),
+                        "qvfriz $FRT, $FRB", IIC_FPGeneral,
+                        [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFRIZ (ftrunc)
+    def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB),
+                           "qvfriz $FRT, $FRB", IIC_FPGeneral,
+                           [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>;
+
+  def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
+                        "qvfrin $FRT, $FRB", IIC_FPGeneral,
+                        [(set v4f64:$FRT, (frnd v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFRIN (frnd)
+    def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
+                           "qvfrin $FRT, $FRB", IIC_FPGeneral,
+                           [(set v4f32:$FRT, (frnd v4f32:$FRB))]>;
+
+  def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
+                        "qvfrip $FRT, $FRB", IIC_FPGeneral,
+                        [(set v4f64:$FRT, (fceil v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFRIP (fceil)
+    def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB),
+                           "qvfrip $FRT, $FRB", IIC_FPGeneral,
+                           [(set v4f32:$FRT, (fceil v4f32:$FRB))]>;
+
+  def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB),
+                        "qvfrim $FRT, $FRB", IIC_FPGeneral,
+                        [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFRIM (ffloor)
+    def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB),
+                           "qvfrim $FRT, $FRB", IIC_FPGeneral,
+                           [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>;
+
+  // Move Instructions (the QVFMR forms carry no selection pattern)
+  def QVFMR : XForm_19<4, 72,
+                      (outs qfrc:$FRT), (ins qfrc:$FRB),
+                      "qvfmr $FRT, $FRB", IIC_VecPerm,
+                      [/* (set v4f64:$FRT, v4f64:$FRB) */]>;
+  let isCodeGenOnly = 1 in { // qsrc and qbrc twins of QVFMR
+    def QVFMRs : XForm_19<4, 72,
+                         (outs qsrc:$FRT), (ins qsrc:$FRB),
+                         "qvfmr $FRT, $FRB", IIC_VecPerm,
+                         [/* (set v4f32:$FRT, v4f32:$FRB) */]>;
+    def QVFMRb : XForm_19<4, 72,
+                         (outs qbrc:$FRT), (ins qbrc:$FRB),
+                         "qvfmr $FRT, $FRB", IIC_VecPerm,
+                         [/* (set v4i1:$FRT, v4i1:$FRB) */]>;
+  }
+  def QVFNEG : XForm_19<4, 40,
+                      (outs qfrc:$FRT), (ins qfrc:$FRB),
+                      "qvfneg $FRT, $FRB", IIC_VecPerm,
+                      [(set v4f64:$FRT, (fneg v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFNEG
+    def QVFNEGs : XForm_19<4, 40,
+                         (outs qsrc:$FRT), (ins qsrc:$FRB),
+                         "qvfneg $FRT, $FRB", IIC_VecPerm,
+                         [(set v4f32:$FRT, (fneg v4f32:$FRB))]>;
+  def QVFABS : XForm_19<4, 264,
+                      (outs qfrc:$FRT), (ins qfrc:$FRB),
+                      "qvfabs $FRT, $FRB", IIC_VecPerm,
+                      [(set v4f64:$FRT, (fabs v4f64:$FRB))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFABS
+    def QVFABSs : XForm_19<4, 264,
+                         (outs qsrc:$FRT), (ins qsrc:$FRB),
+                         "qvfabs $FRT, $FRB", IIC_VecPerm,
+                         [(set v4f32:$FRT, (fabs v4f32:$FRB))]>;
+  def QVFNABS : XForm_19<4, 136,
+                      (outs qfrc:$FRT), (ins qfrc:$FRB),
+                      "qvfnabs $FRT, $FRB", IIC_VecPerm,
+                      [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVFNABS
+    def QVFNABSs : XForm_19<4, 136,
+                         (outs qsrc:$FRT), (ins qsrc:$FRB),
+                         "qvfnabs $FRT, $FRB", IIC_VecPerm,
+                         [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>;
+  def QVFCPSGN : XForm_18<4, 8,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                      "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+                      [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin; fcopysign operands are ($FRB, $FRA)
+    def QVFCPSGNs : XForm_18<4, 8,
+                         (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                         "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm,
+                         [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>;
+
+  def QVALIGNI : Z23Form_1<4, 5,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx),
+                      "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+                      [(set v4f64:$FRT,
+                            (PPCqvaligni v4f64:$FRA, v4f64:$FRB,
+                                         (i32 imm:$idx)))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVALIGNI
+     def QVALIGNIs : Z23Form_1<4, 5,
+                         (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx),
+                         "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+                         [(set v4f32:$FRT,
+                               (PPCqvaligni v4f32:$FRA, v4f32:$FRB,
+                                            (i32 imm:$idx)))]>;
+  let isCodeGenOnly = 1 in // v4i1 twin of QVALIGNI
+     def QVALIGNIb : Z23Form_1<4, 5,
+                         (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx),
+                         "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm,
+                         [(set v4i1:$FRT,
+                               (PPCqvaligni v4i1:$FRA, v4i1:$FRB,
+                                            (i32 imm:$idx)))]>;
+
+  def QVESPLATI : Z23Form_2<4, 37,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx),
+                      "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+                      [(set v4f64:$FRT,
+                            (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>;
+  let isCodeGenOnly = 1 in // v4f32 twin of QVESPLATI
+     def QVESPLATIs : Z23Form_2<4, 37,
+                         (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx),
+                         "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+                         [(set v4f32:$FRT,
+                               (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>;
+  let isCodeGenOnly = 1 in // v4i1 twin of QVESPLATI
+     def QVESPLATIb : Z23Form_2<4, 37,
+                         (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx),
+                         "qvesplati $FRT, $FRA, $idx", IIC_VecPerm,
+                         [(set v4i1:$FRT,
+                               (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>;
+
+  def QVFPERM : AForm_1<4, 6,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC),
+                      "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+                      [(set v4f64:$FRT,
+                            (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>;
+  let isCodeGenOnly = 1 in // v4f32 data, but $FRC stays qfrc / v4f64
+     def QVFPERMs : AForm_1<4, 6,
+                         (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC),
+                         "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm,
+                         [(set v4f32:$FRT,
+                               (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>;
+
+  let isReMaterializable = 1, isAsCheapAsAMove = 1 in // immediate-only input
+  def QVGPCI : Z23Form_3<4, 133,
+                      (outs qfrc:$FRT), (ins u12imm:$idx),
+                      "qvgpci $FRT, $idx", IIC_VecPerm,
+                      [(set v4f64:$FRT, (PPCqvgpci (u12:$idx)))]>;
+
+  // Compare Instruction (intrinsic form, then v4f64 and v4f32 setcc forms)
+  let isCodeGenOnly = 1 in
+    def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>;
+  def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                           "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+                           [(set v4i1:$FRT,
+                                 (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>;
+  let isCodeGenOnly = 1 in // v4f32 inputs, same <4, 64> encoding
+  def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                            "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare,
+                            [(set v4i1:$FRT,
+                                  (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>;
+  let isCodeGenOnly = 1 in // intrinsic form; result stays in a qfrc register
+    def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>;
+  def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                           "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+                           [(set v4i1:$FRT,
+                                 (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>;
+  let isCodeGenOnly = 1 in // v4f32 inputs, same <4, 96> encoding
+  def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                            "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare,
+                            [(set v4i1:$FRT,
+                                  (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>;
+  let isCodeGenOnly = 1 in // intrinsic form; result stays in a qfrc register
+    def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>;
+  def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                           "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+                           [(set v4i1:$FRT,
+                                 (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>;
+  let isCodeGenOnly = 1 in // v4f32 inputs, same <4, 32> encoding
+  def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                            "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare,
+                            [(set v4i1:$FRT,
+                                  (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>;
+  let isCodeGenOnly = 1 in // intrinsic form; result stays in a qfrc register
+    def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>;
+  def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB),
+                           "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+                           [(set v4i1:$FRT,
+                                 (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>;
+  let isCodeGenOnly = 1 in // v4f32 inputs, same <4, 0> encoding
+  def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB),
+                            "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare,
+                            [(set v4i1:$FRT,
+                                  (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>;
+
+  let isCodeGenOnly = 1 in // qfrc form; no pattern attached here
+  def QVFLOGICAL : XForm_20<4, 4,
+                      (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt),
+                      "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+  def QVFLOGICALb : XForm_20<4, 4,
+                      (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+                      "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+  let isCodeGenOnly = 1 in // NOTE(review): same qbrc operands as ...b; verify
+  def QVFLOGICALs : XForm_20<4, 4,
+                      (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt),
+                      "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>;
+
+  // Load indexed instructions
+  let mayLoad = 1, canFoldAsLoad = 1 in {
+    def QVLFDX : XForm_1<31, 583,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfdx $FRT, $src", IIC_LdStLFD,
+                        [(set v4f64:$FRT, (load xoaddr:$src))]>;
+    let isCodeGenOnly = 1 in // pattern-less qbrc twin of QVLFDX
+    def QVLFDXb : XForm_1<31, 583,
+                        (outs qbrc:$FRT), (ins memrr:$src),
+                        "qvlfdx $FRT, $src", IIC_LdStLFD, []>;
+
+    let RC = 1 in // same <31, 583> as QVLFDX, but with RC = 1
+    def QVLFDXA : XForm_1<31, 583,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfdxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFDUX : XForm_1<31, 615,
+                        (outs qfrc:$FRT, ptr_rc_nor0:$ea_result),
+                        (ins memrr:$src),
+                        "qvlfdux $FRT, $src", IIC_LdStLFDU, []>,
+                        RegConstraint<"$src.ptrreg = $ea_result">, // tied pair
+                        NoEncode<"$ea_result">; // $ea_result is not encoded
+    let RC = 1 in // RC = 1 twin; NOTE(review): no $ea_result output here
+    def QVLFDUXA : XForm_1<31, 615,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfduxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFSX : XForm_1<31, 519,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfsx $FRT, $src", IIC_LdStLFD,
+                        [(set v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>;
+
+    let isCodeGenOnly = 1 in // qbrc twin, selected from PPCqvlfsb
+    def QVLFSXb : XForm_1<31, 519,
+                        (outs qbrc:$FRT), (ins memrr:$src),
+                        "qvlfsx $FRT, $src", IIC_LdStLFD,
+                        [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>;
+    let isCodeGenOnly = 1 in // qsrc twin: plain (non-extending) v4f32 load
+    def QVLFSXs : XForm_1<31, 519,
+                        (outs qsrc:$FRT), (ins memrr:$src),
+                        "qvlfsx $FRT, $src", IIC_LdStLFD,
+                        [(set v4f32:$FRT, (load xoaddr:$src))]>;
+
+    let RC = 1 in // same <31, 519> as QVLFSX, but with RC = 1
+    def QVLFSXA : XForm_1<31, 519,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfsxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFSUX : XForm_1<31, 551,
+                        (outs qsrc:$FRT, ptr_rc_nor0:$ea_result),
+                        (ins memrr:$src),
+                        "qvlfsux $FRT, $src", IIC_LdStLFDU, []>,
+                        RegConstraint<"$src.ptrreg = $ea_result">, // tied pair
+                        NoEncode<"$ea_result">; // $ea_result is not encoded
+
+    let RC = 1 in // RC = 1 twin; NOTE(review): qfrc out, no $ea_result here
+    def QVLFSUXA : XForm_1<31, 551,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFCDX : XForm_1<31, 71,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcdx $FRT, $src", IIC_LdStLFD, []>;
+    let RC = 1 in // same <31, 71> as QVLFCDX, but with RC = 1
+    def QVLFCDXA : XForm_1<31, 71,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFCDUX : XForm_1<31, 103,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcdux $FRT, $src", IIC_LdStLFD, []>;
+    let RC = 1 in // same <31, 103> as QVLFCDUX, but with RC = 1
+    def QVLFCDUXA : XForm_1<31, 103,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFCSX : XForm_1<31, 7,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+    let isCodeGenOnly = 1 in
+    def QVLFCSXs : XForm_1<31, 7,
+                         (outs qsrc:$FRT), (ins memrr:$src),
+                         "qvlfcsx $FRT, $src", IIC_LdStLFD, []>;
+
+    let RC = 1 in
+    def QVLFCSXA : XForm_1<31, 7,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFCSUX : XForm_1<31, 39,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcsux $FRT, $src", IIC_LdStLFD, []>;
+    let RC = 1 in
+    def QVLFCSUXA : XForm_1<31, 39,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>;
+
+    // qvlfiwax / qvlfiwzx and their RC = 1 "a" variants. No patterns are
+    // attached; selection happens via the int_ppc_qpx_qvlfiw* Pat entries
+    // in this file.
+    def QVLFIWAX : XForm_1<31, 871,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfiwax $FRT, $src", IIC_LdStLFD, []>;
+    let RC = 1 in
+    def QVLFIWAXA : XForm_1<31, 871,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>;
+
+    def QVLFIWZX : XForm_1<31, 839,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>;
+    let RC = 1 in
+    def QVLFIWZXA : XForm_1<31, 839,
+                        (outs qfrc:$FRT), (ins memrr:$src),
+                        "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>;
+  }
+
+
+  // qvlpc{l,r}{d,s}x: take a memrr operand and produce a qfrc value. They
+  // are selected through the int_ppc_qpx_qvlpc* Pat entries in this file.
+  def QVLPCLDX : XForm_1<31, 582,
+                      (outs qfrc:$FRT), (ins memrr:$src),
+                      "qvlpcldx $FRT, $src", IIC_LdStLFD, []>;
+  def QVLPCLSX : XForm_1<31, 518,
+                      (outs qfrc:$FRT), (ins memrr:$src),
+                      "qvlpclsx $FRT, $src", IIC_LdStLFD, []>;
+  // CodeGen-only form of qvlpclsx that takes a single G8RC register and
+  // prints a literal 0 as the first address component. It is used by the
+  // variable-index vector_extract patterns in this file.
+  let isCodeGenOnly = 1 in
+    def QVLPCLSXint : XForm_11<31, 518,
+                              (outs qfrc:$FRT), (ins G8RC:$src),
+                              "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>;
+  def QVLPCRDX : XForm_1<31, 70,
+                      (outs qfrc:$FRT), (ins memrr:$src),
+                      "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>;
+  def QVLPCRSX : XForm_1<31, 6,
+                      (outs qfrc:$FRT), (ins memrr:$src),
+                      "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>;
+
+  // Store indexed instructions
+  let mayStore = 1 in {
+    // qvstfdx: selected for a plain store of a qfrc value.
+    def QVSTFDX : XForm_8<31, 711,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfdx $FRT, $dst", IIC_LdStSTFD,
+                        [(store qfrc:$FRT, xoaddr:$dst)]>;
+    // CodeGen-only variant of qvstfdx taking a qbrc source; no pattern.
+    let isCodeGenOnly = 1 in
+    def QVSTFDXb : XForm_8<31, 711,
+                        (outs), (ins qbrc:$FRT, memrr:$dst),
+                        "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>;
+
+    let RC = 1 in
+    def QVSTFDXA : XForm_8<31, 711,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    // qvstfdux: produces $ea_res, tied to the pointer register of $dst and
+    // excluded from the encoding. Selected by the pre_store v4f64 Pat in
+    // this file.
+    def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res),
+                           (ins qfrc:$FRT, memrr:$dst),
+                           "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>,
+                           RegConstraint<"$dst.ptrreg = $ea_res">,
+                           NoEncode<"$ea_res">;
+
+    // NOTE(review): QVSTFDUXA and the QVSTFDUXI/QVSTFDUXIA defs below declare
+    // no $ea_res output, unlike QVSTFDUX -- confirm intended.
+    let RC = 1 in
+    def QVSTFDUXA : XForm_8<31, 743,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFDXI : XForm_8<31, 709,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFDXIA : XForm_8<31, 709,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFDUXI : XForm_8<31, 741,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFDUXIA : XForm_8<31, 741,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    // qvstfsx: selected for a truncating v4f32 store of a qfrc value.
+    def QVSTFSX : XForm_8<31, 647,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+                        [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>;
+    // CodeGen-only variant selected for a plain store of a qsrc value.
+    let isCodeGenOnly = 1 in
+    def QVSTFSXs : XForm_8<31, 647,
+                         (outs), (ins qsrc:$FRT, memrr:$dst),
+                         "qvstfsx $FRT, $dst", IIC_LdStSTFD,
+                         [(store qsrc:$FRT, xoaddr:$dst)]>;
+
+    let RC = 1 in
+    def QVSTFSXA : XForm_8<31, 647,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    // qvstfsux in two register-class flavours; both produce $ea_res tied to
+    // the pointer register of $dst. The pre_store Pats in this file use
+    // QVSTFSUX for v4f32 values and QVSTFSUXs for truncating v4f64 stores.
+    // NOTE(review): here the un-suffixed def takes qsrc and the "s" def takes
+    // qfrc, the reverse of QVSTFSX/QVSTFSXs above -- renaming would require
+    // updating those Pats as well.
+    def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+                           (ins qsrc:$FRT, memrr:$dst),
+                           "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+                           RegConstraint<"$dst.ptrreg = $ea_res">,
+                           NoEncode<"$ea_res">;
+    let isCodeGenOnly = 1 in
+    def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res),
+                           (ins qfrc:$FRT, memrr:$dst),
+                           "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>,
+                           RegConstraint<"$dst.ptrreg = $ea_res">,
+                           NoEncode<"$ea_res">;
+
+    // NOTE(review): the remaining "ux" defs in this group declare no $ea_res
+    // output -- confirm intended.
+    let RC = 1 in
+    def QVSTFSUXA : XForm_8<31, 679,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFSXI : XForm_8<31, 645,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFSXIA : XForm_8<31, 645,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFSUXI : XForm_8<31, 677,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFSUXIA : XForm_8<31, 677,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    // qvstfc* family. No def in this group carries a selection pattern; the
+    // QVSTFCDX/QVSTFCDXA/QVSTFCSX/QVSTFCSXA forms are selected through the
+    // int_ppc_qpx_qvstfc* Pat entries in this file. Each "A"-suffixed def
+    // reuses its base def's opcode pair with RC = 1.
+    // NOTE(review): none of the "ux"/"uxi" defs here declares a
+    // pointer-register output as QVSTFDUX does -- confirm intended.
+    def QVSTFCDX : XForm_8<31, 199,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCDXA : XForm_8<31, 199,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCSX : XForm_8<31, 135,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+    // CodeGen-only variant of qvstfcsx taking a qsrc source.
+    let isCodeGenOnly = 1 in
+    def QVSTFCSXs : XForm_8<31, 135,
+                         (outs), (ins qsrc:$FRT, memrr:$dst),
+                         "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>;
+
+    let RC = 1 in
+    def QVSTFCSXA : XForm_8<31, 135,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCDUX : XForm_8<31, 231,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCDUXA : XForm_8<31, 231,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCSUX : XForm_8<31, 167,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCSUXA : XForm_8<31, 167,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCDXI : XForm_8<31, 197,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCDXIA : XForm_8<31, 197,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCSXI : XForm_8<31, 133,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCSXIA : XForm_8<31, 133,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCDUXI : XForm_8<31, 229,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCDUXIA : XForm_8<31, 229,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    def QVSTFCSUXI : XForm_8<31, 165,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFCSUXIA : XForm_8<31, 165,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>;
+
+    // qvstfiwx and its RC = 1 "a" variant; selected through the
+    // int_ppc_qpx_qvstfiw / int_ppc_qpx_qvstfiwa Pat entries in this file.
+    def QVSTFIWX : XForm_8<31, 967,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>;
+    let RC = 1 in
+    def QVSTFIWXA : XForm_8<31, 967,
+                        (outs), (ins qfrc:$FRT, memrr:$dst),
+                        "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>;
+  }
+}
+
+} // neverHasSideEffects
+}
+
+// Assembler mnemonics for QVFLOGICALb with a fixed final immediate operand.
+// The one-register aliases (qvfclr, qvfset) pass $FRT for all three register
+// operands; the two-register aliases (qvfctfb, qvfnot) pass $FRA for both
+// source operands.
+def : InstAlias<"qvfclr $FRT",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>;
+def : InstAlias<"qvfand $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>;
+def : InstAlias<"qvfandc $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>;
+def : InstAlias<"qvfctfb $FRT, $FRA",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>;
+def : InstAlias<"qvfxor $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>;
+def : InstAlias<"qvfor $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>;
+def : InstAlias<"qvfnor $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>;
+def : InstAlias<"qvfequ $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>;
+def : InstAlias<"qvfnot $FRT, $FRA",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>;
+def : InstAlias<"qvforc $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>;
+def : InstAlias<"qvfnand $FRT, $FRA, $FRB",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>;
+def : InstAlias<"qvfset $FRT",
+                (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>;
+
+//===----------------------------------------------------------------------===//
+// Additional QPX Patterns
+//
+
+// scalar_to_vector: insert the scalar as the sub_64 subregister of an
+// otherwise undefined (IMPLICIT_DEF) vector register.
+def : Pat<(v4f64 (scalar_to_vector f64:$A)),
+          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>;
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
+
+// Element 0 is read directly as the sub_64 subregister.
+def : Pat<(f64 (vector_extract v4f64:$S, 0)),
+          (EXTRACT_SUBREG $S, sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+          (EXTRACT_SUBREG $S, sub_64)>;
+
+// Constant elements 1..3: QVESPLATI (QVESPLATIs for v4f32) with the element
+// index, then read sub_64 of the result.
+def : Pat<(f64 (vector_extract v4f64:$S, 1)),
+          (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 2)),
+          (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
+def : Pat<(f64 (vector_extract v4f64:$S, 3)),
+          (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
+
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+          (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+          (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+          (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
+
+// Variable (i64) index: permute $S with itself using a control vector that
+// QVLPCLSXint produces from the index after RLDICR $F, 2, 61 (presumably
+// $F << 2 -- confirm), then read sub_64 of the permuted vector.
+def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)),
+          (EXTRACT_SUBREG (QVFPERM $S, $S,
+                                   (QVLPCLSXint (RLDICR $F, 2,
+                                                        /* 63-2 = */ 61))),
+                          sub_64)>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)),
+          (EXTRACT_SUBREG (QVFPERMs $S, $S,
+                                    (QVLPCLSXint (RLDICR $F, 2,
+                                                         /* 63-2 = */ 61))),
+                          sub_64)>;
+
+// Intrinsics that map one-to-one onto an instruction, operands in order.
+def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C),
+          (QVFPERM $A, $B, $C)>;
+
+def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B),
+          (QVFCPSGN $A, $B)>;
+
+// FCOPYSIGN's operand types need not agree. For the two mixed-width cases,
+// $frA (the second fcopysign operand) is first moved into the register class
+// of $frB with COPY_TO_REGCLASS so both instruction operands share a class.
+// Source operands are written with value types, matching the other patterns
+// in this file; v4f64 and v4f32 are the only types of QFRC and QSRC.
+def : Pat<(fcopysign v4f64:$frB, v4f32:$frA),
+          (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>;
+def : Pat<(fcopysign v4f32:$frB, v4f64:$frA),
+          (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>;
+
+// Unary and binary v4f64 intrinsics; each selects the like-named
+// instruction with operands passed through in order.
+def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>;
+def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>;
+def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>;
+
+def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>;
+def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>;
+def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>;
+def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>;
+
+def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>;
+def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>;
+
+def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B),
+          (QVFADD $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B),
+          (QVFSUB $A, $B)>;
+def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B),
+          (QVFMUL $A, $B)>;
+
+// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b)
+// An fma with either multiplicand negated selects QVFNMSUB (v4f64) or
+// QVFNMSUBSs (v4f32). Note the instruction operand order ($A, $B, $C): the
+// fma addend $B is passed second.
+def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B),
+          (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B),
+          (QVFNMSUB $A, $B, $C)>;
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+          (QVFNMSUBSs $A, $B, $C)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+          (QVFNMSUBSs $A, $B, $C)>;
+
+// Fused multiply-add intrinsics; operands are forwarded unchanged to the
+// like-named instruction.
+def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C),
+          (QVFMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C),
+          (QVFNMADD $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C),
+          (QVFMSUB $A, $B, $C)>;
+def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C),
+          (QVFNMSUB $A, $B, $C)>;
+
+// Load intrinsics. Each int_ppc_qpx_qvl* intrinsic takes an xoaddr address
+// and selects the corresponding X-form instruction; intrinsics ending in "a"
+// select the "A"-suffixed (RC = 1) instruction. Every intrinsic appears
+// exactly once: the repeated qvlfda and qvlfsa entries were redundant
+// copies of the patterns below and have been dropped.
+def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src),
+          (QVLFDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src),
+          (QVLFDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src),
+          (QVLFSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src),
+          (QVLFSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src),
+          (QVLFCDXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src),
+          (QVLFCDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src),
+          (QVLFCSXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src),
+          (QVLFCSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src),
+          (QVLFIWAXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src),
+          (QVLFIWAX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src),
+          (QVLFIWZXA xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src),
+          (QVLFIWZX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src),
+          (QVLPCLDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src),
+          (QVLPCLSX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src),
+          (QVLPCRDX xoaddr:$src)>;
+def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src),
+          (QVLPCRSX xoaddr:$src)>;
+
+// Store intrinsics. Each int_ppc_qpx_qvst* intrinsic takes a v4f64 value and
+// an xoaddr address and selects the corresponding X-form store; intrinsics
+// ending in "a" select the "A"-suffixed (RC = 1) instruction.
+def : Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst),
+          (QVSTFDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst),
+          (QVSTFSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst),
+          (QVSTFCDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst),
+          (QVSTFCDX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst),
+          (QVSTFCSXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst),
+          (QVSTFCSX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst),
+          (QVSTFDXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst),
+          (QVSTFIWXA $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst),
+          (QVSTFIWX $T, xoaddr:$dst)>;
+def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst),
+          (QVSTFSXA $T, xoaddr:$dst)>;
+
+// Pre-indexed stores, with the address given as separate base ($ptrreg) and
+// offset ($ptroff) operands:
+//   v4f64 store              -> QVSTFDUX
+//   v4f32 store              -> QVSTFSUX  (the qsrc-operand def)
+//   v4f64 truncated to v4f32 -> QVSTFSUXs (the qfrc-operand def)
+def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (QVSTFDUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (QVSTFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+          (QVSTFSUXs $rS, $ptrreg, $ptroff)>;
+
+// Intrinsics with an immediate operand: the immediate ($idx) is forwarded
+// to the instruction unchanged.
+def : Pat<(int_ppc_qpx_qvflogical  v4f64:$A, v4f64:$B, (i32 imm:$idx)),
+          (QVFLOGICAL $A, $B, imm:$idx)>;
+def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)),
+          (QVGPCI imm:$idx)>;
+
+// v4f64 setcc lowering. Ordered/unordered predicates combine a compare
+// (QVFCMPLTb / QVFCMPGTb / QVFCMPEQb) with the NaN test QVFTSTNANb through
+// QVFLOGICALb. The immediates used are the ones the InstAlias entries in
+// this file name qvfor (7), qvfnor (8), qvfnot (10) and qvforc (13).
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE),
+          (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE),
+          (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE),
+          (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO),
+          (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ),
+          (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT),
+          (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE),
+          (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+                       (QVFCMPLTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT),
+          (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+                       (QVFTSTNANb $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE),
+          (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+                       (QVFCMPGTb $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE),
+          (QVFLOGICALb (QVFTSTNANb $FRA, $FRB),
+                       (QVFCMPEQb $FRA, $FRB), (i32 13))>;
+
+// Predicates without an ordered/unordered qualifier map directly onto a
+// compare, or onto the inversion (immediate 10) of the complementary
+// compare: GE = not LT, LE = not GT, NE = not EQ.
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ),
+          (QVFCMPEQb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT),
+          (QVFCMPGTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE),
+          (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+                       (QVFCMPLTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT),
+          (QVFCMPLTb $FRA, $FRB)>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE),
+          (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+                       (QVFCMPGTb $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE),
+          (QVFLOGICALb (QVFCMPEQb $FRA, $FRB),
+                       (QVFCMPEQb $FRA, $FRB), (i32 10))>;
+
+// v4f32 setcc lowering. Same structure as the v4f64 patterns, using the
+// "bs" compare/NaN-test variants (QVFCMPLTbs / QVFCMPGTbs / QVFCMPEQbs /
+// QVFTSTNANbs) and combining the results with QVFLOGICALb. Immediates 7, 8,
+// 10 and 13 are the ones the InstAlias entries in this file name qvfor,
+// qvfnor, qvfnot and qvforc.
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE),
+          (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE),
+          (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETONE),
+          (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 8))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO),
+          (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ),
+          (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT),
+          (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE),
+          (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+                       (QVFCMPLTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT),
+          (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+                       (QVFTSTNANbs $FRA, $FRB), (i32 7))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE),
+          (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+                       (QVFCMPGTbs $FRA, $FRB), (i32 13))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE),
+          (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB),
+                       (QVFCMPEQbs $FRA, $FRB), (i32 13))>;
+
+// Unqualified predicates: a direct compare, or the inversion (immediate 10)
+// of the complementary compare (GE = not LT, LE = not GT, NE = not EQ).
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ),
+          (QVFCMPEQbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT),
+          (QVFCMPGTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE),
+          (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+                       (QVFCMPLTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT),
+          (QVFCMPLTbs $FRA, $FRB)>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE),
+          (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+                       (QVFCMPGTbs $FRA, $FRB), (i32 10))>;
+def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE),
+          (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB),
+                       (QVFCMPEQbs $FRA, $FRB), (i32 10))>;
+
+// Boolean (v4i1) logic, all selected to QVFLOGICALb with the immediate that
+// names the function. Compound forms involving a `not`:
+//   andc = 4, nor = 8, equ (not xor) = 9, orc = 13, nand = 14.
+def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)),
+          (QVFLOGICALb $FRA, $FRB, (i32 4))>;
+def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)),
+          (QVFLOGICALb $FRA, $FRB, (i32 8))>;
+def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)),
+          (QVFLOGICALb $FRA, $FRB, (i32 9))>;
+def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)),
+          (QVFLOGICALb $FRA, $FRB, (i32 13))>;
+def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)),
+          (QVFLOGICALb $FRA, $FRB, (i32 14))>;
+
+// Plain forms: and = 1, or = 7, xor = 6. `not` passes $FRA for both source
+// operands with immediate 10.
+def : Pat<(and v4i1:$FRA, v4i1:$FRB),
+          (QVFLOGICALb $FRA, $FRB, (i32 1))>;
+def : Pat<(or v4i1:$FRA, v4i1:$FRB),
+          (QVFLOGICALb $FRA, $FRB, (i32 7))>;
+def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
+          (QVFLOGICALb $FRA, $FRB, (i32 6))>;
+def : Pat<(not v4i1:$FRA),
+          (QVFLOGICALb $FRA, $FRA, (i32 10))>;
+
+// v4f32 -> v4f64 extension is selected as a register-class copy to QFRC;
+// no conversion instruction is emitted by this pattern.
+def : Pat<(v4f64 (fextend v4f32:$src)),
+          (COPY_TO_REGCLASS $src, QFRC)>;
+
+// fround_exact v4f64 -> v4f32 is likewise a register-class copy to QSRC.
+def : Pat<(v4f32 (fround_exact v4f64:$src)),
+          (COPY_TO_REGCLASS $src, QSRC)>;
+
+// Extract the underlying floating-point values from the
+// QPX (-1.0, 1.0) boolean representation.
+// (Selected as a register-class copy from the v4i1 value to QFRC.)
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+          (COPY_TO_REGCLASS $src, QFRC)>;
+
+// selectcc on i1 operands choosing between QPX vector values. The i1
+// comparison is formed with a CR-bit logical op (CRANDC / CRORC / CREQV /
+// CRXOR) and fed to the SELECT_* instruction for the result's register
+// class. SETLT and SETLE reuse the SETGT and SETGE ops with $lhs and $rhs
+// swapped.
+//
+// v4f64 results (SELECT_QFRC):
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+          (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+          (SELECT_QFRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+          (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+          (SELECT_QFRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+          (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+          (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// v4f32 results (SELECT_QSRC):
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+          (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+          (SELECT_QSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+          (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+          (SELECT_QSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+          (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+          (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// v4i1 results (SELECT_QBRC):
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+          (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+          (SELECT_QBRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+          (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+          (SELECT_QBRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+          (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+          (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+// fminnum/fmaxnum when NoNaNsFPMath holds: a single compare of $FRA against
+// $FRB (less-than for min, greater-than for max) drives QVFSELb / QVFSELbs,
+// with no NaN test.
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we need
+// to explicitly or with a NaN test on the second operand.
+// The select condition is therefore (compare) OR (QVFTSTNAN $FRB, $FRB),
+// formed with QVFLOGICALb immediate 7 (the function the qvfor alias names).
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+                                (QVFTSTNANb $FRB, $FRB), (i32 7)),
+                   $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+                                (QVFTSTNANb $FRB, $FRB), (i32 7)),
+                   $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+                                 (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+                   $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+                                 (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+                   $FRB, $FRA)>;
+}
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp

index 41bb11f47a0865d956f070f14a000a8ea9d419d6..c9a96840a9b7a9966fa569f768069c5a6ba063ab 100644 (file)
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -275,6 +275,9 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
    }
    case PPC::F8RCRegClassID:
    case PPC::F4RCRegClassID:
+  case PPC::QFRCRegClassID:
+  case PPC::QSRCRegClassID:
+  case PPC::QBRCRegClassID:
    case PPC::VRRCRegClassID:
    case PPC::VFRCRegClassID:
    case PPC::VSLRCRegClassID:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td

index 62416bc5d9f1b2707e1ab70964fd1f55dee0bb68..9a7df9615cc51e4976906e40e816e927877b49f6 100644 (file)
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -49,6 +49,13 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
    let HWEncoding{4-0} = num;
  }
  
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+// Each QFPR is built on an existing FPR (SubReg): it reuses that register's
+// HWEncoding and exposes it as the sub_64 subregister. `n` is the assembly
+// name.
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+  let HWEncoding = SubReg.HWEncoding;
+  let SubRegs = [SubReg];
+  let SubRegIndices = [sub_64];
+}
+
  // VF - One of the 32 64-bit floating-point subregisters of the vector
  // registers (used by VSX).
  class VF<bits<5> num, string n> : PPCReg<n> {
@@ -114,6 +121,12 @@ foreach Index = 0-31 in {
    def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
  }
  
+// QPX Floating-point registers
+// Defines QF0..QF31. QF<n> is layered on F<n> (its sub_64 subregister), is
+// named "q<n>" in assembly, and uses Index + 32 for both DwarfRegNum entries.
+foreach Index = 0-31 in {
+  def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
  // Vector registers
  foreach Index = 0-31 in {
    def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@@ -303,6 +316,16 @@ def VFRC :  RegisterClass<"PPC", [f64], 64,
                                 VF22, VF21, VF20)>;
  def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
  
+// For QPX
+// QFRC (v4f64), QSRC (v4f32) and QBRC (v4i1) all contain the same QF
+// registers; QSRC and QBRC are defined as (add QFRC). The register list
+// orders QF0..QF13 first, then QF31 down to QF14.
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+                                                (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+  // These are actually stored as floating-point values where a positive
+  // number is true and anything else (including NaN) is false.
+  let Size = 256;
+}
+
  def CRBITRC : RegisterClass<"PPC", [i1], 32,
    (add CR2LT, CR2GT, CR2EQ, CR2UN,
         CR3LT, CR3GT, CR3EQ, CR3UN,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp

index 8d3d5c4e408909c4ed46571128bc59b079fbcf43..c91428db3a9a98dc0dd816a90b9971a7eb9e198a 100644 (file)
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -37,6 +37,10 @@ using namespace llvm;
  static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
  cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
  
+// Hidden command-line flag "qpx-stack-unaligned". Its value is copied into
+// PPCSubtarget::IsQPXStackUnaligned in initSubtargetFeatures, just before the
+// stack alignment is computed.
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+  cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+  cl::Hidden);
+
  PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                              StringRef FS) {
    initializeEnvironment();
@@ -90,6 +94,7 @@ void PPCSubtarget::initializeEnvironment() {
    HasLazyResolverStubs = false;
    HasICBT = false;
    HasInvariantFunctionDescriptors = false;
+  IsQPXStackUnaligned = false;
  }
  
  void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -126,8 +131,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
    // QPX requires a 32-byte aligned stack. Note that we need to do this if
    // we're compiling for a BG/Q system regardless of whether or not QPX
    // is enabled because external functions will assume this alignment.
-  if (hasQPX() || isBGQ())
-    StackAlignment = 32;
+  IsQPXStackUnaligned = QPXStackUnaligned;
+  StackAlignment = getPlatformStackAlignment();
  
    // Determine endianness.
    // FIXME: Part of the TargetMachine.
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h

index 704a226ed33322d4ab4a3ee1f0040a09b545fa80..247a96d405e99df82f9e7ad14e67ee065f2a5f61 100644 (file)
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -114,6 +114,11 @@ protected:
    bool HasICBT;
    bool HasInvariantFunctionDescriptors;
  
+  /// When targeting QPX running a stock PPC64 Linux kernel where the stack
+  /// alignment has not been changed, we need to keep the 16-byte alignment
+  /// of the stack.
+  bool IsQPXStackUnaligned;
+
    const PPCTargetMachine &TM;
    PPCFrameLowering FrameLowering;
    PPCInstrInfo InstrInfo;
@@ -230,6 +235,14 @@ public:
      return HasInvariantFunctionDescriptors;
    }
  
+  bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
+  unsigned getPlatformStackAlignment() const {
+    if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
+      return 32;
+
+    return 16;
+  }
+
    const Triple &getTargetTriple() const { return TargetTriple; }
  
    /// isDarwin - True if this is any darwin platform.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

index e1d46f725428fc183c3fd7b85eed7a00f5014ddb..073bbb0c5567801a98c2ef537feeeac056470932 100644 (file)
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -193,13 +193,14 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
  }
  
  unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
-  if (Vector && !ST->hasAltivec())
+  if (Vector && !ST->hasAltivec() && !ST->hasQPX())
      return 0;
    return ST->hasVSX() ? 64 : 32;
  }
  
  unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
    if (Vector) {
+    if (ST->hasQPX()) return 256;
      if (ST->hasAltivec()) return 128;
      return 0;
    }
@@ -276,6 +277,12 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
      if (Index == 0)
        return 0;
  
+    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+  } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+    // Floating point scalars are already located in index #0.
+    if (Index == 0)
+      return 0;
+
      return BaseT::getVectorInstrCost(Opcode, Val, Index);
    }
  
diff --git a/test/CodeGen/PowerPC/qpx-bv-sint.ll b/test/CodeGen/PowerPC/qpx-bv-sint.ll

new file mode 100644 (file)

index 0000000..0bc14ed
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-bv-sint.ll
@@ -0,0 +1,33 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @s452() nounwind {
+entry:
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body4, %entry
+  %conv.4 = sitofp i32 undef to double
+  %conv.5 = sitofp i32 undef to double
+  %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
+  %mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
+  %mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef
+  %add7.4 = fadd <2 x double> undef, %mul.4
+  store <2 x double> %add7.4, <2 x double>* undef, align 16
+  br i1 undef, label %for.end, label %for.body4
+
+for.end:                                          ; preds = %for.body4
+  unreachable
+; CHECK-LABEL: @s452
+; CHECK: lfiwax [[REG1:[0-9]+]],
+; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
+; FIXME: We could 'promote' this to a vector earlier and remove this splat.
+; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
+; CHECK: qvfmul
+; CHECK: qvfadd
+; CHECK: qvesplati {{[0-9]+}},
+; FIXME: We can use qvstfcdx here instead of two stores.
+; CHECK: stfd
+; CHECK: stfd
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-bv.ll b/test/CodeGen/PowerPC/qpx-bv.ll

new file mode 100644 (file)

index 0000000..ae181de
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-bv.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mcpu=a2q | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
+  %v1 = insertelement <4 x double> undef, double %f1, i32 0
+  %v2 = insertelement <4 x double> %v1,   double %f2, i32 1
+  %v3 = insertelement <4 x double> %v2,   double %f3, i32 2
+  %v4 = insertelement <4 x double> %v3,   double %f4, i32 3
+  ret <4 x double> %v4
+
+; CHECK-LABEL: @foo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
+  %v1 = insertelement <4 x float> undef, float %f1, i32 0
+  %v2 = insertelement <4 x float> %v1,   float %f2, i32 1
+  %v3 = insertelement <4 x float> %v2,   float %f3, i32 2
+  %v4 = insertelement <4 x float> %v3,   float %f4, i32 3
+  ret <4 x float> %v4
+
+; CHECK-LABEL: @goo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-func-clobber.ll b/test/CodeGen/PowerPC/qpx-func-clobber.ll

new file mode 100644 (file)

index 0000000..c1b808a
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-func-clobber.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+declare <4 x double> @foo(<4 x double> %p)
+
+define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
+entry:
+  %v = call <4 x double> @foo(<4 x double> %p)
+  %w = call <4 x double> @foo(<4 x double> %q)
+  %x = fadd <4 x double> %v, %w
+  ret <4 x double> %x
+
+; CHECK-LABEL: @bar
+; CHECK: qvstfdx 2,
+; CHECK: bl foo
+; CHECK: qvstfdx 1,
+; CHECK: qvlfdx 1,
+; CHECK: bl foo
+; CHECK: qvlfdx [[REG:[0-9]+]],
+; CHECK: qvfadd 1, [[REG]], 1
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-load.ll b/test/CodeGen/PowerPC/qpx-load.ll

new file mode 100644 (file)

index 0000000..2eb2908
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-load.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define <4 x double> @foo(<4 x double>* %p) {
+entry:
+  %v = load <4 x double>* %p, align 8
+  ret <4 x double> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x double> @bar(<4 x double>* %p) {
+entry:
+  %v = load <4 x double>* %p, align 32
+  ret <4 x double> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfdx
+
diff --git a/test/CodeGen/PowerPC/qpx-recipest.ll b/test/CodeGen/PowerPC/qpx-recipest.ll

new file mode 100644 (file)

index 0000000..0e01358
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-recipest.ll
@@ -0,0 +1,194 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %r = fdiv <4 x double> %a, %x
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foo
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+;        a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %y = fpext <4 x float> %x to <4 x double>
+  %r = fdiv <4 x double> %a, %y
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foof
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+;        a qvfmadds instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
+entry:
+  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %y = fptrunc <4 x double> %x to <4 x float>
+  %r = fdiv <4 x float> %a, %y
+  ret <4 x float> %r
+
+; CHECK-LABEL: @food
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+;        a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfrsp
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @food
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv <4 x float> %a, %x
+  ret <4 x float> %r
+
+; CHECK-LABEL: @goo
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+;        a qvfmadds instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+  %r = fdiv <4 x double> %a, %b
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foo2
+; CHECK: qvfre
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %r = fdiv <4 x float> %a, %b
+  ret <4 x float> %r
+
+; CHECK-LABEL: @goo2
+; CHECK: qvfres
+; CHECK: qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foo3(<4 x double> %a) nounwind {
+entry:
+  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foo3
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+;        a qvfmadd instead of a qvfnmsub
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo3(<4 x float> %a) nounwind {
+entry:
+  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
+
+; CHECK-LABEL: @goo3
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+;        a qvfmadds instead of a qvfnmsubs
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfmuls
+; CHECK-DAG: qvfmuls
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-rounding-ops.ll b/test/CodeGen/PowerPC/qpx-rounding-ops.ll

new file mode 100644 (file)

index 0000000..6fdd8e6
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-rounding-ops.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <4 x float> @test1(<4 x float> %x) nounwind  {
+  %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test1:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test2(<4 x double> %x) nounwind  {
+  %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test2:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test3(<4 x float> %x) nounwind  {
+  %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test3:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test3:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test4(<4 x double> %x) nounwind  {
+  %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test4:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test4:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test5(<4 x float> %x) nounwind  {
+  %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test5:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test6(<4 x double> %x) nounwind  {
+  %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test6:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test9(<4 x float> %x) nounwind  {
+  %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test9:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test10(<4 x double> %x) nounwind  {
+  %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test10:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/qpx-s-load.ll b/test/CodeGen/PowerPC/qpx-s-load.ll

new file mode 100644 (file)

index 0000000..8dfab13
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-load.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define <4 x float> @foo(<4 x float>* %p) {
+entry:
+  %v = load <4 x float>* %p, align 4
+  ret <4 x float> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x float> @bar(<4 x float>* %p) {
+entry:
+  %v = load <4 x float>* %p, align 16
+  ret <4 x float> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfsx
+
diff --git a/test/CodeGen/PowerPC/qpx-s-sel.ll b/test/CodeGen/PowerPC/qpx-s-sel.ll

new file mode 100644 (file)

index 0000000..008efea
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-sel.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
+entry:
+  %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+  %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+  %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+  %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %r
+
+; CHECK-LABEL: @test2
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+  %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+  %q = load <4 x i1>* @Q, align 16
+  %v = and <4 x i1> %a, %q
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+  store <4 x i1> %a, <4 x i1>* @R
+  ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  %s = extractelement <4 x i1> %a, i32 3
+  %q = and i1 %r, %s
+  ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+  %r = extractelement <3 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+  %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+  %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
+  ret <3 x float> %r
+
+; CHECK-LABEL: @test9
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-s-store.ll b/test/CodeGen/PowerPC/qpx-s-store.ll

new file mode 100644 (file)

index 0000000..d2ca458
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-store.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @foo(<4 x float> %v, <4 x float>* %p) {
+entry:
+  store <4 x float> %v, <4 x float>* %p, align 4
+  ret void
+}
+
+; CHECK: @foo
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: blr
+
+define void @bar(<4 x float> %v, <4 x float>* %p) {
+entry:
+  store <4 x float> %v, <4 x float>* %p, align 16
+  ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfsx
+
diff --git a/test/CodeGen/PowerPC/qpx-sel.ll b/test/CodeGen/PowerPC/qpx-sel.ll

new file mode 100644 (file)

index 0000000..15ae573
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-sel.ll
@@ -0,0 +1,151 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
+entry:
+  %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
+  ret <4 x double> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+  %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+  %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+  %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
+  ret <4 x double> %r
+
+; CHECK-LABEL: @test2
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+  %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+  %q = load <4 x i1>* @Q, align 16
+  %v = and <4 x i1> %a, %q
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+  store <4 x i1> %a, <4 x i1>* @R
+  ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  %s = extractelement <4 x i1> %a, i32 3
+  %q = and i1 %r, %s
+  ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+  %r = extractelement <3 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+  %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+  %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
+  ret <3 x double> %r
+
+; CHECK-LABEL: @test9
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-store.ll b/test/CodeGen/PowerPC/qpx-store.ll

new file mode 100644 (file)

index 0000000..c29cc22
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-store.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @foo(<4 x double> %v, <4 x double>* %p) {
+entry:
+  store <4 x double> %v, <4 x double>* %p, align 8
+  ret void
+}
+
+; CHECK: @foo
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: blr
+
+define void @bar(<4 x double> %v, <4 x double>* %p) {
+entry:
+  store <4 x double> %v, <4 x double>* %p, align 32
+  ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfdx
+
diff --git a/test/CodeGen/PowerPC/qpx-unalperm.ll b/test/CodeGen/PowerPC/qpx-unalperm.ll

new file mode 100644 (file)

index 0000000..e765b46
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-unalperm.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=a2q | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(<4 x double>* %a) {
+entry:
+  %r = load <4 x double>* %a, align 32
+  ret <4 x double> %r
+; CHECK: qvlfdx
+; CHECK: blr
+}
+
+define <4 x double> @bar(<4 x double>* %a) {
+entry:
+  %r = load <4 x double>* %a, align 8
+  %b = getelementptr <4 x double>* %a, i32 16
+  %s = load <4 x double>* %b, align 32
+  %t = fadd <4 x double> %r, %s
+  ret <4 x double> %t
+; CHECK: qvlpcldx
+; CHECK: qvlfdx
+; CHECK: qvfperm
+; CHECK: blr
+}
+
+define <4 x double> @bar1(<4 x double>* %a) {
+entry:
+  %r = load <4 x double>* %a, align 8
+  %b = getelementptr <4 x double>* %a, i32 16
+  %s = load <4 x double>* %b, align 8
+  %t = fadd <4 x double> %r, %s
+  ret <4 x double> %t
+}
+
+define <4 x double> @bar2(<4 x double>* %a) {
+entry:
+  %r = load <4 x double>* %a, align 8
+  %b = getelementptr <4 x double>* %a, i32 1
+  %s = load <4 x double>* %b, align 32
+  %t = fadd <4 x double> %r, %s
+  ret <4 x double> %t
+}
+
+define <4 x double> @bar3(<4 x double>* %a) {
+entry:
+  %r = load <4 x double>* %a, align 8
+  %b = getelementptr <4 x double>* %a, i32 1
+  %s = load <4 x double>* %b, align 8
+  %t = fadd <4 x double> %r, %s
+  ret <4 x double> %t
+}
+
+define <4 x double> @bar4(<4 x double>* %a) {
+entry:
+  %r = load <4 x double>* %a, align 8
+  %b = getelementptr <4 x double>* %a, i32 1
+  %s = load <4 x double>* %b, align 8
+  %c = getelementptr <4 x double>* %b, i32 1
+  %t = load <4 x double>* %c, align 8
+  %u = fadd <4 x double> %r, %s
+  %v = fadd <4 x double> %u, %t
+  ret <4 x double> %v
+}
+
diff --git a/test/CodeGen/PowerPC/vsx-infl-copy2.ll b/test/CodeGen/PowerPC/vsx-infl-copy2.ll

index 037473bdec82879690388937cd1bd49f692dbd96..0f279067159fb7f814f0524abe64c0ecc7e030dd 100644 (file)
--- a/test/CodeGen/PowerPC/vsx-infl-copy2.ll
+++ b/test/CodeGen/PowerPC/vsx-infl-copy2.ll
@@ -8,7 +8,6 @@ entry:
    br i1 false, label %loop2_start, label %if.end5
  
  ; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc
-; CHECK: xxlor
  
  loop2_start:                                      ; preds = %loop2_start, %entry
    br i1 undef, label %loop2_start, label %if.then.i31
diff --git a/test/MC/Disassembler/PowerPC/qpx.txt b/test/MC/Disassembler/PowerPC/qpx.txt

new file mode 100644 (file)

index 0000000..b53bb4c
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/qpx.txt
@@ -0,0 +1,383 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s
+
+# CHECK: qvfabs 3, 5
+0x10 0x60 0x2a 0x10
+
+# CHECK: qvfadd 3, 4, 5
+0x10 0x64 0x28 0x2a
+
+# CHECK: qvfadds 3, 4, 5
+0x00 0x64 0x28 0x2a
+
+# FIXME: decode as qvfandc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 4
+0x10 0x64 0x2a 0x08
+
+# FIXME: decode as qvfand 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 1
+0x10 0x64 0x28 0x88
+
+# CHECK: qvfcfid 3, 5
+0x10 0x60 0x2e 0x9c
+
+# CHECK: qvfcfids 3, 5
+0x00 0x60 0x2e 0x9c
+
+# CHECK: qvfcfidu 3, 5
+0x10 0x60 0x2f 0x9c
+
+# CHECK: qvfcfidus 3, 5
+0x00 0x60 0x2f 0x9c
+
+# FIXME: decode as qvfclr 3
+# CHECK: qvflogical 3, 3, 3, 0
+0x10 0x63 0x18 0x08
+
+# CHECK: qvfcpsgn 3, 4, 5
+0x10 0x64 0x28 0x10
+
+# FIXME: decode as qvfctfb 3, 4
+# CHECK: qvflogical 3, 4, 4, 5
+0x10 0x64 0x22 0x88
+
+# CHECK: qvfctid 3, 5
+0x10 0x60 0x2e 0x5c
+
+# CHECK: qvfctidu 3, 5
+0x10 0x60 0x2f 0x5c
+
+# CHECK: qvfctiduz 3, 5
+0x10 0x60 0x2f 0x5e
+
+# CHECK: qvfctidz 3, 5
+0x10 0x60 0x2e 0x5e
+
+# CHECK: qvfctiw 3, 5
+0x10 0x60 0x28 0x1c
+
+# CHECK: qvfctiwu 3, 5
+0x10 0x60 0x29 0x1c
+
+# CHECK: qvfctiwuz 3, 5
+0x10 0x60 0x29 0x1e
+
+# CHECK: qvfctiwz 3, 5
+0x10 0x60 0x28 0x1e
+
+# FIXME: decode as qvfequ 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 9
+0x10 0x64 0x2c 0x88
+
+# CHECK: qvflogical 3, 4, 5, 12
+0x10 0x64 0x2e 0x08
+
+# CHECK: qvfmadd 3, 4, 6, 5
+0x10 0x64 0x29 0xba
+
+# CHECK: qvfmadds 3, 4, 6, 5
+0x00 0x64 0x29 0xba
+
+# CHECK: qvfmr 3, 5
+0x10 0x60 0x28 0x90
+
+# CHECK: qvfmsub 3, 4, 6, 5
+0x10 0x64 0x29 0xb8
+
+# CHECK: qvfmsubs 3, 4, 6, 5
+0x00 0x64 0x29 0xb8
+
+# CHECK: qvfmul 3, 4, 6
+0x10 0x64 0x01 0xb2
+
+# CHECK: qvfmuls 3, 4, 6
+0x00 0x64 0x01 0xb2
+
+# CHECK: qvfnabs 3, 5
+0x10 0x60 0x29 0x10
+
+# FIXME: decode as qvfnand 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 14
+0x10 0x64 0x2f 0x08
+
+# CHECK: qvfneg 3, 5
+0x10 0x60 0x28 0x50
+
+# CHECK: qvfnmadd 3, 4, 6, 5
+0x10 0x64 0x29 0xbe
+
+# CHECK: qvfnmadds 3, 4, 6, 5
+0x00 0x64 0x29 0xbe
+
+# CHECK: qvfnmsub 3, 4, 6, 5
+0x10 0x64 0x29 0xbc
+
+# CHECK: qvfnmsubs 3, 4, 6, 5
+0x00 0x64 0x29 0xbc
+
+# FIXME: decode as qvfnor 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 8
+0x10 0x64 0x2c 0x08
+
+# FIXME: decode as qvfnot 3, 4
+# CHECK: qvflogical 3, 4, 4, 10
+0x10 0x64 0x25 0x08
+
+# FIXME: decode as qvforc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 13
+0x10 0x64 0x2e 0x88
+
+# FIXME: decode as qvfor 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 7
+0x10 0x64 0x2b 0x88
+
+# CHECK: qvfperm 3, 4, 5, 6
+0x10 0x64 0x29 0x8c
+
+# CHECK: qvfre 3, 5
+0x10 0x60 0x28 0x30
+
+# CHECK: qvfres 3, 5
+0x00 0x60 0x28 0x30
+
+# CHECK: qvfrim 3, 5
+0x10 0x60 0x2b 0xd0
+
+# CHECK: qvfrin 3, 5
+0x10 0x60 0x2b 0x10
+
+# CHECK: qvfrip 3, 5
+0x10 0x60 0x2b 0x90
+
+# CHECK: qvfriz 3, 5
+0x10 0x60 0x2b 0x50
+
+# CHECK: qvfrsp 3, 5
+0x10 0x60 0x28 0x18
+
+# CHECK: qvfrsqrte 3, 5
+0x10 0x60 0x28 0x34
+
+# CHECK: qvfrsqrtes 3, 5
+0x00 0x60 0x28 0x34
+
+# CHECK: qvfsel 3, 4, 6, 5
+0x10 0x64 0x29 0xae
+
+# FIXME: decode as qvfset 3
+# CHECK: qvflogical 3, 3, 3, 15
+0x10 0x63 0x1f 0x88
+
+# CHECK: qvfsub 3, 4, 5
+0x10 0x64 0x28 0x28
+
+# CHECK: qvfsubs 3, 4, 5
+0x00 0x64 0x28 0x28
+
+# CHECK: qvfxmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x92
+
+# CHECK: qvfxmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x92
+
+# CHECK: qvfxmul 3, 4, 6
+0x10 0x64 0x01 0xa2
+
+# CHECK: qvfxmuls 3, 4, 6
+0x00 0x64 0x01 0xa2
+
+# FIXME: decode as qvfxor 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 6
+0x10 0x64 0x2b 0x08
+
+# CHECK: qvfxxcpnmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x86
+
+# CHECK: qvfxxcpnmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x86
+
+# CHECK: qvfxxmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x82
+
+# CHECK: qvfxxmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x82
+
+# CHECK: qvfxxnpmadd 3, 4, 6, 5
+0x10 0x64 0x29 0x96
+
+# CHECK: qvfxxnpmadds 3, 4, 6, 5
+0x00 0x64 0x29 0x96
+
+# CHECK: qvlfcduxa 3, 9, 11
+0x7c 0x69 0x58 0xcf
+
+# CHECK: qvlfcdux 3, 9, 11
+0x7c 0x69 0x58 0xce
+
+# CHECK: qvlfcdxa 3, 10, 11
+0x7c 0x6a 0x58 0x8f
+
+# CHECK: qvlfcdx 3, 10, 11
+0x7c 0x6a 0x58 0x8e
+
+# CHECK: qvlfcsuxa 3, 9, 11
+0x7c 0x69 0x58 0x4f
+
+# CHECK: qvlfcsux 3, 9, 11
+0x7c 0x69 0x58 0x4e
+
+# CHECK: qvlfcsxa 3, 10, 11
+0x7c 0x6a 0x58 0x0f
+
+# CHECK: qvlfcsx 3, 10, 11
+0x7c 0x6a 0x58 0x0e
+
+# CHECK: qvlfduxa 3, 9, 11
+0x7c 0x69 0x5c 0xcf
+
+# CHECK: qvlfdux 3, 9, 11
+0x7c 0x69 0x5c 0xce
+
+# CHECK: qvlfdxa 3, 10, 11
+0x7c 0x6a 0x5c 0x8f
+
+# CHECK: qvlfdx 3, 10, 11
+0x7c 0x6a 0x5c 0x8e
+
+# CHECK: qvlfiwaxa 3, 10, 11
+0x7c 0x6a 0x5e 0xcf
+
+# CHECK: qvlfiwax 3, 10, 11
+0x7c 0x6a 0x5e 0xce
+
+# CHECK: qvlfiwzxa 3, 10, 11
+0x7c 0x6a 0x5e 0x8f
+
+# CHECK: qvlfiwzx 3, 10, 11
+0x7c 0x6a 0x5e 0x8e
+
+# CHECK: qvlfsuxa 3, 9, 11
+0x7c 0x69 0x5c 0x4f
+
+# CHECK: qvlfsux 3, 9, 11
+0x7c 0x69 0x5c 0x4e
+
+# CHECK: qvlfsxa 3, 10, 11
+0x7c 0x6a 0x5c 0x0f
+
+# CHECK: qvlfsx 3, 10, 11
+0x7c 0x6a 0x5c 0x0e
+
+# CHECK: qvlpcldx 3, 10, 11
+0x7c 0x6a 0x5c 0x8c
+
+# CHECK: qvlpclsx 3, 10, 11
+0x7c 0x6a 0x5c 0x0c
+
+# CHECK: qvlpcrdx 3, 10, 11
+0x7c 0x6a 0x58 0x8c
+
+# CHECK: qvlpcrsx 3, 10, 11
+0x7c 0x6a 0x58 0x0c
+
+# CHECK: qvstfcduxa 2, 9, 11
+0x7c 0x49 0x59 0xcf
+
+# CHECK: qvstfcduxia 2, 9, 11
+0x7c 0x49 0x59 0xcb
+
+# CHECK: qvstfcduxi 2, 9, 11
+0x7c 0x49 0x59 0xca
+
+# CHECK: qvstfcdux 2, 9, 11
+0x7c 0x49 0x59 0xce
+
+# CHECK: qvstfcdxa 2, 10, 11
+0x7c 0x4a 0x59 0x8f
+
+# CHECK: qvstfcdxia 2, 10, 11
+0x7c 0x4a 0x59 0x8b
+
+# CHECK: qvstfcdxi 2, 10, 11
+0x7c 0x4a 0x59 0x8a
+
+# CHECK: qvstfcdx 2, 10, 11
+0x7c 0x4a 0x59 0x8e
+
+# CHECK: qvstfcsuxa 2, 9, 11
+0x7c 0x49 0x59 0x4f
+
+# CHECK: qvstfcsuxia 2, 9, 11
+0x7c 0x49 0x59 0x4b
+
+# CHECK: qvstfcsuxi 2, 9, 11
+0x7c 0x49 0x59 0x4a
+
+# CHECK: qvstfcsux 2, 9, 11
+0x7c 0x49 0x59 0x4e
+
+# CHECK: qvstfcsxa 2, 10, 11
+0x7c 0x4a 0x59 0x0f
+
+# CHECK: qvstfcsxia 2, 10, 11
+0x7c 0x4a 0x59 0x0b
+
+# CHECK: qvstfcsxi 2, 10, 11
+0x7c 0x4a 0x59 0x0a
+
+# CHECK: qvstfcsx 2, 10, 11
+0x7c 0x4a 0x59 0x0e
+
+# CHECK: qvstfduxa 2, 9, 11
+0x7c 0x49 0x5d 0xcf
+
+# CHECK: qvstfduxia 2, 9, 11
+0x7c 0x49 0x5d 0xcb
+
+# CHECK: qvstfduxi 2, 9, 11
+0x7c 0x49 0x5d 0xca
+
+# CHECK: qvstfdux 2, 9, 11
+0x7c 0x49 0x5d 0xce
+
+# CHECK: qvstfdxa 2, 10, 11
+0x7c 0x4a 0x5d 0x8f
+
+# CHECK: qvstfdxia 2, 10, 11
+0x7c 0x4a 0x5d 0x8b
+
+# CHECK: qvstfdxi 2, 10, 11
+0x7c 0x4a 0x5d 0x8a
+
+# CHECK: qvstfdx 2, 10, 11
+0x7c 0x4a 0x5d 0x8e
+
+# CHECK: qvstfiwxa 2, 10, 11
+0x7c 0x4a 0x5f 0x8f
+
+# CHECK: qvstfiwx 2, 10, 11
+0x7c 0x4a 0x5f 0x8e
+
+# CHECK: qvstfsuxa 2, 9, 11
+0x7c 0x49 0x5d 0x4f
+
+# CHECK: qvstfsuxia 2, 9, 11
+0x7c 0x49 0x5d 0x4b
+
+# CHECK: qvstfsuxi 2, 9, 11
+0x7c 0x49 0x5d 0x4a
+
+# CHECK: qvstfsux 2, 9, 11
+0x7c 0x49 0x5d 0x4e
+
+# CHECK: qvstfsxa 2, 10, 11
+0x7c 0x4a 0x5d 0x0f
+
+# CHECK: qvstfsxia 2, 10, 11
+0x7c 0x4a 0x5d 0x0b
+
+# CHECK: qvstfsxi 2, 10, 11
+0x7c 0x4a 0x5d 0x0a
+
+# CHECK: qvstfsx 2, 10, 11
+0x7c 0x4a 0x5d 0x0e
+
diff --git a/test/MC/PowerPC/qpx.s b/test/MC/PowerPC/qpx.s

new file mode 100644 (file)

index 0000000..6c92d71
--- /dev/null
+++ b/test/MC/PowerPC/qpx.s
@@ -0,0 +1,251 @@
+# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s
+
+# FIXME: print qvflogical aliases.
+
+# CHECK: qvfabs 3, 5                     # encoding: [0x10,0x60,0x2a,0x10]
+         qvfabs 3, 5
+# CHECK: qvfadd 3, 4, 5                  # encoding: [0x10,0x64,0x28,0x2a]
+         qvfadd 3, 4, 5
+# CHECK: qvfadds 3, 4, 5                 # encoding: [0x00,0x64,0x28,0x2a]
+         qvfadds 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 4           # encoding: [0x10,0x64,0x2a,0x08]
+         qvfandc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 1           # encoding: [0x10,0x64,0x28,0x88]
+         qvfand 3, 4, 5
+# CHECK: qvfcfid 3, 5                    # encoding: [0x10,0x60,0x2e,0x9c]
+         qvfcfid 3, 5
+# CHECK: qvfcfids 3, 5                   # encoding: [0x00,0x60,0x2e,0x9c]
+         qvfcfids 3, 5
+# CHECK: qvfcfidu 3, 5                   # encoding: [0x10,0x60,0x2f,0x9c]
+         qvfcfidu 3, 5
+# CHECK: qvfcfidus 3, 5                  # encoding: [0x00,0x60,0x2f,0x9c]
+         qvfcfidus 3, 5
+# CHECK: qvflogical 3, 3, 3, 0           # encoding: [0x10,0x63,0x18,0x08]
+         qvfclr 3
+# CHECK: qvfcpsgn 3, 4, 5                # encoding: [0x10,0x64,0x28,0x10]
+         qvfcpsgn 3, 4, 5
+# CHECK: qvflogical 3, 4, 4, 5           # encoding: [0x10,0x64,0x22,0x88]
+         qvfctfb 3, 4
+# CHECK: qvfctid 3, 5                    # encoding: [0x10,0x60,0x2e,0x5c]
+         qvfctid 3, 5
+# CHECK: qvfctidu 3, 5                   # encoding: [0x10,0x60,0x2f,0x5c]
+         qvfctidu 3, 5
+# CHECK: qvfctiduz 3, 5                  # encoding: [0x10,0x60,0x2f,0x5e]
+         qvfctiduz 3, 5
+# CHECK: qvfctidz 3, 5                   # encoding: [0x10,0x60,0x2e,0x5e]
+         qvfctidz 3, 5
+# CHECK: qvfctiw 3, 5                    # encoding: [0x10,0x60,0x28,0x1c]
+         qvfctiw 3, 5
+# CHECK: qvfctiwu 3, 5                   # encoding: [0x10,0x60,0x29,0x1c]
+         qvfctiwu 3, 5
+# CHECK: qvfctiwuz 3, 5                  # encoding: [0x10,0x60,0x29,0x1e]
+         qvfctiwuz 3, 5
+# CHECK: qvfctiwz 3, 5                   # encoding: [0x10,0x60,0x28,0x1e]
+         qvfctiwz 3, 5
+# CHECK: qvflogical 3, 4, 5, 9           # encoding: [0x10,0x64,0x2c,0x88]
+         qvfequ 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 12          # encoding: [0x10,0x64,0x2e,0x08]
+         qvflogical 3, 4, 5, 12
+# CHECK: qvfmadd 3, 4, 6, 5              # encoding: [0x10,0x64,0x29,0xba]
+         qvfmadd 3, 4, 6, 5
+# CHECK: qvfmadds 3, 4, 6, 5             # encoding: [0x00,0x64,0x29,0xba]
+         qvfmadds 3, 4, 6, 5
+# CHECK: qvfmr 3, 5                      # encoding: [0x10,0x60,0x28,0x90]
+         qvfmr 3, 5
+# CHECK: qvfmsub 3, 4, 6, 5              # encoding: [0x10,0x64,0x29,0xb8]
+         qvfmsub 3, 4, 6, 5
+# CHECK: qvfmsubs 3, 4, 6, 5             # encoding: [0x00,0x64,0x29,0xb8]
+         qvfmsubs 3, 4, 6, 5
+# CHECK: qvfmul 3, 4, 6                  # encoding: [0x10,0x64,0x01,0xb2]
+         qvfmul 3, 4, 6
+# CHECK: qvfmuls 3, 4, 6                 # encoding: [0x00,0x64,0x01,0xb2]
+         qvfmuls 3, 4, 6
+# CHECK: qvfnabs 3, 5                    # encoding: [0x10,0x60,0x29,0x10]
+         qvfnabs 3, 5
+# CHECK: qvflogical 3, 4, 5, 14          # encoding: [0x10,0x64,0x2f,0x08]
+         qvfnand 3, 4, 5
+# CHECK: qvfneg 3, 5                     # encoding: [0x10,0x60,0x28,0x50]
+         qvfneg 3, 5
+# CHECK: qvfnmadd 3, 4, 6, 5             # encoding: [0x10,0x64,0x29,0xbe]
+         qvfnmadd 3, 4, 6, 5
+# CHECK: qvfnmadds 3, 4, 6, 5            # encoding: [0x00,0x64,0x29,0xbe]
+         qvfnmadds 3, 4, 6, 5
+# CHECK: qvfnmsub 3, 4, 6, 5             # encoding: [0x10,0x64,0x29,0xbc]
+         qvfnmsub 3, 4, 6, 5
+# CHECK: qvfnmsubs 3, 4, 6, 5            # encoding: [0x00,0x64,0x29,0xbc]
+         qvfnmsubs 3, 4, 6, 5
+# CHECK: qvflogical 3, 4, 5, 8           # encoding: [0x10,0x64,0x2c,0x08]
+         qvfnor 3, 4, 5
+# CHECK: qvflogical 3, 4, 4, 10          # encoding: [0x10,0x64,0x25,0x08]
+         qvfnot 3, 4
+# CHECK: qvflogical 3, 4, 5, 13          # encoding: [0x10,0x64,0x2e,0x88]
+         qvforc 3, 4, 5
+# CHECK: qvflogical 3, 4, 5, 7           # encoding: [0x10,0x64,0x2b,0x88]
+         qvfor 3, 4, 5
+# CHECK: qvfperm 3, 4, 5, 6              # encoding: [0x10,0x64,0x29,0x8c]
+         qvfperm 3, 4, 5, 6
+# CHECK: qvfre 3, 5                      # encoding: [0x10,0x60,0x28,0x30]
+         qvfre 3, 5
+# CHECK: qvfres 3, 5                     # encoding: [0x00,0x60,0x28,0x30]
+         qvfres 3, 5
+# CHECK: qvfrim 3, 5                     # encoding: [0x10,0x60,0x2b,0xd0]
+         qvfrim 3, 5
+# CHECK: qvfrin 3, 5                     # encoding: [0x10,0x60,0x2b,0x10]
+         qvfrin 3, 5
+# CHECK: qvfrip 3, 5                     # encoding: [0x10,0x60,0x2b,0x90]
+         qvfrip 3, 5
+# CHECK: qvfriz 3, 5                     # encoding: [0x10,0x60,0x2b,0x50]
+         qvfriz 3, 5
+# CHECK: qvfrsp 3, 5                     # encoding: [0x10,0x60,0x28,0x18]
+         qvfrsp 3, 5
+# CHECK: qvfrsqrte 3, 5                  # encoding: [0x10,0x60,0x28,0x34]
+         qvfrsqrte 3, 5
+# CHECK: qvfrsqrtes 3, 5                 # encoding: [0x00,0x60,0x28,0x34]
+         qvfrsqrtes 3, 5
+# CHECK: qvfsel 3, 4, 6, 5               # encoding: [0x10,0x64,0x29,0xae]
+         qvfsel 3, 4, 6, 5
+# CHECK: qvflogical 3, 3, 3, 15          # encoding: [0x10,0x63,0x1f,0x88]
+         qvfset 3
+# CHECK: qvfsub 3, 4, 5                  # encoding: [0x10,0x64,0x28,0x28]
+         qvfsub 3, 4, 5
+# CHECK: qvfsubs 3, 4, 5                 # encoding: [0x00,0x64,0x28,0x28]
+         qvfsubs 3, 4, 5
+# CHECK: qvfxmadd 3, 4, 6, 5             # encoding: [0x10,0x64,0x29,0x92]
+         qvfxmadd 3, 4, 6, 5
+# CHECK: qvfxmadds 3, 4, 6, 5            # encoding: [0x00,0x64,0x29,0x92]
+         qvfxmadds 3, 4, 6, 5
+# CHECK: qvfxmul 3, 4, 6                 # encoding: [0x10,0x64,0x01,0xa2]
+         qvfxmul 3, 4, 6
+# CHECK: qvfxmuls 3, 4, 6                # encoding: [0x00,0x64,0x01,0xa2]
+         qvfxmuls 3, 4, 6
+# CHECK: qvflogical 3, 4, 5, 6           # encoding: [0x10,0x64,0x2b,0x08]
+         qvfxor 3, 4, 5
+# CHECK: qvfxxcpnmadd 3, 4, 6, 5         # encoding: [0x10,0x64,0x29,0x86]
+         qvfxxcpnmadd 3, 4, 6, 5
+# CHECK: qvfxxcpnmadds 3, 4, 6, 5        # encoding: [0x00,0x64,0x29,0x86]
+         qvfxxcpnmadds 3, 4, 6, 5
+# CHECK: qvfxxmadd 3, 4, 6, 5            # encoding: [0x10,0x64,0x29,0x82]
+         qvfxxmadd 3, 4, 6, 5
+# CHECK: qvfxxmadds 3, 4, 6, 5           # encoding: [0x00,0x64,0x29,0x82]
+         qvfxxmadds 3, 4, 6, 5
+# CHECK: qvfxxnpmadd 3, 4, 6, 5          # encoding: [0x10,0x64,0x29,0x96]
+         qvfxxnpmadd 3, 4, 6, 5
+# CHECK: qvfxxnpmadds 3, 4, 6, 5         # encoding: [0x00,0x64,0x29,0x96]
+         qvfxxnpmadds 3, 4, 6, 5
+# CHECK: qvlfcduxa 3, 9, 11              # encoding: [0x7c,0x69,0x58,0xcf]
+         qvlfcduxa 3, 9, 11
+# CHECK: qvlfcdux 3, 9, 11               # encoding: [0x7c,0x69,0x58,0xce]
+         qvlfcdux 3, 9, 11
+# CHECK: qvlfcdxa 3, 10, 11              # encoding: [0x7c,0x6a,0x58,0x8f]
+         qvlfcdxa 3, 10, 11
+# CHECK: qvlfcdx 3, 10, 11               # encoding: [0x7c,0x6a,0x58,0x8e]
+         qvlfcdx 3, 10, 11
+# CHECK: qvlfcsuxa 3, 9, 11              # encoding: [0x7c,0x69,0x58,0x4f]
+         qvlfcsuxa 3, 9, 11
+# CHECK: qvlfcsux 3, 9, 11               # encoding: [0x7c,0x69,0x58,0x4e]
+         qvlfcsux 3, 9, 11
+# CHECK: qvlfcsxa 3, 10, 11              # encoding: [0x7c,0x6a,0x58,0x0f]
+         qvlfcsxa 3, 10, 11
+# CHECK: qvlfcsx 3, 10, 11               # encoding: [0x7c,0x6a,0x58,0x0e]
+         qvlfcsx 3, 10, 11
+# CHECK: qvlfduxa 3, 9, 11               # encoding: [0x7c,0x69,0x5c,0xcf]
+         qvlfduxa 3, 9, 11
+# CHECK: qvlfdux 3, 9, 11                # encoding: [0x7c,0x69,0x5c,0xce]
+         qvlfdux 3, 9, 11
+# CHECK: qvlfdxa 3, 10, 11               # encoding: [0x7c,0x6a,0x5c,0x8f]
+         qvlfdxa 3, 10, 11
+# CHECK: qvlfdx 3, 10, 11                # encoding: [0x7c,0x6a,0x5c,0x8e]
+         qvlfdx 3, 10, 11
+# CHECK: qvlfiwaxa 3, 10, 11             # encoding: [0x7c,0x6a,0x5e,0xcf]
+         qvlfiwaxa 3, 10, 11
+# CHECK: qvlfiwax 3, 10, 11              # encoding: [0x7c,0x6a,0x5e,0xce]
+         qvlfiwax 3, 10, 11
+# CHECK: qvlfiwzxa 3, 10, 11             # encoding: [0x7c,0x6a,0x5e,0x8f]
+         qvlfiwzxa 3, 10, 11
+# CHECK: qvlfiwzx 3, 10, 11              # encoding: [0x7c,0x6a,0x5e,0x8e]
+         qvlfiwzx 3, 10, 11
+# CHECK: qvlfsuxa 3, 9, 11               # encoding: [0x7c,0x69,0x5c,0x4f]
+         qvlfsuxa 3, 9, 11
+# CHECK: qvlfsux 3, 9, 11                # encoding: [0x7c,0x69,0x5c,0x4e]
+         qvlfsux 3, 9, 11
+# CHECK: qvlfsxa 3, 10, 11               # encoding: [0x7c,0x6a,0x5c,0x0f]
+         qvlfsxa 3, 10, 11
+# CHECK: qvlfsx 3, 10, 11                # encoding: [0x7c,0x6a,0x5c,0x0e]
+         qvlfsx 3, 10, 11
+# CHECK: qvlpcldx 3, 10, 11              # encoding: [0x7c,0x6a,0x5c,0x8c]
+         qvlpcldx 3, 10, 11
+# CHECK: qvlpclsx 3, 10, 11              # encoding: [0x7c,0x6a,0x5c,0x0c]
+         qvlpclsx 3, 10, 11
+# CHECK: qvlpcrdx 3, 10, 11              # encoding: [0x7c,0x6a,0x58,0x8c]
+         qvlpcrdx 3, 10, 11
+# CHECK: qvlpcrsx 3, 10, 11              # encoding: [0x7c,0x6a,0x58,0x0c]
+         qvlpcrsx 3, 10, 11
+# CHECK: qvstfcduxa 2, 9, 11             # encoding: [0x7c,0x49,0x59,0xcf]
+         qvstfcduxa 2, 9, 11
+# CHECK: qvstfcduxia 2, 9, 11            # encoding: [0x7c,0x49,0x59,0xcb]
+         qvstfcduxia 2, 9, 11
+# CHECK: qvstfcduxi 2, 9, 11             # encoding: [0x7c,0x49,0x59,0xca]
+         qvstfcduxi 2, 9, 11
+# CHECK: qvstfcdux 2, 9, 11              # encoding: [0x7c,0x49,0x59,0xce]
+         qvstfcdux 2, 9, 11
+# CHECK: qvstfcdxa 2, 10, 11             # encoding: [0x7c,0x4a,0x59,0x8f]
+         qvstfcdxa 2, 10, 11
+# CHECK: qvstfcdxia 2, 10, 11            # encoding: [0x7c,0x4a,0x59,0x8b]
+         qvstfcdxia 2, 10, 11
+# CHECK: qvstfcdxi 2, 10, 11             # encoding: [0x7c,0x4a,0x59,0x8a]
+         qvstfcdxi 2, 10, 11
+# CHECK: qvstfcdx 2, 10, 11              # encoding: [0x7c,0x4a,0x59,0x8e]
+         qvstfcdx 2, 10, 11
+# CHECK: qvstfcsuxa 2, 9, 11             # encoding: [0x7c,0x49,0x59,0x4f]
+         qvstfcsuxa 2, 9, 11
+# CHECK: qvstfcsuxia 2, 9, 11            # encoding: [0x7c,0x49,0x59,0x4b]
+         qvstfcsuxia 2, 9, 11
+# CHECK: qvstfcsuxi 2, 9, 11             # encoding: [0x7c,0x49,0x59,0x4a]
+         qvstfcsuxi 2, 9, 11
+# CHECK: qvstfcsux 2, 9, 11              # encoding: [0x7c,0x49,0x59,0x4e]
+         qvstfcsux 2, 9, 11
+# CHECK: qvstfcsxa 2, 10, 11             # encoding: [0x7c,0x4a,0x59,0x0f]
+         qvstfcsxa 2, 10, 11
+# CHECK: qvstfcsxia 2, 10, 11            # encoding: [0x7c,0x4a,0x59,0x0b]
+         qvstfcsxia 2, 10, 11
+# CHECK: qvstfcsxi 2, 10, 11             # encoding: [0x7c,0x4a,0x59,0x0a]
+         qvstfcsxi 2, 10, 11
+# CHECK: qvstfcsx 2, 10, 11              # encoding: [0x7c,0x4a,0x59,0x0e]
+         qvstfcsx 2, 10, 11
+# CHECK: qvstfduxa 2, 9, 11              # encoding: [0x7c,0x49,0x5d,0xcf]
+         qvstfduxa 2, 9, 11
+# CHECK: qvstfduxia 2, 9, 11             # encoding: [0x7c,0x49,0x5d,0xcb]
+         qvstfduxia 2, 9, 11
+# CHECK: qvstfduxi 2, 9, 11              # encoding: [0x7c,0x49,0x5d,0xca]
+         qvstfduxi 2, 9, 11
+# CHECK: qvstfdux 2, 9, 11               # encoding: [0x7c,0x49,0x5d,0xce]
+         qvstfdux 2, 9, 11
+# CHECK: qvstfdxa 2, 10, 11              # encoding: [0x7c,0x4a,0x5d,0x8f]
+         qvstfdxa 2, 10, 11
+# CHECK: qvstfdxia 2, 10, 11             # encoding: [0x7c,0x4a,0x5d,0x8b]
+         qvstfdxia 2, 10, 11
+# CHECK: qvstfdxi 2, 10, 11              # encoding: [0x7c,0x4a,0x5d,0x8a]
+         qvstfdxi 2, 10, 11
+# CHECK: qvstfdx 2, 10, 11               # encoding: [0x7c,0x4a,0x5d,0x8e]
+         qvstfdx 2, 10, 11
+# CHECK: qvstfiwxa 2, 10, 11             # encoding: [0x7c,0x4a,0x5f,0x8f]
+         qvstfiwxa 2, 10, 11
+# CHECK: qvstfiwx 2, 10, 11              # encoding: [0x7c,0x4a,0x5f,0x8e]
+         qvstfiwx 2, 10, 11
+# CHECK: qvstfsuxa 2, 9, 11              # encoding: [0x7c,0x49,0x5d,0x4f]
+         qvstfsuxa 2, 9, 11
+# CHECK: qvstfsuxia 2, 9, 11             # encoding: [0x7c,0x49,0x5d,0x4b]
+         qvstfsuxia 2, 9, 11
+# CHECK: qvstfsuxi 2, 9, 11              # encoding: [0x7c,0x49,0x5d,0x4a]
+         qvstfsuxi 2, 9, 11
+# CHECK: qvstfsux 2, 9, 11               # encoding: [0x7c,0x49,0x5d,0x4e]
+         qvstfsux 2, 9, 11
+# CHECK: qvstfsxa 2, 10, 11              # encoding: [0x7c,0x4a,0x5d,0x0f]
+         qvstfsxa 2, 10, 11
+# CHECK: qvstfsxia 2, 10, 11             # encoding: [0x7c,0x4a,0x5d,0x0b]
+         qvstfsxia 2, 10, 11
+# CHECK: qvstfsxi 2, 10, 11              # encoding: [0x7c,0x4a,0x5d,0x0a]
+         qvstfsxi 2, 10, 11
+# CHECK: qvstfsx 2, 10, 11               # encoding: [0x7c,0x4a,0x5d,0x0e]
+         qvstfsx 2, 10, 11
+
author	Hal Finkel <hfinkel@anl.gov>
	Wed, 25 Feb 2015 01:06:45 +0000 (01:06 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Wed, 25 Feb 2015 01:06:45 +0000 (01:06 +0000)
include/llvm/IR/IntrinsicsPowerPC.td		patch \| blob \| history
lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp		patch \| blob \| history
lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp		patch \| blob \| history
lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp		patch \| blob \| history
lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h		patch \| blob \| history
lib/Target/PowerPC/PPCAsmPrinter.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCCallingConv.td		patch \| blob \| history
lib/Target/PowerPC/PPCFrameLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
lib/Target/PowerPC/PPCInstrFormats.td		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.td		patch \| blob \| history
lib/Target/PowerPC/PPCInstrQPX.td	[new file with mode: 0644]	patch \| blob
lib/Target/PowerPC/PPCRegisterInfo.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCRegisterInfo.td		patch \| blob \| history
lib/Target/PowerPC/PPCSubtarget.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCSubtarget.h		patch \| blob \| history
lib/Target/PowerPC/PPCTargetTransformInfo.cpp		patch \| blob \| history
test/CodeGen/PowerPC/qpx-bv-sint.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-bv.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-func-clobber.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-load.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-recipest.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-rounding-ops.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-s-load.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-s-sel.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-s-store.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-sel.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-store.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/qpx-unalperm.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/PowerPC/vsx-infl-copy2.ll		patch \| blob \| history
test/MC/Disassembler/PowerPC/qpx.txt	[new file with mode: 0644]	patch \| blob
test/MC/PowerPC/qpx.s	[new file with mode: 0644]	patch \| blob