Add VSX Scalar loads and stores to the PPC back end

author Nemanja Ivanovic <nemanja.i.ibm@gmail.com>

Thu, 7 May 2015 18:24:05 +0000 (18:24 +0000)

committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com>

Thu, 7 May 2015 18:24:05 +0000 (18:24 +0000)
author Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Thu, 7 May 2015 18:24:05 +0000 (18:24 +0000)
committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Thu, 7 May 2015 18:24:05 +0000 (18:24 +0000)
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp

index b6f10e61079432049821a5941e38839c5f0e0962..8280f74c063bda58217832cc3564cc984a5d98c0 100644 (file)
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -132,6 +132,25 @@ static const MCPhysReg VSFRegs[64] = {
    PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
    PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
  };
+static const MCPhysReg VSSRegs[64] = { // Register table for VSSRC operands; read as VSSRegs[getVSReg()]
+  PPC::F0,  PPC::F1,  PPC::F2,  PPC::F3, // entries 0-31: F0..F31
+  PPC::F4,  PPC::F5,  PPC::F6,  PPC::F7,
+  PPC::F8,  PPC::F9,  PPC::F10, PPC::F11,
+  PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+  PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+  PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+  PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+  PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+  PPC::VF0,  PPC::VF1,  PPC::VF2,  PPC::VF3, // entries 32-63: VF0..VF31
+  PPC::VF4,  PPC::VF5,  PPC::VF6,  PPC::VF7,
+  PPC::VF8,  PPC::VF9,  PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
  static unsigned QFRegs[32] = {
    PPC::QF0,  PPC::QF1,  PPC::QF2,  PPC::QF3,
    PPC::QF4,  PPC::QF5,  PPC::QF6,  PPC::QF7,
@@ -577,6 +596,11 @@ public:
      Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
    }
  
+  void addRegVSSRCOperands(MCInst &Inst, unsigned N) const { // Appends one VSSRC register operand to Inst.
+    assert(N == 1 && "Invalid number of operands!"); // exactly one operand is expected
+    Inst.addOperand(MCOperand::CreateReg(VSSRegs[getVSReg()])); // map the VS register number through the VSSRegs table
+  }
+
    void addRegQFRCOperands(MCInst &Inst, unsigned N) const {
      assert(N == 1 && "Invalid number of operands!");
      Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()]));
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp

index 5cbf3d9a189f58ab0c837261c84f6ef41c2c48ee..9a5c829aa90b71eabd1ee65cc4ea371544125634 100644 (file)
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -131,6 +131,26 @@ static const unsigned VSFRegs[] = {
    PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
  };
  
+static const unsigned VSSRegs[] = { // Decode table for VSSRC operands; passed to decodeRegisterClass by DecodeVSSRCRegisterClass
+  PPC::F0, PPC::F1, PPC::F2, PPC::F3, // entries 0-31: F0..F31
+  PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+  PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+  PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+  PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+  PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+  PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+  PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+  PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, // entries 32-63: VF0..VF31
+  PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+  PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+  PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+  PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+  PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+  PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+  PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
  static const unsigned GPRegs[] = {
    PPC::R0, PPC::R1, PPC::R2, PPC::R3,
    PPC::R4, PPC::R5, PPC::R6, PPC::R7,
@@ -231,6 +251,12 @@ static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
    return decodeRegisterClass(Inst, RegNo, VSFRegs);
  }
  
+static DecodeStatus DecodeVSSRCRegisterClass(MCInst &Inst, uint64_t RegNo, // Decodes a VSSRC register operand numbered RegNo into Inst.
+                                            uint64_t Address, // not referenced in this body
+                                            const void *Decoder) { // not referenced in this body
+  return decodeRegisterClass(Inst, RegNo, VSSRegs); // delegate, supplying the VSSRegs table
+}
+
  static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp

index 512eddcb0da510062dec73c3e3ba506c84934695..afc1f36ad15267fb8a9033aa5080c9a7874285e1 100644 (file)
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -2734,7 +2734,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
      else if (N->getValueType(0) == MVT::i64)
        SelectCCOp = PPC::SELECT_CC_I8;
      else if (N->getValueType(0) == MVT::f32)
-      SelectCCOp = PPC::SELECT_CC_F4;
+      if (PPCSubTarget->hasP8Vector())
+        SelectCCOp = PPC::SELECT_CC_VSSRC;
+      else
+        SelectCCOp = PPC::SELECT_CC_F4;
      else if (N->getValueType(0) == MVT::f64)
        if (PPCSubTarget->hasVSX())
          SelectCCOp = PPC::SELECT_CC_VSFRC;
@@ -3449,6 +3452,7 @@ void PPCDAGToDAGISel::PeepholeCROps() {
        case PPC::SELECT_QBRC:
        case PPC::SELECT_VRRC:
        case PPC::SELECT_VSFRC:
+      case PPC::SELECT_VSSRC:
        case PPC::SELECT_VSRC: {
          SDValue Op = MachineNode->getOperand(0);
          if (Op.isMachineOpcode()) {
@@ -3759,6 +3763,7 @@ void PPCDAGToDAGISel::PeepholeCROps() {
        case PPC::SELECT_QBRC:
        case PPC::SELECT_VRRC:
        case PPC::SELECT_VSFRC:
+      case PPC::SELECT_VSSRC:
        case PPC::SELECT_VSRC:
          if (Op1Set)
            ResNode = MachineNode->getOperand(1).getNode();
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 33688dc3c084514b5a8e24d250c378b0f5448160..61fc0c92cb91eef3b616a963d85f200535e6c3b1 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -582,6 +582,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
  
        setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
  
+      if (Subtarget.hasP8Vector())
+        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
+
        addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
  
        addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
@@ -2680,7 +2683,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
            RC = &PPC::GPRCRegClass;
            break;
          case MVT::f32:
-          RC = &PPC::F4RCRegClass;
+          if (Subtarget.hasP8Vector())
+            RC = &PPC::VSSRCRegClass;
+          else
+            RC = &PPC::F4RCRegClass;
            break;
          case MVT::f64:
            if (Subtarget.hasVSX())
@@ -3094,7 +3100,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          unsigned VReg;
  
          if (ObjectVT == MVT::f32)
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx],
+                              Subtarget.hasP8Vector()
+                                  ? &PPC::VSSRCRegClass
+                                  : &PPC::F4RCRegClass);
          else
            VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                  ? &PPC::VSFRCRegClass
@@ -8383,6 +8392,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
               MI->getOpcode() == PPC::SELECT_CC_QBRC ||
               MI->getOpcode() == PPC::SELECT_CC_VRRC ||
               MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
+             MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
               MI->getOpcode() == PPC::SELECT_CC_VSRC ||
               MI->getOpcode() == PPC::SELECT_I4 ||
               MI->getOpcode() == PPC::SELECT_I8 ||
@@ -8393,6 +8403,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
               MI->getOpcode() == PPC::SELECT_QBRC ||
               MI->getOpcode() == PPC::SELECT_VRRC ||
               MI->getOpcode() == PPC::SELECT_VSFRC ||
+             MI->getOpcode() == PPC::SELECT_VSSRC ||
               MI->getOpcode() == PPC::SELECT_VSRC) {
      // The incoming instruction knows the destination vreg to set, the
      // condition code register to branch on, the true/false values to
@@ -8429,6 +8440,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
          MI->getOpcode() == PPC::SELECT_QBRC ||
          MI->getOpcode() == PPC::SELECT_VRRC ||
          MI->getOpcode() == PPC::SELECT_VSFRC ||
+        MI->getOpcode() == PPC::SELECT_VSSRC ||
          MI->getOpcode() == PPC::SELECT_VSRC) {
        BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
@@ -10648,7 +10660,10 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
               Constraint == "wf") {
      return std::make_pair(0U, &PPC::VSRCRegClass);
    } else if (Constraint == "ws") {
-    return std::make_pair(0U, &PPC::VSFRCRegClass);
+    if (VT == MVT::f32)
+      return std::make_pair(0U, &PPC::VSSRCRegClass);
+    else
+      return std::make_pair(0U, &PPC::VSFRCRegClass);
    }
  
    std::pair<unsigned, const TargetRegisterClass *> R =
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp

index c9c2949dc6c6d86729f20eba702ecd94859cf2b8..85ba5a1d640fc9868cf93b7c8a2e8d4d86e01dfc 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -815,7 +815,8 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
      // copies are generated, they are close enough to some use that the
      // lower-latency form is preferable.
      Opc = PPC::XXLOR;
-  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+  else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
+           PPC::VSSRCRegClass.contains(DestReg, SrcReg))
      Opc = PPC::XXLORf;
    else if (PPC::QFRCRegClass.contains(DestReg, SrcReg))
      Opc = PPC::QVFMR;
@@ -900,6 +901,12 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
                                                 getKillRegState(isKill)),
                                         FrameIdx));
      NonRI = true;
+  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+                                       .addReg(SrcReg,
+                                               getKillRegState(isKill)),
+                                       FrameIdx));
+    NonRI = true;
    } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isDarwin() &&
             "VRSAVE only needs spill/restore on Darwin");
@@ -1013,6 +1020,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
      NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
                                         FrameIdx));
      NonRI = true;
+  } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
+    NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
+                                       FrameIdx));
+    NonRI = true;
    } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
      assert(Subtarget.isDarwin() &&
             "VRSAVE only needs spill/restore on Darwin");
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td

index 3cff14c594fe7aa34d5ce37576a294ab162e4a40..d93fd5e27d68caf9977f177bddfd7d9f0e1e677a 100644 (file)
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -40,6 +40,13 @@ def vsfrc : RegisterOperand<VSFRC> {
    let ParserMatchClass = PPCRegVSFRCAsmOperand;
  }
  
+def PPCRegVSSRCAsmOperand : AsmOperandClass { // Asm-parser operand class for VSSRC registers
+  let Name = "RegVSSRC"; let PredicateMethod = "isVSRegNumber"; // presumably Name pairs with addRegVSSRCOperands in PPCAsmParser.cpp - confirm
+}
+def vssrc : RegisterOperand<VSSRC> { // VSSRC register operand; used by the LXSSPX/STXSSPX definitions
+  let ParserMatchClass = PPCRegVSSRCAsmOperand; // parsed through the operand class defined just above
+}
+
  // Little-endian-specific nodes.
  def SDT_PPClxvd2x : SDTypeProfile<1, 1, [
    SDTCisVT<0, v2f64>, SDTCisPtrTy<1>
@@ -103,7 +110,7 @@ let Uses = [RM] in {
                           (outs vsrc:$XT), (ins memrr:$src),
                           "lxvw4x $XT, $src", IIC_LdStLFD,
                           [(set v4i32:$XT, (int_ppc_vsx_lxvw4x xoaddr:$src))]>;
-  }
+  } // mayLoad
  
    // Store indexed instructions
    let mayStore = 1 in {
@@ -121,7 +128,8 @@ let Uses = [RM] in {
                           (outs), (ins vsrc:$XT, memrr:$dst),
                           "stxvw4x $XT, $dst", IIC_LdStSTFD,
                           [(store v4i32:$XT, xoaddr:$dst)]>;
-  }
+
+  } // mayStore
  
    // Add/Mul Instructions
    let isCommutable = 1 in {
@@ -791,6 +799,15 @@ let usesCustomInserter = 1,    // Expanded after instruction selection.
                             "#SELECT_VSFRC",
                             [(set f64:$dst,
                                   (select i1:$cond, f64:$T, f64:$F))]>;
+  def SELECT_CC_VSSRC: Pseudo<(outs f4rc:$dst), // f32 select pseudo on a crrc condition plus $BROPC; pattern list is empty
+                              (ins crrc:$cond, f4rc:$T, f4rc:$F,
+                               i32imm:$BROPC), "#SELECT_CC_VSSRC",
+                              []>;
+  def SELECT_VSSRC: Pseudo<(outs f4rc:$dst), // f32 select pseudo on a CR-bit (i1) condition
+                           (ins crbitrc:$cond, f4rc:$T, f4rc:$F), // NOTE(review): operands are f4rc rather than vssrc - confirm intended
+                           "#SELECT_VSSRC",
+                           [(set f32:$dst,
+                                 (select i1:$cond, f32:$T, f32:$F))]>;
  } // usesCustomInserter
  } // AddedComplexity
  
@@ -987,7 +1004,45 @@ def XXLORC : XX3Form<60, 170,
                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
                       "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
                       [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
-} // AddedComplexity = 500
+  // VSX scalar loads introduced in ISA 2.07
+  let mayLoad = 1 in {
+    def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), // selected for an f32 load from an xoaddr address
+                         "lxsspx $XT, $src", IIC_LdStLFD,
+                         [(set f32:$XT, (load xoaddr:$src))]>;
+    def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src), // selected for PPClfiwax; result is typed f64
+                          "lxsiwax $XT, $src", IIC_LdStLFD,
+                          [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+    def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), // selected for PPClfiwzx; result is typed f64
+                          "lxsiwzx $XT, $src", IIC_LdStLFD,
+                          [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+  } // mayLoad
+
+  // VSX scalar stores introduced in ISA 2.07
+  let mayStore = 1 in {
+    def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), // selected for an f32 store to an xoaddr address
+                          "stxsspx $XT, $dst", IIC_LdStSTFD,
+                          [(store f32:$XT, xoaddr:$dst)]>;
+    def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), // selected for PPCstfiwx of an f64-typed value
+                          "stxsiwx $XT, $dst", IIC_LdStSTFD,
+                          [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+  } // mayStore
+def : Pat<(f64 (extloadf32 xoaddr:$src)), // f32->f64 extending load: LXSSPX result copied to register class VSFRC
+          (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+def : Pat<(f64 (fextend f32:$src)), // f32->f64 extend expressed as a register-class copy to VSFRC
+          (COPY_TO_REGCLASS $src, VSFRC)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)), // f32 selectcc on i1 operands: a CR-bit op feeds SELECT_VSSRC
+          (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)), // SETLE -> CRORC with $rhs first
+          (SELECT_VSSRC (CRORC  $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)), // SETEQ -> CREQV
+          (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)), // SETGE -> CRORC with $lhs first
+          (SELECT_VSSRC (CRORC  $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)), // SETGT -> CRANDC with $lhs first
+          (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)), // SETNE -> CRXOR
+          (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+} // AddedComplexity = 400
  } // HasP8Vector
  
  let Predicates = [HasDirectMove, HasVSX] in {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp

index 0e568d3278e1464e9ebee4c8dc088c1c9cdbca49..656376c641aa977fc33c00fd8626d7be6218bd8f 100644 (file)
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -282,6 +282,7 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
      return 32 - DefaultSafety;
    case PPC::VSRCRegClassID:
    case PPC::VSFRCRegClassID:
+  case PPC::VSSRCRegClassID:
      return 64 - DefaultSafety;
    case PPC::CRRCRegClassID:
      return 8 - DefaultSafety;
@@ -300,6 +301,8 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
        return &PPC::VSFRCRegClass;
      else if (RC == &PPC::VRRCRegClass)
        return &PPC::VSRCRegClass;
+    else if (RC == &PPC::F4RCRegClass && Subtarget.hasP8Vector())
+      return &PPC::VSSRCRegClass;
    }
  
    return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td

index 398be78388154a57db3347456e18ed330be1cbff..e5f363c443cdd8f17b88bfee60cd1f9022cf2587 100644 (file)
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -316,6 +316,9 @@ def VFRC :  RegisterClass<"PPC", [f64], 64,
                                 VF22, VF21, VF20)>;
  def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
  
+// Register class for single precision scalars in VSX registers
+def VSSRC : RegisterClass<"PPC", [f32], 32, (add VSFRC)>; // f32-typed; members are those of VSFRC (F8RC + VFRC)
+
  // For QPX
  def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
                                                  (sequence "QF%u", 31, 14))>;
diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll

index 77d066363cdeab4d5537cdfaa00f317abe330765..070a617ffe4f4351fb84c03015d222cdc49acb1d 100644 (file)
--- a/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -42,7 +42,8 @@ entry:
    ret float %x
  }
  ; CHECK: @callee2
-; CHECK: lfs {{[0-9]+}}, 136(1)
+; CHECK: addi [[TOCREG:[0-9]+]], 1, 136
+; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
  ; CHECK: blr
  
  define void @caller2() {
@@ -52,7 +53,8 @@ entry:
    ret void
  }
  ; CHECK: @caller2
-; CHECK: stfs {{[0-9]+}}, 136(1)
+; CHECK: li [[TOCOFF:[0-9]+]], 136
+; CHECK: stxsspx {{[0-9]+}}, 1, [[TOCOFF]]
  ; CHECK: bl test2
  
  declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll

new file mode 100644 (file)

index 0000000..1029708
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+
+@d = common global double 0.000000e+00, align 8
+@f = common global float 0.000000e+00, align 4
+@i = common global i32 0, align 4
+@ui = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @dblToInt() #0 { ; fptosi double -> i32, stored to a stack slot; expects xscvdpsxws then stxsiwx of the same register
+entry:
+  %ii = alloca i32, align 4
+  %0 = load double, double* @d, align 8
+  %conv = fptosi double %0 to i32
+  store volatile i32 %conv, i32* %ii, align 4
+  ret void
+; CHECK-LABEL: @dblToInt
+; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV1]],
+}
+
+; Function Attrs: nounwind
+define void @fltToInt() #0 { ; fptosi float -> i32, stored to a stack slot; expects xscvdpsxws then stxsiwx of the same register
+entry:
+  %ii = alloca i32, align 4
+  %0 = load float, float* @f, align 4
+  %conv = fptosi float %0 to i32
+  store volatile i32 %conv, i32* %ii, align 4
+  ret void
+; CHECK-LABEL: @fltToInt
+; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV2]],
+}
+
+; Function Attrs: nounwind
+define void @intToDbl() #0 { ; sitofp i32 -> double; expects lxsiwax to load the integer, then xscvsxddp on that register
+entry:
+  %dd = alloca double, align 8
+  %0 = load i32, i32* @i, align 4
+  %conv = sitofp i32 %0 to double
+  store volatile double %conv, double* %dd, align 8
+  ret void
+; CHECK-LABEL: @intToDbl
+; CHECK: lxsiwax [[REGLD1:[0-9]+]],
+; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]]
+}
+
+; Function Attrs: nounwind
+define void @intToFlt() #0 { ; sitofp i32 -> float; expects lxsiwax to load the integer, then fcfids on that register
+entry:
+  %ff = alloca float, align 4
+  %0 = load i32, i32* @i, align 4
+  %conv = sitofp i32 %0 to float
+  store volatile float %conv, float* %ff, align 4
+  ret void
+; CHECK-LABEL: @intToFlt
+; CHECK: lxsiwax [[REGLD2:[0-9]+]],
+; FIXME: the below will change when the VSX form is implemented
+; CHECK: fcfids {{[0-9]+}}, [[REGLD2]]
+}
+
+; Function Attrs: nounwind
+define void @dblToUInt() #0 { ; fptoui double -> i32, stored to a stack slot; expects xscvdpuxws then stxsiwx of the same register
+entry:
+  %uiui = alloca i32, align 4
+  %0 = load double, double* @d, align 8
+  %conv = fptoui double %0 to i32
+  store volatile i32 %conv, i32* %uiui, align 4
+  ret void
+; CHECK-LABEL: @dblToUInt
+; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV3]],
+}
+
+; Function Attrs: nounwind
+define void @fltToUInt() #0 { ; fptoui float -> i32, stored to a stack slot; expects xscvdpuxws then stxsiwx of the same register
+entry:
+  %uiui = alloca i32, align 4
+  %0 = load float, float* @f, align 4
+  %conv = fptoui float %0 to i32
+  store volatile i32 %conv, i32* %uiui, align 4
+  ret void
+; CHECK-LABEL: @fltToUInt
+; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV4]],
+}
+
+; Function Attrs: nounwind
+define void @uIntToDbl() #0 { ; uitofp i32 -> double; expects lxsiwzx to load the integer, then xscvuxddp on that register
+entry:
+  %dd = alloca double, align 8
+  %0 = load i32, i32* @ui, align 4
+  %conv = uitofp i32 %0 to double
+  store volatile double %conv, double* %dd, align 8
+  ret void
+; CHECK-LABEL: @uIntToDbl
+; CHECK: lxsiwzx [[REGLD3:[0-9]+]],
+; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]]
+}
+
+; Function Attrs: nounwind
+define void @uIntToFlt() #0 { ; uitofp i32 -> float; expects lxsiwzx to load the integer, then fcfidus on that register
+entry:
+  %ff = alloca float, align 4
+  %0 = load i32, i32* @ui, align 4
+  %conv = uitofp i32 %0 to float
+  store volatile float %conv, float* %ff, align 4
+  ret void
+; CHECK-LABEL: @uIntToFlt
+; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
+; FIXME: the below will change when the VSX form is implemented
+; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]]
+}
+
+; Function Attrs: nounwind
+define void @dblToFloat() #0 { ; fptrunc double -> float; expects an lxsdx load and an stxsspx store naming the same register
+entry:
+  %ff = alloca float, align 4
+  %0 = load double, double* @d, align 8
+  %conv = fptrunc double %0 to float
+  store volatile float %conv, float* %ff, align 4
+  ret void
+; CHECK-LABEL: @dblToFloat
+; CHECK: lxsdx [[REGLD5:[0-9]+]],
+; CHECK: stxsspx [[REGLD5]],
+}
+
+; Function Attrs: nounwind
+define void @floatToDbl() #0 { ; fpext float -> double; expects an lxsspx load and an stxsdx store naming the same register
+entry:
+  %dd = alloca double, align 8
+  %0 = load float, float* @f, align 4
+  %conv = fpext float %0 to double
+  store volatile double %conv, double* %dd, align 8
+  ret void
+; CHECK-LABEL: @floatToDbl
+; CHECK: lxsspx [[REGLD5:[0-9]+]],
+; CHECK: stxsdx [[REGLD5]],
+}
diff --git a/test/MC/Disassembler/PowerPC/vsx.txt b/test/MC/Disassembler/PowerPC/vsx.txt

index bda25dfd2cb5ec2e5d335450700852e289481f7f..417efd0fe17130af8720ca497a30f9648edf5e25 100644 (file)
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@@ -3,6 +3,15 @@
  # CHECK: lxsdx 7, 5, 31
  0x7c 0xe5 0xfc 0x98
  
+# CHECK: lxsiwax 7, 5, 31
+0x7c 0xe5 0xf8 0x98
+
+# CHECK: lxsiwzx 7, 5, 31
+0x7c 0xe5 0xf8 0x18
+
+# CHECK: lxsspx 7, 5, 31
+0x7c 0xe5 0xfc 0x18
+
  # CHECK: lxvd2x 7, 5, 31
  0x7c 0xe5 0xfe 0x98
  
@@ -15,6 +24,12 @@
  # CHECK: stxsdx 8, 5, 31
  0x7d 0x05 0xfd 0x98
  
+# CHECK: stxsiwx 8, 5, 31
+0x7d 0x05 0xf9 0x18
+
+# CHECK: stxsspx 8, 5, 31
+0x7d 0x05 0xfd 0x18
+
  # CHECK: stxvd2x 8, 5, 31
  0x7d 0x05 0xff 0x98
  
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s

index f723a3ebc1a50c161a439baf55e63cb7a0258a08..75c934f78c1380a99e97a3ca0b1a315b9350ab1a 100644 (file)
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@@ -5,26 +5,41 @@
  # CHECK-LE: xxswapd 7, 63                      # encoding: [0x56,0xfa,0xff,0xf0]
              xxswapd %vs7, %vs63
  
-# CHECK-BE: lxsdx 39, 5, 31                     # encoding: [0x7c,0xe5,0xfc,0x99]
-# CHECK-LE: lxsdx 39, 5, 31                     # encoding: [0x99,0xfc,0xe5,0x7c]
+# CHECK-BE: lxsdx 39, 5, 31                    # encoding: [0x7c,0xe5,0xfc,0x99]
+# CHECK-LE: lxsdx 39, 5, 31                    # encoding: [0x99,0xfc,0xe5,0x7c]
              lxsdx 39, 5, 31
-# CHECK-BE: lxvd2x 39, 5, 31                    # encoding: [0x7c,0xe5,0xfe,0x99]
-# CHECK-LE: lxvd2x 39, 5, 31                    # encoding: [0x99,0xfe,0xe5,0x7c]
+# CHECK-BE: lxsiwax 39, 5, 31                  # encoding: [0x7c,0xe5,0xf8,0x99]
+# CHECK-LE: lxsiwax 39, 5, 31                  # encoding: [0x99,0xf8,0xe5,0x7c]
+            lxsiwax 39, 5, 31
+# CHECK-BE: lxsiwzx 39, 5, 31                  # encoding: [0x7c,0xe5,0xf8,0x19]
+# CHECK-LE: lxsiwzx 39, 5, 31                  # encoding: [0x19,0xf8,0xe5,0x7c]
+            lxsiwzx 39, 5, 31
+# CHECK-BE: lxsspx 39, 5, 31                   # encoding: [0x7c,0xe5,0xfc,0x19]
+# CHECK-LE: lxsspx 39, 5, 31                   # encoding: [0x19,0xfc,0xe5,0x7c]
+            lxsspx 39, 5, 31
+# CHECK-BE: lxvd2x 39, 5, 31                   # encoding: [0x7c,0xe5,0xfe,0x99]
+# CHECK-LE: lxvd2x 39, 5, 31                   # encoding: [0x99,0xfe,0xe5,0x7c]
              lxvd2x 39, 5, 31
-# CHECK-BE: lxvdsx 39, 5, 31                    # encoding: [0x7c,0xe5,0xfa,0x99]
-# CHECK-LE: lxvdsx 39, 5, 31                    # encoding: [0x99,0xfa,0xe5,0x7c]
+# CHECK-BE: lxvdsx 39, 5, 31                   # encoding: [0x7c,0xe5,0xfa,0x99]
+# CHECK-LE: lxvdsx 39, 5, 31                   # encoding: [0x99,0xfa,0xe5,0x7c]
              lxvdsx 39, 5, 31
-# CHECK-BE: lxvw4x 39, 5, 31                    # encoding: [0x7c,0xe5,0xfe,0x19]
-# CHECK-LE: lxvw4x 39, 5, 31                    # encoding: [0x19,0xfe,0xe5,0x7c]
+# CHECK-BE: lxvw4x 39, 5, 31                   # encoding: [0x7c,0xe5,0xfe,0x19]
+# CHECK-LE: lxvw4x 39, 5, 31                   # encoding: [0x19,0xfe,0xe5,0x7c]
              lxvw4x 39, 5, 31
-# CHECK-BE: stxsdx 40, 5, 31                    # encoding: [0x7d,0x05,0xfd,0x99]
-# CHECK-LE: stxsdx 40, 5, 31                    # encoding: [0x99,0xfd,0x05,0x7d]
+# CHECK-BE: stxsdx 40, 5, 31                   # encoding: [0x7d,0x05,0xfd,0x99]
+# CHECK-LE: stxsdx 40, 5, 31                   # encoding: [0x99,0xfd,0x05,0x7d]
              stxsdx 40, 5, 31
-# CHECK-BE: stxvd2x 40, 5, 31                   # encoding: [0x7d,0x05,0xff,0x99]
-# CHECK-LE: stxvd2x 40, 5, 31                   # encoding: [0x99,0xff,0x05,0x7d]
+# CHECK-BE: stxsiwx 40, 5, 31                  # encoding: [0x7d,0x05,0xf9,0x19]
+# CHECK-LE: stxsiwx 40, 5, 31                  # encoding: [0x19,0xf9,0x05,0x7d]
+            stxsiwx 40, 5, 31
+# CHECK-BE: stxsspx 40, 5, 31                  # encoding: [0x7d,0x05,0xfd,0x19]
+# CHECK-LE: stxsspx 40, 5, 31                  # encoding: [0x19,0xfd,0x05,0x7d]
+            stxsspx 40, 5, 31
+# CHECK-BE: stxvd2x 40, 5, 31                  # encoding: [0x7d,0x05,0xff,0x99]
+# CHECK-LE: stxvd2x 40, 5, 31                  # encoding: [0x99,0xff,0x05,0x7d]
              stxvd2x 40, 5, 31
-# CHECK-BE: stxvw4x 40, 5, 31                   # encoding: [0x7d,0x05,0xff,0x19]
-# CHECK-LE: stxvw4x 40, 5, 31                   # encoding: [0x19,0xff,0x05,0x7d]
+# CHECK-BE: stxvw4x 40, 5, 31                  # encoding: [0x7d,0x05,0xff,0x19]
+# CHECK-LE: stxvw4x 40, 5, 31                  # encoding: [0x19,0xff,0x05,0x7d]
              stxvw4x 40, 5, 31
  # CHECK-BE: xsabsdp 7, 27                      # encoding: [0xf0,0xe0,0xdd,0x64]
  # CHECK-LE: xsabsdp 7, 27                      # encoding: [0x64,0xdd,0xe0,0xf0]
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Thu, 7 May 2015 18:24:05 +0000 (18:24 +0000)
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Thu, 7 May 2015 18:24:05 +0000 (18:24 +0000)
lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp		patch \| blob \| history
lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCInstrInfo.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCInstrVSX.td		patch \| blob \| history
lib/Target/PowerPC/PPCRegisterInfo.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCRegisterInfo.td		patch \| blob \| history
test/CodeGen/PowerPC/ppc64le-smallarg.ll		patch \| blob \| history
test/CodeGen/PowerPC/vsx_scalar_ld_st.ll	[new file with mode: 0644]	patch \| blob
test/MC/Disassembler/PowerPC/vsx.txt		patch \| blob \| history
test/MC/PowerPC/vsx.s		patch \| blob \| history