SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+ static unsigned IntPairRegs[] = {
+ Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7,
+ Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7,
+ Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7,
+ Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7};
+
/// SparcOperand - Instances of this class represent a parsed Sparc machine
/// instruction.
class SparcOperand : public MCParsedAsmOperand {
enum RegisterKind {
rk_None,
rk_IntReg,
+ rk_IntPairReg,
rk_FloatReg,
rk_DoubleReg,
rk_QuadReg,
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
+ bool isIntReg() const {
+ return (Kind == k_Register && Reg.Kind == rk_IntReg);
+ }
+
bool isFloatReg() const {
return (Kind == k_Register && Reg.Kind == rk_FloatReg);
}
return Op;
}
+ static bool MorphToIntPairReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
+ assert(Op.Reg.Kind == rk_IntReg);
+ unsigned regIdx = 32;
+ if (Reg >= Sparc::G0 && Reg <= Sparc::G7)
+ regIdx = Reg - Sparc::G0;
+ else if (Reg >= Sparc::O0 && Reg <= Sparc::O7)
+ regIdx = Reg - Sparc::O0 + 8;
+ else if (Reg >= Sparc::L0 && Reg <= Sparc::L7)
+ regIdx = Reg - Sparc::L0 + 16;
+ else if (Reg >= Sparc::I0 && Reg <= Sparc::I7)
+ regIdx = Reg - Sparc::I0 + 24;
+ if (regIdx % 2 || regIdx > 31)
+ return false;
+ Op.Reg.RegNum = IntPairRegs[regIdx / 2];
+ Op.Reg.Kind = rk_IntPairReg;
+ return true;
+ }
+
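As a quick check of the index math above, here is a standalone C++ sketch (not part of the patch; plain ints stand in for the Sparc::* register enums): only an even register index names a pair, and index/2 selects it.

    #include <cassert>

    // Stand-in pair table: value k represents the k-th pair
    // (G0_G1 .. I6_I7 in the real IntPairRegs array above).
    static const unsigned PairTable[16] = {0, 1, 2,  3,  4,  5,  6,  7,
                                           8, 9, 10, 11, 12, 13, 14, 15};

    // Mirror of MorphToIntPairReg's check: returns the pair index for
    // integer register index 0..31, or -1 for odd/out-of-range indices.
    static int pairFor(unsigned regIdx) {
      if (regIdx % 2 || regIdx > 31)
        return -1;
      return static_cast<int>(PairTable[regIdx / 2]);
    }

    int main() {
      assert(pairFor(10) == 5);  // %o2 (index 10) -> O2_O3, the 6th pair
      assert(pairFor(11) == -1); // %o3 is odd: no pair starts here
    }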
static bool MorphToDoubleReg(SparcOperand &Op) {
unsigned Reg = Op.getReg();
assert(Op.Reg.Kind == rk_FloatReg);
break;
}
}
+ if (Op.isIntReg() && Kind == MCK_IntPair) {
+ if (SparcOperand::MorphToIntPairReg(Op))
+ return MCTargetAsmParser::Match_Success;
+ }
return Match_InvalidOperand;
}
SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+static const uint16_t IntPairDecoderTable[] = {
+ SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
+ SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7,
+ SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7,
+ SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7,
+};
+
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ if ((RegNo & 1))
+ S = MCDisassembler::SoftFail;
+
+ unsigned RegisterPair = IntPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::createReg(RegisterPair));
+ return S;
+}
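The decoder applies the same mapping to the instruction's 5-bit rd field. A small standalone sketch (illustrative names, not part of the patch) shows the even/odd behavior: an odd rd still decodes to the enclosing pair, but is flagged SoftFail.

    #include <cstdio>

    const char *PairNames[16] = {"G0_G1", "G2_G3", "G4_G5", "G6_G7",
                                 "O0_O1", "O2_O3", "O4_O5", "O6_O7",
                                 "L0_L1", "L2_L3", "L4_L5", "L6_L7",
                                 "I0_I1", "I2_I3", "I4_I5", "I6_I7"};

    int main() {
      unsigned rds[2] = {10, 11};
      for (unsigned rd : rds) {
        bool softFail = (rd & 1) != 0; // odd rd: decodes, but SoftFail
        std::printf("rd=%u -> %s%s\n", rd, PairNames[rd / 2],
                    softFail ? " (SoftFail)" : "");
      }
      // Prints: rd=10 -> O2_O3, then rd=11 -> O2_O3 (SoftFail)
    }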
static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
DecodeIntRegsRegisterClass);
}
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true,
+ DecodeIntPairRegisterClass);
+}
+
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
DecodeFPRegsRegisterClass);
}
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, false,
+ DecodeIntPairRegisterClass);
+}
+
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
// i32 f32 arguments get passed in integer registers if there is space.
CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
// f64 arguments are split and passed through registers or through stack.
- CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>,
+ // As are v2i32 arguments (this would be the default behavior for
+ // v2i32 if it wasn't allocated to the IntPair register-class)
+ CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>,
+
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>,
+ CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">>
]>;
}
void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
-
MachineRegisterInfo &MRI = MF.getRegInfo();
-
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
if (MRI.reg_nodbg_empty(reg))
continue;
- unsigned mapped_reg = (reg - SP::I0 + SP::O0);
+
+ unsigned mapped_reg = reg - SP::I0 + SP::O0;
assert(MRI.reg_nodbg_empty(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
+
+ // Also replace register pair super-registers.
+ if ((reg - SP::I0) % 2 == 0) {
+ unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1;
+ unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1;
+ MRI.replaceRegWith(preg, mapped_preg);
+ }
}
// Rewrite MBB's Live-ins.
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
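+ // Remap the pair live-ins as well: %i0_i1..%i6_i7 become the
+ // corresponding %o0_o1..%o6_o7 pairs.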
+ for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) {
+ if (!MBB->isLiveIn(reg))
+ continue;
+ MBB->removeLiveIn(reg);
+ MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
+ }
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
if (!MBB->isLiveIn(reg))
continue;
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
private:
SDNode* getGlobalBaseReg();
+ SDNode *SelectInlineAsm(SDNode *N);
};
} // end anonymous namespace
return true;
}
+
+// Re-assemble i64 arguments split up in SelectionDAGBuilder's
+// visitInlineAsm / GetRegistersForValue functions.
+//
+// Note: This function was copied from, and is essentially identical
+// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that
+// such hacking-up is necessary; a rethink of how inline asm operands
+// are handled may be in order to make doing this more sane.
+//
+// TODO: fix inline asm support so I can simply tell it that 'i64'
+// inputs to asm need to be allocated to the IntPair register type,
+// and have that work. Then, delete this function.
+SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N) {
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
+ // constraint. However, some instructions (e.g. ldd/std) require
+ // (even, even+1) GPR pairs.
+
+ // So, here, we check for this case, and mutate the inlineasm to use
+ // a single IntPair register instead, which guarantees such even/odd
+ // placement.
+
+ SDLoc dl(N);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
+ : SDValue(nullptr,0);
+
+ SmallVector<bool, 8> OpChanged;
+ // Glue node will be appended late.
+ for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ // Immediate operands to inline asm in the SelectionDAG are modeled with
+ // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // the second is a constant with the value of the immediate. If we get here
+ // and we have a Kind_Imm, skip the next operand, and continue.
+ if (Kind == InlineAsm::Kind_Imm) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ if (NumRegs)
+ OpChanged.push_back(false);
+
+ unsigned DefIdx = 0;
+ bool IsTiedToChangedOp = false;
+ // If it's a use that is tied with a previous def, it has no
+ // reg class constraint.
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ IsTiedToChangedOp = OpChanged[DefIdx];
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID))
+ || NumRegs != 2)
+ continue;
+
+ assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, Ops);
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(
+ CurDAG->getMachineNode(
+ TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32,
+ {
+ CurDAG->getTargetConstant(SP::IntPairRegClassID, dl,
+ MVT::i32),
+ T0,
+ CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32),
+ T1,
+ CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32),
+ }),
+ 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if (PairedReg.getNode()) {
+ OpChanged[OpChanged.size() - 1] = true;
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() - 1] = CurDAG->getTargetConstant(
+ Flag, dl, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ if (Glue.getNode())
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return nullptr;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
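The core of the rewrite is list surgery on the inline-asm operand vector: a flag word describing two IntRegs registers (followed by its two RegisterSDNodes) becomes a flag word describing one IntPair register followed by a single paired register node. A simplified standalone sketch of that surgery; the struct here is a stand-in for LLVM's packed flag words (InlineAsm::getFlagWord / getFlagWordForRegClass in the real API):

    #include <cassert>
    #include <string>
    #include <vector>

    // Simplified stand-in for a packed inline-asm operand flag.
    struct OpFlag { unsigned NumRegs; unsigned RegClass; };
    enum { IntRegsRC, IntPairRC };

    // Rewrite "flag(2 x IntRegs), reg0, reg1" into "flag(1 x IntPair), pair".
    static void morphToPair(std::vector<std::string> &Ops, size_t FlagIdx,
                            OpFlag &F, const std::string &PairReg) {
      assert(F.NumRegs == 2 && F.RegClass == IntRegsRC);
      F = {1, IntPairRC};                   // one register, pair class
      Ops.erase(Ops.begin() + FlagIdx + 1,
                Ops.begin() + FlagIdx + 3); // drop the two GPR operands
      Ops.insert(Ops.begin() + FlagIdx + 1, PairReg);
    }

    int main() {
      std::vector<std::string> Ops = {"flag", "%o2", "%o3"};
      OpFlag F = {2, IntRegsRC};
      morphToPair(Ops, 0, F, "%o2_o3");
      assert(Ops.size() == 2 && Ops[1] == "%o2_o3" && F.NumRegs == 1);
    }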
SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
switch (N->getOpcode()) {
default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case SPISD::GLOBAL_BASE_REG:
return getGlobalBaseReg();
return true;
}
-static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State)
+static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
return true;
}
+static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const MCPhysReg RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+
+ // Try to get first reg.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ return false;
+
+ // Try to get second reg.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ return false;
+
+ return true;
+}
+
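Both halves of the split value are allocated independently and sequentially from the same six-register return list, so on an otherwise-unused CCState they land in the first two entries. A toy model of that allocation (standalone sketch; the real CCState::AllocateReg does the equivalent bookkeeping):

    #include <cassert>
    #include <cstring>

    const char *RegList[6] = {"%i0", "%i1", "%i2", "%i3", "%i4", "%i5"};

    // Toy CCState: hands out the next free register from the list.
    struct ToyCCState {
      bool Used[6] = {};
      const char *AllocateReg() {
        for (int i = 0; i < 6; ++i)
          if (!Used[i]) { Used[i] = true; return RegList[i]; }
        return nullptr; // out of registers: the CC function returns false
      }
    };

    int main() {
      ToyCCState State;
      const char *Hi = State.AllocateReg(); // first half of the v2i32
      const char *Lo = State.AllocateReg(); // second half
      assert(!std::strcmp(Hi, "%i0") && !std::strcmp(Lo, "%i1"));
    }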
// Allocate a full-sized argument for the 64-bit ABI.
static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
RetOps.push_back(SDValue());
// Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
- OutVals[i], Flag);
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::v2i32);
+ // Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would
+ // happen by default if this wasn't a legal type)
+
+ SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Arg,
+ DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Arg,
+ DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
- assert(VA.getLocVT() == MVT::f64);
+ assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
+
unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
}
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
- WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (VA.needsCustom()) {
- assert(VA.getValVT() == MVT::f64);
+ assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
int FI = MF.getFrameInfo()->CreateFixedObject(8,
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
- WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
}
if (VA.needsCustom()) {
- assert(VA.getLocVT() == MVT::f64);
+ assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
if (VA.isMemLoc()) {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
}
}
- SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Arg, StackPtr, MachinePointerInfo(),
- false, false, 0);
- // Sparc is big-endian, so the high part comes first.
- SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
- // Increment the pointer to the other half.
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(4, dl));
- // Load the low part.
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
+ if (VA.getLocVT() == MVT::f64) {
+ // Move the float value from the float registers into the
+ // integer registers.
+
+ // TODO: this conversion is done in two steps, because
+ // f64->i64 conversion is done efficiently, and i64->v2i32 is
+ // basically a no-op. But f64->v2i32 is NOT done efficiently
+ // for some reason.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
+ }
+
+ SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ Arg,
+ DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ Arg,
+ DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));
if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
assert(i+1 != e);
CCValAssign &NextVA = ArgLocs[++i];
if (NextVA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
} else {
- // Store the low part in stack.
+ // Store the second part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
} else {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
- // Store the high part.
+ // Store the first part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff,
MachinePointerInfo(),
false, false, 0));
- // Store the low part.
+ // Store the second part.
PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
- if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit()) {
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
+ } else {
+ // On 32-bit SPARC, we also define a register class for pairs of
+ // 32-bit registers. This is modeled in LLVM as a 2-vector of i32.
+ addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);
+
+ // ...but almost all operations must be expanded, so set that as
+ // the default.
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ setOperationAction(Op, MVT::v2i32, Expand);
+ }
+ // Truncating/extending stores/loads are also not supported.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);
+
+ setTruncStoreAction(VT, MVT::v2i32, Expand);
+ setTruncStoreAction(MVT::v2i32, VT, Expand);
+ }
+ // However, load and store *are* legal.
+ setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
+ setOperationAction(ISD::STORE, MVT::v2i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);
+
+ // And we need to custom-lower i64 loads/stores into v2i32 vector
+ // loads/stores.
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+
+ // Sadly, this doesn't work:
+ // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+ // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+ }
// Turn FP extload into load/fextend
for (MVT VT : MVT::fp_valuetypes()) {
return DAG.getMergeValues(Ops, dl);
}
+static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
+{
+ LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return LowerF128Load(Op, DAG);
+
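+ // i64 loads never reach here on 32-bit targets: i64 is not a legal
+ // type there, so ReplaceNodeResults (below) legalizes them into a
+ // v2i32 load plus a bitcast instead.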
+ return Op;
+}
+
// Lower a f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
+{
+ SDLoc dl(Op);
+ StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
+
+ EVT MemVT = St->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return LowerF128Store(Op, DAG);
+
+ if (MemVT == MVT::i64) {
+ // Custom handling for i64 stores: turn them into a bitcast and a
+ // v2i32 store.
+ SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
+ SDValue Chain = DAG.getStore(
+ St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ return Chain;
+ }
+
+ return SDValue();
+}
+
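Since SPARC is big-endian, element 0 of the v2i32 (the even register of the pair, stored at the lower address by std) carries the most-significant word of the original i64. A standalone illustration of that byte-level equivalence (not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Big-endian split: word 0 (even register, lower address) holds the
    // high half of the i64; word 1 (odd register) holds the low half.
    int main() {
      uint64_t v = 0x0123456789abcdefULL;
      uint32_t w0 = uint32_t(v >> 32); // -> even register of the pair
      uint32_t w1 = uint32_t(v);       // -> odd register of the pair
      assert(w0 == 0x01234567u && w1 == 0x89abcdefu);
      // "std %even, [addr]" stores w0 at addr and w1 at addr+4: exactly
      // the big-endian memory image of the original i64.
    }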
static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
&& "invalid opcode");
return SDValue();
}
-
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
Subtarget);
- case ISD::LOAD: return LowerF128Load(Op, DAG);
- case ISD::STORE: return LowerF128Store(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FADD: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::ADD_F128), 2);
case ISD::FSUB: return LowerF128Op(Op, DAG,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- return std::make_pair(0U, &SP::IntRegsRegClass);
+ if (VT == MVT::v2i32)
+ return std::make_pair(0U, &SP::IntPairRegClass);
+ else
+ return std::make_pair(0U, &SP::IntRegsRegClass);
}
- } else if (!Constraint.empty() && Constraint.size() <= 5
+ } else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
// constraint = '{r<d>}'
// Remove the braces from around the name.
getLibcallName(libCall),
1));
return;
+ case ISD::LOAD: {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ // Custom handling only for i64: turn an i64 load into a v2i32 load
+ // and a bitcast.
+ if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
+ return;
+
+ SDLoc dl(N);
+ SDValue LoadRes = DAG.getExtLoad(
+ Ld->getExtensionType(), dl, MVT::v2i32,
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo());
+
+ SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
+ Results.push_back(Res);
+ Results.push_back(LoadRes.getValue(1));
+ return;
+ }
}
}
}
void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) const override;
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const override;
MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
unsigned numSubRegs = 0;
unsigned movOpc = 0;
const unsigned *subRegIdx = nullptr;
+ bool ExtraG0 = false;
+ const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
.addReg(SrcReg, getKillRegState(KillSrc));
- else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) {
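+ // A pair copy expands into two integer moves; since an integer move
+ // is encoded as "or %g0, %src, %dst", ExtraG0 makes the loop below
+ // add the leading %g0 operand to each sub-register move.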
+ subRegIdx = DW_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::ORrr;
+ ExtraG0 = true;
+ } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
assert(Dst && Src && "Bad sub-register");
- MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
+ if (ExtraG0)
+ MIB.addReg(SP::G0);
+ MIB.addReg(Src);
+ MovMI = MIB.getInstr();
}
// Add implicit super-register defs and kills to the last MovMI.
MovMI->addRegisterDefined(DestReg, TRI);
MFI.getObjectAlignment(FI));
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
- if (RC == &SP::I64RegsRegClass)
+ if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &SP::IntPairRegClass)
+ BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
+ else if (RC == &SP::IntPairRegClass)
+ BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
}
+let DecoderMethod = "DecodeLoadIntPair" in
+ defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>;
+
// Section B.2 - Load Floating-point Instructions, p. 92
let DecoderMethod = "DecodeLoadFP" in
defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>;
}
+let DecoderMethod = "DecodeStoreIntPair" in
+ defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
+
// Section B.5 - Store Floating-point Instructions, p. 97
let DecoderMethod = "DecodeStoreFP" in
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>;
def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>;
+// extract_vector
+def : Pat<(vector_extract (v2i32 IntPair:$Rn), 0),
+ (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>;
+def : Pat<(vector_extract (v2i32 IntPair:$Rn), 1),
+ (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>;
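+// Note: on this big-endian target, element 0 maps to sub_even, the
+// most-significant word when the pair is viewed as an i64.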
+
+// build_vector
+def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even),
+ (i32 IntRegs:$a2), sub_odd)>;
+
include "SparcInstr64Bit.td"
include "SparcInstrVIS.td"
Reserved.set(SP::G6);
Reserved.set(SP::G7);
+ // Also reserve the register pair aliases covering the above
+ // registers, with the same conditions.
+ Reserved.set(SP::G0_G1);
+ if (ReserveAppRegisters)
+ Reserved.set(SP::G2_G3);
+ if (ReserveAppRegisters || !Subtarget.is64Bit())
+ Reserved.set(SP::G4_G5);
+
+ Reserved.set(SP::O6_O7);
+ Reserved.set(SP::I6_I7);
+ Reserved.set(SP::G6_G7);
+
// Unaliased double registers are not available in non-V9 targets.
if (!Subtarget.isV9()) {
for (unsigned n = 0; n != 16; ++n) {
unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
-
// Ri - 32-bit integer registers
class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;
+// Rdi - pairs of 32-bit integer registers
+class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+ let CoveredBySubRegs = 1;
+}
// Rf - 32-bit floating-point registers
class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>;
def Q14 : Rq<25, "F56", [D28, D29]>;
def Q15 : Rq<29, "F60", [D30, D31]>;
+// Aliases of the integer registers used for LDD/STD double-word operations
+def G0_G1 : Rdi<0, "G0", [G0, G1]>;
+def G2_G3 : Rdi<2, "G2", [G2, G3]>;
+def G4_G5 : Rdi<4, "G4", [G4, G5]>;
+def G6_G7 : Rdi<6, "G6", [G6, G7]>;
+def O0_O1 : Rdi<8, "O0", [O0, O1]>;
+def O2_O3 : Rdi<10, "O2", [O2, O3]>;
+def O4_O5 : Rdi<12, "O4", [O4, O5]>;
+def O6_O7 : Rdi<14, "O6", [O6, O7]>;
+def L0_L1 : Rdi<16, "L0", [L0, L1]>;
+def L2_L3 : Rdi<18, "L2", [L2, L3]>;
+def L4_L5 : Rdi<20, "L4", [L4, L5]>;
+def L6_L7 : Rdi<22, "L6", [L6, L7]>;
+def I0_I1 : Rdi<24, "I0", [I0, I1]>;
+def I2_I3 : Rdi<26, "I2", [I2, I3]>;
+def I4_I5 : Rdi<28, "I4", [I4, I5]>;
+def I6_I7 : Rdi<30, "I6", [I6, I7]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
(sequence "L%u", 0, 7),
(sequence "O%u", 0, 7))>;
+// Should be in the same order as IntRegs.
+def IntPair : RegisterClass<"SP", [v2i32], 64,
+ (add I0_I1, I2_I3, I4_I5, I6_I7,
+ G0_G1, G2_G3, G4_G5, G6_G7,
+ L0_L1, L2_L3, L4_L5, L6_L7,
+ O0_O1, O2_O3, O4_O5, O6_O7)>;
+
// Register class for 64-bit mode, with a 64-bit spill slot size.
// These are the same as the 32-bit registers, so TableGen will consider this
// to be a sub-class of IntRegs. That works out because requiring a 64-bit
+
ret i64 %r
}
+; CHECK-LABEL: load_store_64bit:
+; CHECK: ldd [%o0], %o2
+; CHECK: addcc %o3, 3, %o5
+; CHECK: addxcc %o2, 0, %o4
+; CHECK: retl
+; CHECK: std %o4, [%o1]
+define void @load_store_64bit(i64* %x, i64* %y) {
+entry:
+ %0 = load i64, i64* %x
+ %add = add nsw i64 %0, 3
+ store i64 %add, i64* %y
+ ret void
+}
-; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
+; RUN: llc -march=sparc <%s | FileCheck %s
; CHECK-LABEL: test_constraint_r
; CHECK: add %o1, %o0, %o0
ret i32 %0
}
-; CHECK-LABEL: test_constraint_I
+; CHECK-LABEL: test_constraint_I:
; CHECK: add %o0, 1023, %o0
define i32 @test_constraint_I(i32 %a) {
entry:
ret i32 %0
}
-; CHECK-LABEL: test_constraint_I_neg
+; CHECK-LABEL: test_constraint_I_neg:
; CHECK: add %o0, -4096, %o0
define i32 @test_constraint_I_neg(i32 %a) {
entry:
ret i32 %0
}
-; CHECK-LABEL: test_constraint_I_largeimm
+; CHECK-LABEL: test_constraint_I_largeimm:
; CHECK: sethi 9, [[R0:%[gilo][0-7]]]
; CHECK: or [[R0]], 784, [[R1:%[gilo][0-7]]]
; CHECK: add %o0, [[R1]], %o0
ret i32 %0
}
-; CHECK-LABEL: test_constraint_reg
+; CHECK-LABEL: test_constraint_reg:
; CHECK: ldda [%o1] 43, %g2
-; CHECK: ldda [%o1] 43, %g3
+; CHECK: ldda [%o1] 43, %g4
define void @test_constraint_reg(i32 %s, i32* %ptr) {
entry:
%0 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={r2},r,n"(i32* %ptr, i32 43)
- %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g3},r,n"(i32* %ptr, i32 43)
+ %1 = tail call i64 asm sideeffect "ldda [$1] $2, $0", "={g4},r,n"(i32* %ptr, i32 43)
ret void
}
+
+;; Ensure that i64 args to asm are allocated to the IntPair register class.
+;; Also check that register renaming for leaf procs works.
+; CHECK-LABEL: test_constraint_r_i64:
+; CHECK: mov %o0, %o5
+; CHECK: sra %o5, 31, %o4
+; CHECK: std %o4, [%o1]
+define i32 @test_constraint_r_i64(i32 %foo, i64* %out, i32 %o) {
+entry:
+ %conv = sext i32 %foo to i64
+ tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
+ ret i32 %o
+}
+
+;; Same test without leaf-proc opt
+; CHECK-LABEL: test_constraint_r_i64_noleaf:
+; CHECK: mov %i0, %i5
+; CHECK: sra %i5, 31, %i4
+; CHECK: std %i4, [%i1]
+define i32 @test_constraint_r_i64_noleaf(i32 %foo, i64* %out, i32 %o) #0 {
+entry:
+ %conv = sext i32 %foo to i64
+ tail call void asm sideeffect "std $0, [$1]", "r,r,~{memory}"(i64 %conv, i64* %out)
+ ret i32 %o
+}
+attributes #0 = { "no-frame-pointer-elim"="true" }
+
+;; Ensures that tied in and out gets allocated properly.
+; CHECK-LABEL: test_i64_inout:
+; CHECK: sethi 0, %o2
+; CHECK: mov 5, %o3
+; CHECK: xor %o2, %g0, %o2
+; CHECK: mov %o2, %o0
+; CHECK: ret
+define i64 @test_i64_inout() {
+entry:
+ %0 = call i64 asm sideeffect "xor $1, %g0, $0", "=r,0,~{i1}"(i64 5);
+ ret i64 %0
+}
--- /dev/null
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+@g = common global [32 x i32] zeroinitializer, align 16
+@h = common global [16 x i64] zeroinitializer, align 16
+
+;; Ensures that we don't use registers which are supposed to be reserved.
+
+; CHECK-LABEL: use_all_i32_regs:
+; CHECK-NOT: %g0
+; CHECK-NOT: %g1
+; CHECK-NOT: %g5
+; CHECK-NOT: %g6
+; CHECK-NOT: %g7
+; CHECK-NOT: %o6
+; CHECK-NOT: %i6
+; CHECK-NOT: %i7
+; CHECK: ret
+define void @use_all_i32_regs() {
+entry:
+ %0 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
+ %1 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
+ %2 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
+ %3 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
+ %4 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
+ %5 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
+ %6 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
+ %7 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
+ %8 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
+ %9 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
+ %10 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
+ %11 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
+ %12 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
+ %13 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
+ %14 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
+ %15 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
+ %16 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
+ %17 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
+ %18 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
+ %19 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
+ %20 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
+ %21 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
+ %22 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
+ %23 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
+ %24 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
+ %25 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
+ %26 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
+ %27 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
+ %28 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
+ %29 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
+ %30 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
+ %31 = load volatile i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
+ store volatile i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 0), align 16
+ store volatile i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 1), align 4
+ store volatile i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 2), align 8
+ store volatile i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 3), align 4
+ store volatile i32 %5, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 4), align 16
+ store volatile i32 %6, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 5), align 4
+ store volatile i32 %7, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 6), align 8
+ store volatile i32 %8, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 7), align 4
+ store volatile i32 %9, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 8), align 16
+ store volatile i32 %10, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 9), align 4
+ store volatile i32 %11, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 10), align 8
+ store volatile i32 %12, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 11), align 4
+ store volatile i32 %13, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 12), align 16
+ store volatile i32 %14, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 13), align 4
+ store volatile i32 %15, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 14), align 8
+ store volatile i32 %16, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 15), align 4
+ store volatile i32 %17, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 16), align 16
+ store volatile i32 %18, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 17), align 4
+ store volatile i32 %19, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 18), align 8
+ store volatile i32 %20, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 19), align 4
+ store volatile i32 %21, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 20), align 16
+ store volatile i32 %22, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 21), align 4
+ store volatile i32 %23, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 22), align 8
+ store volatile i32 %24, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 23), align 4
+ store volatile i32 %25, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 24), align 16
+ store volatile i32 %26, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 25), align 4
+ store volatile i32 %27, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 26), align 8
+ store volatile i32 %28, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 27), align 4
+ store volatile i32 %29, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 28), align 16
+ store volatile i32 %30, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 29), align 4
+ store volatile i32 %31, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 30), align 8
+ store volatile i32 %0, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @g, i64 0, i64 31), align 4
+ ret void
+}
+
+
+; CHECK-LABEL: use_all_i64_regs:
+; CHECK-NOT: %g0
+; CHECK-NOT: %g1
+; CHECK-NOT: %g4
+; CHECK-NOT: %g5
+; CHECK-NOT: %g6
+; CHECK-NOT: %g7
+; CHECK-NOT: %o6
+; CHECK-NOT: %o7
+; CHECK-NOT: %i6
+; CHECK-NOT: %i7
+; CHECK: ret
+define void @use_all_i64_regs() {
+entry:
+ %0 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
+ %1 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
+ %2 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
+ %3 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
+ %4 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
+ %5 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
+ %6 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
+ %7 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
+ %8 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
+ %9 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
+ %10 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
+ %11 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
+ %12 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
+ %13 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
+ %14 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
+ %15 = load volatile i64, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
+ store volatile i64 %1, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 0), align 16
+ store volatile i64 %2, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 1), align 4
+ store volatile i64 %3, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 2), align 8
+ store volatile i64 %4, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 3), align 4
+ store volatile i64 %5, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 4), align 16
+ store volatile i64 %6, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 5), align 4
+ store volatile i64 %7, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 6), align 8
+ store volatile i64 %8, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 7), align 4
+ store volatile i64 %9, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 8), align 16
+ store volatile i64 %10, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 9), align 4
+ store volatile i64 %11, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 10), align 8
+ store volatile i64 %12, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 11), align 4
+ store volatile i64 %13, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 12), align 16
+ store volatile i64 %14, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 13), align 4
+ store volatile i64 %15, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 14), align 8
+ store volatile i64 %0, i64* getelementptr inbounds ([16 x i64], [16 x i64]* @h, i64 0, i64 15), align 4
+ ret void
+}
--- /dev/null
+; RUN: llc -march=sparc < %s | FileCheck %s
+
+;; Ensure that spills and reloads work for various types on
+;; sparcv8.
+
+;; For i32/i64 tests, use an asm statement which clobbers most
+;; registers to ensure the spill will happen.
+
+; CHECK-LABEL: test_i32_spill:
+; CHECK: and %i0, %i1, %o0
+; CHECK: st %o0, [%fp+{{.+}}]
+; CHECK: add %o0, %o0, %g0
+; CHECK: ld [%fp+{{.+}}], %i0
+define i32 @test_i32_spill(i32 %a, i32 %b) {
+entry:
+ %r0 = and i32 %a, %b
+ ; The clobber list has all registers except g0/o0. (Only o0 is usable.)
+ %0 = call i32 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g1},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o1},~{o2},~{o3},~{o4},~{o5},~{o6},~{o7}"(i32 %r0)
+ ret i32 %r0
+}
+
+; CHECK-LABEL: test_i64_spill:
+; CHECK: and %i0, %i2, %o0
+; CHECK: and %i1, %i3, %o1
+; CHECK: std %o0, [%fp+{{.+}}]
+; CHECK: add %o0, %o0, %g0
+; CHECK: ldd [%fp+{{.+}}], %i0
+define i64 @test_i64_spill(i64 %a, i64 %b) {
+entry:
+ %r0 = and i64 %a, %b
+ ; The clobber list has all registers except g0,g1,o0,o1. (Only o0/o1 are a usable pair)
+ ; So, o0/o1 must be used.
+ %0 = call i64 asm sideeffect "add $0,$1,%g0", "=r,0,~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{i6},~{i7},~{g2},~{g3},~{g4},~{g5},~{g6},~{g7},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o2},~{o3},~{o4},~{o5},~{o7}"(i64 %r0)
+ ret i64 %r0
+}
+
+;; For float/double tests, a call is a suitable clobber as *all* FPU
+;; registers are caller-save on sparcv8.
+
+; CHECK-LABEL: test_float_spill:
+; CHECK: fadds %f1, %f0, [[R:%f[0-9]+]]
+; CHECK: st [[R]], [%fp+{{.+}}]
+; CHECK: call
+; CHECK: ld [%fp+{{.+}}], %f0
+declare float @foo_float(float)
+define float @test_float_spill(float %a, float %b) {
+entry:
+ %r0 = fadd float %a, %b
+ %0 = call float @foo_float(float %r0)
+ ret float %r0
+}
+
+; CHECK-LABEL: test_double_spill:
+; CHECK: faddd %f2, %f0, [[R:%f[0-9]+]]
+; CHECK: std [[R]], [%fp+{{.+}}]
+; CHECK: call
+; CHECK: ldd [%fp+{{.+}}], %f0
+declare double @foo_double(double)
+define double @test_double_spill(double %a, double %b) {
+entry:
+ %r0 = fadd double %a, %b
+ %0 = call double @foo_double(double %r0)
+ ret double %r0
+}
# CHECK: swapa [%g1] 131, %o2
0xd4 0xf8 0x50 0x60
+
+# CHECK: ldd [%i0+%l6], %o2
+0xd4 0x1e 0x00 0x16
+
+# CHECK: ldd [%i0+32], %o2
+0xd4 0x1e 0x20 0x20
+
+# CHECK: ldd [%g1], %o2
+0xd4 0x18 0x60 0x00
+
+# CHECK: ldd [%g1], %o2
+0xd4 0x18 0x40 0x00
+
+# CHECK: std %o2, [%i0+%l6]
+0xd4 0x3e 0x00 0x16
+
+# CHECK: std %o2, [%i0+32]
+0xd4 0x3e 0x20 0x20
+
+# CHECK: std %o2, [%g1]
+0xd4 0x38 0x60 0x00
+
+# CHECK: std %o2, [%g1]
+0xd4 0x38 0x40 0x00
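Cross-checking the first of these encodings by hand (a standalone sketch, not part of the patch): extracting the SPARC format-3 fields from 0xd41e0016 recovers ldd [%i0+%l6], %o2, and the even rd=10 maps to IntPairDecoderTable[5], i.e. O2_O3.

    #include <cassert>
    #include <cstdint>

    // Field extraction for a SPARC format-3 memory instruction.
    int main() {
      uint32_t insn = 0xd41e0016; // "ldd [%i0+%l6], %o2" from the test
      unsigned op  = insn >> 30;        // 3: memory operation
      unsigned rd  = (insn >> 25) & 31; // 10 -> %o2 (even: valid pair)
      unsigned op3 = (insn >> 19) & 63; // 0b000011: ldd
      unsigned rs1 = (insn >> 14) & 31; // 24 -> %i0
      unsigned i   = (insn >> 13) & 1;  // 0: register+register form
      unsigned rs2 = insn & 31;         // 22 -> %l6
      assert(op == 3 && rd == 10 && op3 == 3 && rs1 == 24 && !i && rs2 == 22);
      // rd/2 == 5 indexes IntPairDecoderTable -> O2_O3.
    }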
! CHECK: lda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x86,0x10,0x76]
lda [%i0 + %l6] 131, %o2
+ ! CHECK: ldd [%i0+%l6], %o2 ! encoding: [0xd4,0x1e,0x00,0x16]
+ ldd [%i0 + %l6], %o2
+ ! CHECK: ldd [%i0+32], %o2 ! encoding: [0xd4,0x1e,0x20,0x20]
+ ldd [%i0 + 32], %o2
+ ! CHECK: ldd [%g1], %o2 ! encoding: [0xd4,0x18,0x40,0x00]
+ ldd [%g1], %o2
+ ! CHECK: ldda [%i0+%l6] 131, %o2 ! encoding: [0xd4,0x9e,0x10,0x76]
+ ldda [%i0 + %l6] 131, %o2
+
! CHECK: stb %o2, [%i0+%l6] ! encoding: [0xd4,0x2e,0x00,0x16]
stb %o2, [%i0 + %l6]
! CHECK: stb %o2, [%i0+32] ! encoding: [0xd4,0x2e,0x20,0x20]
! CHECK: sta %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xa6,0x10,0x76]
sta %o2, [%i0 + %l6] 131
+ ! CHECK: std %o2, [%i0+%l6] ! encoding: [0xd4,0x3e,0x00,0x16]
+ std %o2, [%i0 + %l6]
+ ! CHECK: std %o2, [%i0+32] ! encoding: [0xd4,0x3e,0x20,0x20]
+ std %o2, [%i0 + 32]
+ ! CHECK: std %o2, [%g1] ! encoding: [0xd4,0x38,0x40,0x00]
+ std %o2, [%g1]
+ ! CHECK: stda %o2, [%i0+%l6] 131 ! encoding: [0xd4,0xbe,0x10,0x76]
+ stda %o2, [%i0 + %l6] 131
+
! CHECK: flush %g1+%g2 ! encoding: [0x81,0xd8,0x40,0x02]
flush %g1 + %g2
! CHECK: flush %g1+8 ! encoding: [0x81,0xd8,0x60,0x08]