#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
- node_names[(unsigned) SPUISD::HALF2VEC] = "SPUISD::HALF2VEC";
- node_names[(unsigned) SPUISD::VEC2HALF] = "SPUISD::VEC2HALF";
}
std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
// Re-emit as a v16i8 vector load
result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->getPointerInfo(),
LN->isVolatile(), LN->isNonTemporal(), 16);
// Update the chain
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned alignment = SN->getAlignment();
- const bool isVec = VT.isVector();
- EVT eltTy = isVec ? VT.getVectorElementType(): VT;
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
// The vector type we really want to load from the 16-byte chunk.
EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
- eltTy, (128 / eltTy.getSizeInBits()));
+ VT, (128 / VT.getSizeInBits()));
SDValue alignLoadVec;
SDValue basePtr = SN->getBasePtr();
// Load the memory to which to store.
alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->getPointerInfo(),
SN->isVolatile(), SN->isNonTemporal(), 16);
// Update the chain
}
#endif
- SDValue insertEltOp;
- SDValue vectorizeOp;
- if (isVec)
- {
- // FIXME: this works only if the vector is 64bit!
- insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v2i64, insertEltOffs);
- vectorizeOp = DAG.getNode(SPUISD::HALF2VEC, dl, vecVT, theValue);
- }
- else
- {
- insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
- }
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
+
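+      // SHUFB merges the value being stored (vectorizeOp) into the loaded
+      // quadword (alignLoadVec) at the slot selected by the insertion mask.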
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
vectorizeOp, alignLoadVec,
DAG.getNode(ISD::BIT_CONVERT, dl,
MVT::v4i32, insertEltOp));
result = DAG.getStore(the_chain, dl, result, basePtr,
- LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->getPointerInfo(),
LN->isVolatile(), LN->isNonTemporal(),
LN->getAlignment());
// or we're forced to do vararg
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, 0);
ArgOffset += StackSlotSize;
}
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
- SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
false, false, 0);
Chain = Store.getOperand(0);
MemOps.push_back(Store);
if (ArgRegIdx != NumArgRegs) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
false, false, 0));
ArgOffset += StackSlotSize;
}
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
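+  // Let the SPU calling convention assign each return value to a register.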
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- MVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case MVT::i8:
- case MVT::i16:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
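+    // Copy this return value out of its assigned register; the chain and
+    // glue results link successive copies together.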
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
unsigned PrevElt = 0;
- unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+  int rotamt = 0;
EVT maskVT; // which of the c?d instructions to use
if (EltVT == MVT::i8) {
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+      // elements are taken from V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
if (PrevElt > 0 && SrcElt < MaxElts) {
if ((PrevElt == SrcElt - 1)
|| (PrevElt == MaxElts - 1 && SrcElt == 0)) {
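+          // Rotation distance in elements; this may be negative here and is
+          // wrapped into range before being used.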
+          rotamt = SrcElt - i;
PrevElt = SrcElt;
- if (SrcElt == 0)
- V0Elt = i;
} else {
rotate = false;
}
- } else if (i == 0) {
- // First time through, need to keep track of previous element
+    } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
+      // First time through, or the element just after a wrap-around.
PrevElt = SrcElt;
} else {
// This isn't a rotation, takes elements from vector 2
// R1 ($sp) is used here only as it is guaranteed to have last bits zero
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2Elt, MVT::i32));
+ DAG.getConstant(V2EltOffset, MVT::i32));
SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
} else if (rotate) {
- int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
-
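+    // Wrap a negative rotation amount and convert it from elements to bytes.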
+ if (rotamt < 0)
+      rotamt += MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
V1, DAG.getConstant(rotamt, MVT::i16));
} else {
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
// use 0 when the lane to insert to is 'undef'
- int64_t Idx=0;
+    int64_t Offset = 0;
if (IdxOp.getOpcode() != ISD::UNDEF) {
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Idx = (CN->getSExtValue());
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
}
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Idx, PtrVT));
+ DAG.getConstant(Offset, PtrVT));
// widen the mask when dealing with half vectors
EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
128/ VT.getVectorElementType().getSizeInBits());
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
- if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
// Create shuffle mask, least significant doubleword of quadword
unsigned maskHigh = 0x08090a0b;
unsigned maskLow = 0x0c0d0e0f;
DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
DAG.getConstant(31, MVT::i32));
+  // Reinterpret as an i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
// Shuffle bytes - Copy the sign bits into the upper 64 bits
// and the input value into the lower 64 bits.
SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
- DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
-
+ extended, sraVal, shufMask);
return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
}
return TargetLowering::getConstraintType(ConstraintLetter);
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+ //FIXME: Seems like the supported constraint letters were just copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // I'm leaving it so until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const
// The SPU target isn't yet aware of offsets.
return false;
}
+
+// Can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  // ceqi, cgti, etc. all take an s10 operand.
+  return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                         const Type *) const {
+  // A-form: 18-bit absolute address.
+  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+    return true;
+
+  // D-form: reg + 14-bit offset
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+    return true;
+
+  // X-form: reg + reg
+  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
+ return true;
+
+ return false;
+}
+