fix PR4984 by ensuring that fastisel adds properly sign extended GEP displacement

[oota-llvm.git] / lib / Target / CellSPU / SPUISelLowering.cpp
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp

index d1e6dae8bb38529f2d805b9173c4ce04e9d122e1..aaf07838fb683eacac0f55947e1d43c8323ef0af 100644 (file)
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -40,10 +40,10 @@ using namespace llvm;
  namespace {
    std::map<unsigned, const char *> node_names;
  
-  //! MVT mapping to useful data for Cell SPU
+  //! EVT mapping to useful data for Cell SPU
    struct valtype_map_s {
-    const MVT   valtype;
-    const int   prefslot_byte;
+    EVT   valtype;
+    int   prefslot_byte;
    };
  
    const valtype_map_s valtype_map[] = {
@@ -59,7 +59,7 @@ namespace {
  
    const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  
-  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
+  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
      const valtype_map_s *retval = 0;
  
      for (size_t i = 0; i < n_valtype_map; ++i) {
@@ -74,7 +74,7 @@ namespace {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "getValueTypeMapEntry returns NULL for "
-           << VT.getMVTString();
+           << VT.getEVTString();
        llvm_report_error(Msg.str());
      }
  #endif
@@ -100,8 +100,8 @@ namespace {
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
-      MVT ArgVT = Op.getOperand(i).getValueType();
-      const Type *ArgTy = ArgVT.getTypeForMVT();
+      EVT ArgVT = Op.getOperand(i).getValueType();
+      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
        Entry.Node = Op.getOperand(i);
        Entry.Ty = ArgTy;
        Entry.isSExt = isSigned;
@@ -112,10 +112,11 @@ namespace {
                                             TLI.getPointerTy());
  
      // Splice the libcall in wherever FindInputOutputChains tells us to.
-    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
+    const Type *RetTy =
+                Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
      std::pair<SDValue, SDValue> CallInfo =
              TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
-                            0, CallingConv::C, false,
+                            0, TLI.getLibcallCallingConv(LC), false,
                              /*isReturnValueUsed=*/true,
                              Callee, Args, DAG,
                              Op.getDebugLoc());
@@ -168,7 +169,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // SPU's loads and stores have to be custom lowered:
    for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
         ++sctype) {
-    MVT VT = (MVT::SimpleValueType)sctype;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
  
      setOperationAction(ISD::LOAD,   VT, Custom);
      setOperationAction(ISD::STORE,  VT, Custom);
@@ -177,20 +178,20 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
      setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
  
      for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
-      MVT StoreVT = (MVT::SimpleValueType) stype;
+      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
        setTruncStoreAction(VT, StoreVT, Expand);
      }
    }
  
    for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
         ++sctype) {
-    MVT VT = (MVT::SimpleValueType) sctype;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
  
      setOperationAction(ISD::LOAD,   VT, Custom);
      setOperationAction(ISD::STORE,  VT, Custom);
  
      for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
-      MVT StoreVT = (MVT::SimpleValueType) stype;
+      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
        setTruncStoreAction(VT, StoreVT, Expand);
      }
    }
@@ -349,6 +350,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // Custom lower i128 -> i64 truncates
    setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
  
+  // Custom lower i32/i64 -> i128 sign extend
+  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
    setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
    setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
@@ -391,7 +395,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
    // appropriate instructions to materialize the address.
    for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
         ++sctype) {
-    MVT VT = (MVT::SimpleValueType)sctype;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
  
      setOperationAction(ISD::GlobalAddress,  VT, Custom);
      setOperationAction(ISD::ConstantPool,   VT, Custom);
@@ -434,7 +438,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
  
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD,     VT, Legal);
@@ -510,9 +514,6 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
      node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
      node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
      node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
-    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
-    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
-    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
      node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
      node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
      node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
@@ -539,10 +540,10 @@ unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  // Return the Cell SPU's SETCC result type
  //===----------------------------------------------------------------------===//
  
-MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(MVT VT) const {
+MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
    // i16 and i32 are valid SETCC result types
    return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
-    VT.getSimpleVT() :
+    VT.getSimpleVT().SimpleTy :
      MVT::i32);
  }
  
@@ -576,9 +577,9 @@ static SDValue
  LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    LoadSDNode *LN = cast<LoadSDNode>(Op);
    SDValue the_chain = LN->getChain();
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  MVT InVT = LN->getMemoryVT();
-  MVT OutVT = Op.getValueType();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT InVT = LN->getMemoryVT();
+  EVT OutVT = Op.getValueType();
    ISD::LoadExtType ExtType = LN->getExtensionType();
    unsigned alignment = LN->getAlignment();
    const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
@@ -683,7 +684,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  
      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
-    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
+    EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 
+                                 InVT, (128 / InVT.getSizeInBits()));
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                           DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
  
@@ -740,17 +742,19 @@ static SDValue
  LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
    StoreSDNode *SN = cast<StoreSDNode>(Op);
    SDValue Value = SN->getValue();
-  MVT VT = Value.getValueType();
-  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT VT = Value.getValueType();
+  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    unsigned alignment = SN->getAlignment();
  
    switch (SN->getAddressingMode()) {
    case ISD::UNINDEXED: {
      // The vector type we really want to load from the 16-byte chunk.
-    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
-        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
+    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+                                 VT, (128 / VT.getSizeInBits())),
+        stVecVT = EVT::getVectorVT(*DAG.getContext(),
+                                   StVT, (128 / StVT.getSizeInBits()));
  
      SDValue alignLoadVec;
      SDValue basePtr = SN->getBasePtr();
@@ -845,9 +849,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
      // to the stack pointer, which is always aligned.
  #if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-        cerr << "CellSPU LowerSTORE: basePtr = ";
+        errs() << "CellSPU LowerSTORE: basePtr = ";
          basePtr.getNode()->dump(&DAG);
-        cerr << "\n";
+        errs() << "\n";
        }
  #endif
  
@@ -870,9 +874,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
        const SDValue &currentRoot = DAG.getRoot();
  
        DAG.setRoot(result);
-      cerr << "------- CellSPU:LowerStore result:\n";
+      errs() << "------- CellSPU:LowerStore result:\n";
        DAG.dump();
-      cerr << "-------\n";
+      errs() << "-------\n";
        DAG.setRoot(currentRoot);
      }
  #endif
@@ -902,7 +906,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  //! Generate the address of a constant pool entry.
  static SDValue
  LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
    Constant *C = CP->getConstVal();
    SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
@@ -935,7 +939,7 @@ SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM
  
  static SDValue
  LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
    JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
    SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    SDValue Zero = DAG.getConstant(0, PtrVT);
@@ -960,7 +964,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  
  static SDValue
  LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
-  MVT PtrVT = Op.getValueType();
+  EVT PtrVT = Op.getValueType();
    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
    GlobalValue *GV = GSDN->getGlobal();
    SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
@@ -989,7 +993,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  //! Custom lower double precision floating point constants
  static SDValue
  LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
    // FIXME there is no actual debug info here
    DebugLoc dl = Op.getDebugLoc();
  
@@ -1011,7 +1015,7 @@ LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  
  SDValue
  SPUTargetLowering::LowerFormalArguments(SDValue Chain,
-                                        unsigned CallConv, bool isVarArg,
+                                        CallingConv::ID CallConv, bool isVarArg,
                                          const SmallVectorImpl<ISD::InputArg>
                                            &Ins,
                                          DebugLoc dl, SelectionDAG &DAG,
@@ -1028,23 +1032,23 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
    unsigned ArgRegIdx = 0;
    unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  
    // Add DAG nodes to load the arguments or copy them out of registers.
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
-    MVT ObjectVT = Ins[ArgNo].VT;
+    EVT ObjectVT = Ins[ArgNo].VT;
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      SDValue ArgVal;
  
      if (ArgRegIdx < NumArgRegs) {
        const TargetRegisterClass *ArgRegClass;
  
-      switch (ObjectVT.getSimpleVT()) {
+      switch (ObjectVT.getSimpleVT().SimpleTy) {
        default: {
          std::string msg;
          raw_string_ostream Msg(msg);
          Msg << "LowerFormalArguments Unhandled argument type: "
-             << ObjectVT.getMVTString();
+             << ObjectVT.getEVTString();
          llvm_report_error(Msg.str());
        }
        case MVT::i8:
@@ -1140,7 +1144,7 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  
  SDValue
  SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
-                             unsigned CallConv, bool isVarArg,
+                             CallingConv::ID CallConv, bool isVarArg,
                               bool isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1154,7 +1158,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  
    // Handy pointer type
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  
    // Accumulate how many bytes are to be pushed on the stack, including the
    // linkage area, and parameter passing area.  According to the SPU ABI,
@@ -1184,7 +1188,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
      SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
  
-    switch (Arg.getValueType().getSimpleVT()) {
+    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unexpected ValueType for argument!");
      case MVT::i8:
      case MVT::i16:
@@ -1251,7 +1255,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    // node so that legalize doesn't hack it.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      GlobalValue *GV = G->getGlobal();
-    MVT CalleeVT = Callee.getValueType();
+    EVT CalleeVT = Callee.getValueType();
      SDValue Zero = DAG.getConstant(0, PtrVT);
      SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
  
@@ -1275,7 +1279,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
        Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    MVT CalleeVT = Callee.getValueType();
+    EVT CalleeVT = Callee.getValueType();
      SDValue Zero = DAG.getConstant(0, PtrVT);
      SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
          Callee.getValueType());
@@ -1317,7 +1321,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
      return Chain;
  
    // If the call has results, copy the values out of the ret val registers.
-  switch (Ins[0].VT.getSimpleVT()) {
+  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ret value!");
    case MVT::Other: break;
    case MVT::i32:
@@ -1367,7 +1371,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
  
  SDValue
  SPUTargetLowering::LowerReturn(SDValue Chain,
-                               unsigned CallConv, bool isVarArg,
+                               CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 DebugLoc dl, SelectionDAG &DAG) {
  
@@ -1431,7 +1435,7 @@ getVecImm(SDNode *N) {
  /// and the value fits into an unsigned 18-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
-                              MVT ValueType) {
+                              EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      uint64_t Value = CN->getZExtValue();
      if (ValueType == MVT::i64) {
@@ -1453,7 +1457,7 @@ SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
  /// and the value fits into a signed 16-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
-                              MVT ValueType) {
+                              EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      int64_t Value = CN->getSExtValue();
      if (ValueType == MVT::i64) {
@@ -1476,7 +1480,7 @@ SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
  /// and the value fits into a signed 10-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
-                              MVT ValueType) {
+                              EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      int64_t Value = CN->getSExtValue();
      if (ValueType == MVT::i64) {
@@ -1502,7 +1506,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
  /// constant vectors. Thus, we test to see if the upper and lower bytes are the
  /// same value.
  SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
-                             MVT ValueType) {
+                             EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      int Value = (int) CN->getZExtValue();
      if (ValueType == MVT::i16
@@ -1521,7 +1525,7 @@ SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
  /// and the value fits into a signed 16-bit constant, and if so, return the
  /// constant
  SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
-                               MVT ValueType) {
+                               EVT ValueType) {
    if (ConstantSDNode *CN = getVecImm(N)) {
      uint64_t Value = CN->getZExtValue();
      if ((ValueType == MVT::i32
@@ -1554,8 +1558,8 @@ SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  //! Lower a BUILD_VECTOR instruction creatively:
  static SDValue
  LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
-  MVT EltVT = VT.getVectorElementType();
+  EVT VT = Op.getValueType();
+  EVT EltVT = VT.getVectorElementType();
    DebugLoc dl = Op.getDebugLoc();
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
@@ -1575,12 +1579,12 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  
    uint64_t SplatBits = APSplatBits.getZExtValue();
  
-  switch (VT.getSimpleVT()) {
+  switch (VT.getSimpleVT().SimpleTy) {
    default: {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
-         << VT.getMVTString();
+         << VT.getEVTString();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
@@ -1640,7 +1644,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  /*!
   */
  SDValue
-SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                       DebugLoc dl) {
    uint32_t upper = uint32_t(SplatVal >> 32);
    uint32_t lower = uint32_t(SplatVal);
@@ -1753,8 +1757,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
    // If we have a single element being moved from V1 to V2, this can be handled
    // using the C*[DX] compute mask instructions, but the vector elements have
    // to be monotonically increasing with one exception element.
-  MVT VecVT = V1.getValueType();
-  MVT EltVT = VecVT.getVectorElementType();
+  EVT VecVT = V1.getValueType();
+  EVT EltVT = VecVT.getVectorElementType();
    unsigned EltsFromV2 = 0;
    unsigned V2Elt = 0;
    unsigned V2EltIdx0 = 0;
@@ -1819,7 +1823,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
-    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
      // Initialize temporary register to 0
      SDValue InitTempReg =
        DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
@@ -1865,11 +1869,11 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  
      ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
      SmallVector<SDValue, 16> ConstVecValues;
-    MVT VT;
+    EVT VT;
      size_t n_copies;
  
      // Create a constant vector:
-    switch (Op.getValueType().getSimpleVT()) {
+    switch (Op.getValueType().getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unexpected constant value type in "
                                "LowerSCALAR_TO_VECTOR");
      case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
@@ -1888,7 +1892,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
                         &ConstVecValues[0], ConstVecValues.size());
    } else {
      // Otherwise, copy the value from one register to another:
-    switch (Op0.getValueType().getSimpleVT()) {
+    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
      case MVT::i8:
      case MVT::i16:
@@ -1904,7 +1908,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  }
  
  static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
    SDValue N = Op.getOperand(0);
    SDValue Elt = Op.getOperand(1);
    DebugLoc dl = Op.getDebugLoc();
@@ -1933,7 +1937,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
      int prefslot_begin = -1, prefslot_end = -1;
      int elt_byte = EltNo * VT.getSizeInBits() / 8;
  
-    switch (VT.getSimpleVT()) {
+    switch (VT.getSimpleVT().SimpleTy) {
      default:
        assert(false && "Invalid value type!");
      case MVT::i8: {
@@ -1959,7 +1963,9 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
      assert(prefslot_begin != -1 && prefslot_end != -1 &&
             "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
  
-    unsigned int ShufBytes[16];
+    unsigned int ShufBytes[16] = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
      for (int i = 0; i < 16; ++i) {
        // zero fill uppper part of preferred slot, don't care about the
        // other slots:
@@ -1995,7 +2001,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
    } else {
      // Variable index: Rotate the requested element into slot 0, then replicate
      // slot 0 across the vector
-    MVT VecVT = N.getValueType();
+    EVT VecVT = N.getValueType();
      if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
        llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                          "vector type!");
@@ -2023,7 +2029,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
      // consistency with the notion of a unified register set)
      SDValue replicate;
  
-    switch (VT.getSimpleVT()) {
+    switch (VT.getSimpleVT().SimpleTy) {
      default:
        llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
                          "type");
@@ -2070,12 +2076,12 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
    SDValue ValOp = Op.getOperand(1);
    SDValue IdxOp = Op.getOperand(2);
    DebugLoc dl = Op.getDebugLoc();
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
  
    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
  
-  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Use $sp ($1) because it's always 16-byte aligned and it's available:
    SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  DAG.getRegister(SPU::R1, PtrVT),
@@ -2096,7 +2102,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
  {
    SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
    DebugLoc dl = Op.getDebugLoc();
-  MVT ShiftVT = TLI.getShiftAmountTy();
+  EVT ShiftVT = TLI.getShiftAmountTy();
  
    assert(Op.getValueType() == MVT::i8);
    switch (Opc) {
@@ -2127,7 +2133,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
    case ISD::ROTR:
    case ISD::ROTL: {
      SDValue N1 = Op.getOperand(1);
-    MVT N1VT = N1.getValueType();
+    EVT N1VT = N1.getValueType();
  
      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
      if (!N1VT.bitsEq(ShiftVT)) {
@@ -2150,7 +2156,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
    case ISD::SRL:
    case ISD::SHL: {
      SDValue N1 = Op.getOperand(1);
-    MVT N1VT = N1.getValueType();
+    EVT N1VT = N1.getValueType();
  
      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
      if (!N1VT.bitsEq(ShiftVT)) {
@@ -2167,7 +2173,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
    }
    case ISD::SRA: {
      SDValue N1 = Op.getOperand(1);
-    MVT N1VT = N1.getValueType();
+    EVT N1VT = N1.getValueType();
  
      N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
      if (!N1VT.bitsEq(ShiftVT)) {
@@ -2200,7 +2206,7 @@ static SDValue
  LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
    SDValue ConstVec;
    SDValue Arg;
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
    DebugLoc dl = Op.getDebugLoc();
  
    ConstVec = Op.getOperand(0);
@@ -2251,11 +2257,12 @@ LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
    ones per byte, which then have to be accumulated.
  */
  static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getValueType();
-  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  EVT VT = Op.getValueType();
+  EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 
+                               VT, (128 / VT.getSizeInBits()));
    DebugLoc dl = Op.getDebugLoc();
  
-  switch (VT.getSimpleVT()) {
+  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
@@ -2361,9 +2368,9 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
   */
  static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                SPUTargetLowering &TLI) {
-  MVT OpVT = Op.getValueType();
+  EVT OpVT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
+  EVT Op0VT = Op0.getValueType();
  
    if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
        || OpVT == MVT::i64) {
@@ -2387,9 +2394,9 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   */
  static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                                SPUTargetLowering &TLI) {
-  MVT OpVT = Op.getValueType();
+  EVT OpVT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
+  EVT Op0VT = Op0.getValueType();
  
    if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
        || Op0VT == MVT::i64) {
@@ -2418,12 +2425,12 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
  
    SDValue lhs = Op.getOperand(0);
    SDValue rhs = Op.getOperand(1);
-  MVT lhsVT = lhs.getValueType();
+  EVT lhsVT = lhs.getValueType();
    assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
  
-  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
    APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
-  MVT IntVT(MVT::i64);
+  EVT IntVT(MVT::i64);
  
    // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
    // selected to a NOP:
@@ -2544,7 +2551,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
  
  static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                                const TargetLowering &TLI) {
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
    SDValue lhs = Op.getOperand(0);
    SDValue rhs = Op.getOperand(1);
    SDValue trueval = Op.getOperand(2);
@@ -2573,14 +2580,15 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
  static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
  {
    // Type to truncate to
-  MVT VT = Op.getValueType();
-  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
-  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
+  EVT VT = Op.getValueType();
+  MVT simpleVT = VT.getSimpleVT();
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), 
+                               VT, (128 / VT.getSizeInBits()));
    DebugLoc dl = Op.getDebugLoc();
  
    // Type to truncate from
    SDValue Op0 = Op.getOperand(0);
-  MVT Op0VT = Op0.getValueType();
+  EVT Op0VT = Op0.getValueType();
  
    if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
      // Create shuffle mask, least significant doubleword of quadword
@@ -2602,6 +2610,61 @@ static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
    return SDValue();             // Leave the truncate unmolested
  }
  
+/*!
+ * Custom-lower an i64/i32 -> i128 SIGN_EXTEND node. An arithmetic right
+ * shift by 31 (intended to select as rotmai) yields at least one byte full
+ * of sign bits; a SHUFB then replicates that "sign-byte" into the leftmost
+ * words and places the i64/i32 source into the rightmost words.
+ *
+ * @param Op The SIGN_EXTEND node: i128 result, i64/i32 operand 0 (asserted)
+ * @param DAG The current DAG
+ * @return The BIT_CONVERT-to-i128 node that roots the emitted sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Type to extend to. NOTE(review): VecVT below is never used afterwards.
+  MVT OpVT = Op.getValueType().getSimpleVT();
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+                               OpVT, (128 / OpVT.getSizeInBits()));
+
+  // Value being extended (operand 0 of the node) and its type
+  SDValue Op0 = Op.getOperand(0);
+  MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+  // The type to extend to needs to be i128 and
+  // the type to extend from needs to be i64 or i32.
+  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+          "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+
+  // Shuffle mask, one i32 per result word; mask1 serves words 0 and 1
+  unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
+  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
+  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask1, MVT::i32),
+                                 DAG.getConstant(mask2, MVT::i32),
+                                 DAG.getConstant(mask3, MVT::i32));
+
+  // Arithmetic right shift by 31 of the source (moved into a vector via
+  // PREFSLOT2VEC) to generate at least one byte that contains only sign bits.
+  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+  SDValue sraVal = DAG.getNode(ISD::SRA,
+                 dl,
+                 mvt,
+                 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+                 DAG.getConstant(31, MVT::i32));
+
+  // Shuffle bytes - sign bits fill the upper 64 bits (upper 96 bits for an
+  // i32 source); the input value occupies the remaining low-order bits.
+  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
+
+  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
+}
+
  //! Custom (target-specific) lowering entry point
  /*!
    This is where LLVM's DAG selection process calls to do target-specific
@@ -2611,14 +2674,14 @@ SDValue
  SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  {
    unsigned Opc = (unsigned) Op.getOpcode();
-  MVT VT = Op.getValueType();
+  EVT VT = Op.getValueType();
  
    switch (Opc) {
    default: {
  #ifndef NDEBUG
-    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
-    cerr << "Op.getOpcode() = " << Opc << "\n";
-    cerr << "*Op.getNode():\n";
+    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+    errs() << "Op.getOpcode() = " << Opc << "\n";
+    errs() << "*Op.getNode():\n";
      Op.getNode()->dump();
  #endif
      llvm_unreachable(0);
@@ -2694,6 +2757,9 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  
    case ISD::TRUNCATE:
      return LowerTRUNCATE(Op, DAG);
+
+  case ISD::SIGN_EXTEND:
+    return LowerSIGN_EXTEND(Op, DAG);
    }
  
    return SDValue();
@@ -2705,13 +2771,13 @@ void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
  {
  #if 0
    unsigned Opc = (unsigned) N->getOpcode();
-  MVT OpVT = N->getValueType(0);
+  EVT OpVT = N->getValueType(0);
  
    switch (Opc) {
    default: {
-    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
-    cerr << "Op.getOpcode() = " << Opc << "\n";
-    cerr << "*Op.getNode():\n";
+    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+    errs() << "Op.getOpcode() = " << Opc << "\n";
+    errs() << "*Op.getNode():\n";
      N->dump();
      abort();
      /*NOTREACHED*/
@@ -2735,8 +2801,8 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
    const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
    SelectionDAG &DAG = DCI.DAG;
    SDValue Op0 = N->getOperand(0);       // everything has at least one operand
-  MVT NodeVT = N->getValueType(0);      // The node's value type
-  MVT Op0VT = Op0.getValueType();       // The first operand's result
+  EVT NodeVT = N->getValueType(0);      // The node's value type
+  EVT Op0VT = Op0.getValueType();       // The first operand's result
    SDValue Result;                       // Initially, empty result
    DebugLoc dl = N->getDebugLoc();
  
@@ -2765,7 +2831,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  #if !defined(NDEBUG)
            if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-            cerr << "\n"
+            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
            }
@@ -2781,7 +2847,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  #if !defined(NDEBUG)
            if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-            cerr << "\n"
+            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                   << "), " << CN0->getSExtValue() << ")\n"
                   << "With:    (SPUindirect <arg>, "
@@ -2805,11 +2871,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
        // Types must match, however...
  #if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-        cerr << "\nReplace: ";
+        errs() << "\nReplace: ";
          N->dump(&DAG);
-        cerr << "\nWith:    ";
+        errs() << "\nWith:    ";
          Op0.getNode()->dump(&DAG);
-        cerr << "\n";
+        errs() << "\n";
        }
  #endif
  
@@ -2824,11 +2890,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
          // (SPUindirect (SPUaform <addr>, 0), 0) ->
          // (SPUaform <addr>, 0)
  
-        DEBUG(cerr << "Replace: ");
+        DEBUG(errs() << "Replace: ");
          DEBUG(N->dump(&DAG));
-        DEBUG(cerr << "\nWith:    ");
+        DEBUG(errs() << "\nWith:    ");
          DEBUG(Op0.getNode()->dump(&DAG));
-        DEBUG(cerr << "\n");
+        DEBUG(errs() << "\n");
  
          return Op0;
        }
@@ -2841,7 +2907,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  
  #if !defined(NDEBUG)
            if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
-            cerr << "\n"
+            errs() << "\n"
                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
            }
@@ -2856,9 +2922,6 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
    }
    case SPUISD::SHLQUAD_L_BITS:
    case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
    case SPUISD::ROTBYTES_LEFT: {
      SDValue Op1 = N->getOperand(1);
  
@@ -2903,11 +2966,11 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
    // Otherwise, return unchanged.
  #ifndef NDEBUG
    if (Result.getNode()) {
-    DEBUG(cerr << "\nReplace.SPU: ");
+    DEBUG(errs() << "\nReplace.SPU: ");
      DEBUG(N->dump(&DAG));
-    DEBUG(cerr << "\nWith:        ");
+    DEBUG(errs() << "\nWith:        ");
      DEBUG(Result.getNode()->dump(&DAG));
-    DEBUG(cerr << "\n");
+    DEBUG(errs() << "\n");
    }
  #endif
  
@@ -2938,7 +3001,7 @@ SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const
  
  std::pair<unsigned, const TargetRegisterClass*>
  SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                MVT VT) const
+                                                EVT VT) const
  {
    if (Constraint.size() == 1) {
      // GCC RS6000 Constraint Letters
@@ -2986,9 +3049,6 @@ SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
    case SPUISD::VEC2PREFSLOT:
    case SPUISD::SHLQUAD_L_BITS:
    case SPUISD::SHLQUAD_L_BYTES:
-  case SPUISD::VEC_SHL:
-  case SPUISD::VEC_SRL:
-  case SPUISD::VEC_SRA:
    case SPUISD::VEC_ROTL:
    case SPUISD::VEC_ROTR:
    case SPUISD::ROTBYTES_LEFT:
@@ -3006,7 +3066,7 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
      return 1;
  
    case ISD::SETCC: {
-    MVT VT = Op.getValueType();
+    EVT VT = Op.getValueType();
  
      if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
        VT = MVT::i32;