Get rid of the Pass+Context magic.

[oota-llvm.git] / lib / Target / SystemZ / SystemZISelDAGToDAG.cpp
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

index 83fea60bef50f95d63a37055402014d0b61ad50d..482d93448bf2c70ad87874984dd76c90d5c8403f 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -28,8 +28,14 @@
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Support/Compiler.h"
  #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  
+static const unsigned subreg_even32 = 1; // 32-bit even half: remainder result
+static const unsigned subreg_odd32  = 2; // 32-bit odd half: dividend/quotient
+static const unsigned subreg_even   = 3; // 64-bit even half: remainder result
+static const unsigned subreg_odd    = 4; // 64-bit odd half: dividend/quotient
+
  namespace {
    /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
    /// instead of register numbers for the leaves of the matched tree.
@@ -45,25 +51,29 @@ namespace {
      } Base;
  
      SDValue IndexReg;
-    int32_t Disp;
+    int64_t Disp;
+    bool isRI;
  
-    SystemZRRIAddressMode()
-      : BaseType(RegBase), IndexReg(), Disp(0) {
+    SystemZRRIAddressMode(bool RI = false)
+      : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
      }
  
      void dump() {
-      cerr << "SystemZRRIAddressMode " << this << "\n";
+      cerr << "SystemZRRIAddressMode " << this << '\n';
        if (BaseType == RegBase) {
          cerr << "Base.Reg ";
          if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
          else cerr << "nul";
+        cerr << '\n';
        } else {
-        cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
+        cerr << " Base.FrameIndex " << Base.FrameIndex << '\n';
+      }
+      if (!isRI) {
+        cerr << "IndexReg ";
+        if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
+        else cerr << "nul";
        }
-      cerr << "IndexReg ";
-      if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
-      else cerr << "nul";
-      cerr << " Disp " << Disp << "\n";
+      cerr << " Disp " << Disp << '\n';
      }
    };
  }
@@ -76,6 +86,12 @@ namespace {
      SystemZTargetLowering &Lowering;
      const SystemZSubtarget &Subtarget;
  
+    void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+                            SDValue &Base, SDValue &Disp);
+    void getAddressOperands(const SystemZRRIAddressMode &AM,
+                            SDValue &Base, SDValue &Disp,
+                            SDValue &Index);
+
    public:
      SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(TM, OptLevel),
@@ -88,6 +104,12 @@ namespace {
        return "SystemZ DAG->DAG Pattern Instruction Selection";
      }
  
+    /// getI8Imm - Wrap Imm in a target constant node of type i8, built through
+    /// CurDAG->getTargetConstant.
+    inline SDValue getI8Imm(uint64_t Imm) {
+      return CurDAG->getTargetConstant(Imm, MVT::i8);
+    }
+
      /// getI16Imm - Return a target constant with the specified value, of type
      /// i16.
      inline SDValue getI16Imm(uint64_t Imm) {
@@ -104,13 +126,30 @@ namespace {
      #include "SystemZGenDAGISel.inc"
  
    private:
-    bool SelectAddrRRI(SDValue Op, SDValue Addr,
-                       SDValue &Base, SDValue &Index, SDValue &Disp);
-    SDNode *Select(SDValue Op);
-    bool SelectAddrRI(const SDValue& Op, SDValue& Addr,
+    bool SelectAddrRI12Only(SDValue Op, SDValue& Addr,
+                            SDValue &Base, SDValue &Disp);
+    bool SelectAddrRI12(SDValue Op, SDValue& Addr,
+                        SDValue &Base, SDValue &Disp,
+                        bool is12BitOnly = false);
+    bool SelectAddrRI(SDValue Op, SDValue& Addr,
                        SDValue &Base, SDValue &Disp);
-    bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, unsigned Depth = 0);
+    bool SelectAddrRRI12(SDValue Op, SDValue Addr,
+                         SDValue &Base, SDValue &Disp, SDValue &Index);
+    bool SelectAddrRRI20(SDValue Op, SDValue Addr,
+                         SDValue &Base, SDValue &Disp, SDValue &Index);
+    bool SelectLAAddr(SDValue Op, SDValue Addr,
+                      SDValue &Base, SDValue &Disp, SDValue &Index);
+
+    SDNode *Select(SDValue Op);
+
+    bool TryFoldLoad(SDValue P, SDValue N,
+                     SDValue &Base, SDValue &Disp, SDValue &Index);
+
+    bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
+                      bool is12Bit, unsigned Depth = 0);
      bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
+    bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
+                        bool is12Bit);
  
    #ifndef NDEBUG
      unsigned Indent;
@@ -130,91 +169,31 @@ FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
  /// or 64-bit immediate, and if the value can be accurately represented as a
  /// sign extension from a 20-bit value. If so, this returns true and the
  /// immediate.
-static bool isImmSExt20(int64_t Val, int32_t &Imm) {
+static bool isImmSExt20(int64_t Val, int64_t &Imm) {
    if (Val >= -524288 && Val <= 524287) {
-    Imm = (int32_t)Val;
+    Imm = Val;
      return true;
    }
    return false;
  }
  
-static bool isImmSExt20(SDNode *N, int32_t &Imm) {
-  if (N->getOpcode() != ISD::Constant)
-    return false;
-
-  return isImmSExt20(cast<ConstantSDNode>(N)->getSExtValue(), Imm);
-}
-
-static bool isImmSExt20(SDValue Op, int32_t &Imm) {
-  return isImmSExt20(Op.getNode(), Imm);
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(const SDValue& Op, SDValue& Addr,
-                                       SDValue &Base, SDValue &Disp) {
-  // FIXME dl should come from parent load or store, not from address
-  DebugLoc dl = Addr.getDebugLoc();
-  MVT VT = Addr.getValueType();
-
-  if (Addr.getOpcode() == ISD::ADD) {
-    int32_t Imm = 0;
-    if (isImmSExt20(Addr.getOperand(1), Imm)) {
-      Disp = CurDAG->getTargetConstant(Imm, MVT::i32);
-      if (FrameIndexSDNode *FI =
-          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
-        Base = CurDAG->getTargetFrameIndex(FI->getIndex(), VT);
-      } else {
-        Base = Addr.getOperand(0);
-      }
-      return true; // [r+i]
-    }
-  } else if (Addr.getOpcode() == ISD::OR) {
-    int32_t Imm = 0;
-    if (isImmSExt20(Addr.getOperand(1), Imm)) {
-      // If this is an or of disjoint bitfields, we can codegen this as an add
-      // (for better address arithmetic) if the LHS and RHS of the OR are
-      // provably disjoint.
-      APInt LHSKnownZero, LHSKnownOne;
-      CurDAG->ComputeMaskedBits(Addr.getOperand(0),
-                                APInt::getAllOnesValue(Addr.getOperand(0)
-                                                       .getValueSizeInBits()),
-                                LHSKnownZero, LHSKnownOne);
-
-      if ((LHSKnownZero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
-        // If all of the bits are known zero on the LHS or RHS, the add won't
-        // carry.
-        Base = Addr.getOperand(0);
-        Disp = CurDAG->getTargetConstant(Imm, MVT::i32);
-        return true;
-      }
-    }
-  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr)) {
-    // Loading from a constant address.
-
-    // If this address fits entirely in a 20-bit sext immediate field, codegen
-    // this as "d(r0)"
-    int32_t Imm;
-    if (isImmSExt20(CN, Imm)) {
-      Disp = CurDAG->getTargetConstant(Imm, MVT::i32);
-      Base = CurDAG->getRegister(0, VT);
-      return true;
-    }
+/// isImmZExt12 - Test whether Val can be represented as a zero-extended
+/// 12-bit immediate, i.e. lies in the range [0, 0xFFF]. If so, store it in
+/// Imm and return true; otherwise return false and leave Imm untouched.
+/// MatchAddress uses this to validate displacements when is12Bit is set.
+static bool isImmZExt12(int64_t Val, int64_t &Imm) {
+  if (Val >= 0 && Val <= 0xFFF) {
+    Imm = Val;
+    return true;
    }
-
-  Disp = CurDAG->getTargetConstant(0, MVT::i32);
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr))
-    Base = CurDAG->getTargetFrameIndex(FI->getIndex(), VT);
-  else
-    Base = Addr;
-  return true;      // [r+0]
+  return false;
  }
  
  /// MatchAddress - Add the specified node to the specified addressing mode,
  /// returning true if it cannot be done.  This just pattern matches for the
  /// addressing mode.
  bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
-                                       unsigned Depth) {
+                                       bool is12Bit, unsigned Depth) {
    DebugLoc dl = N.getDebugLoc();
    DOUT << "MatchAddress: "; DEBUG(AM.dump());
    // Limit recursion.
@@ -227,9 +206,12 @@ bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
    switch (N.getOpcode()) {
    default: break;
    case ISD::Constant: {
-    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
-    int32_t Imm;
-    if (isImmSExt20(AM.Disp + Val, Imm)) {
+    int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+    int64_t Imm = 0;
+    bool Match = (is12Bit ?
+                  isImmZExt12(AM.Disp + Val, Imm) :
+                  isImmSExt20(AM.Disp + Val, Imm));
+    if (Match) {
        AM.Disp = Imm;
        return false;
      }
@@ -237,8 +219,8 @@ bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
    }
  
    case ISD::FrameIndex:
-    if (AM.BaseType == SystemZRRIAddressMode::RegBase
-        && AM.Base.Reg.getNode() == 0) {
+    if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+        AM.Base.Reg.getNode() == 0) {
        AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
        AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
        return false;
@@ -255,12 +237,12 @@ bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
  
      // Test if the LHS of the sub can be folded.
      SystemZRRIAddressMode Backup = AM;
-    if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) {
+    if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
        AM = Backup;
        break;
      }
      // Test if the index field is free for use.
-    if (AM.IndexReg.getNode()) {
+    if (AM.IndexReg.getNode() || AM.isRI) {
        AM = Backup;
        break;
      }
@@ -295,19 +277,20 @@ bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
  
    case ISD::ADD: {
      SystemZRRIAddressMode Backup = AM;
-    if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) &&
-        !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1))
+    if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
+        !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
        return false;
      AM = Backup;
-    if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) &&
-        !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1))
+    if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
+        !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
        return false;
      AM = Backup;
  
      // If we couldn't fold both operands into the address at the same time,
      // see if we can just put each operand into a register and fold at least
      // the add.
-    if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
+    if (!AM.isRI &&
+        AM.BaseType == SystemZRRIAddressMode::RegBase &&
          !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
        AM.Base.Reg = N.getNode()->getOperand(0);
        AM.IndexReg = N.getNode()->getOperand(1);
@@ -320,12 +303,15 @@ bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
      // Handle "X | C" as "X + C" iff X is known to have C bits clear.
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
        SystemZRRIAddressMode Backup = AM;
-      uint64_t Offset = CN->getSExtValue();
-      int32_t Imm;
-      // Start with the LHS as an addr mode.
-      if (!MatchAddress(N.getOperand(0), AM, Depth+1) &&
-          // The resultant disp must fit in 20-bits.
-          isImmSExt20(AM.Disp + Offset, Imm) &&
+      int64_t Offset = CN->getSExtValue();
+      int64_t Imm = 0;
+      bool MatchOffset = (is12Bit ?
+                          isImmZExt12(AM.Disp + Offset, Imm) :
+                          isImmSExt20(AM.Disp + Offset, Imm));
+      // The resultant disp must fit in 12 or 20-bits.
+      if (MatchOffset &&
+          // LHS should be an addr mode.
+          !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
            // Check to see if the LHS & C is zero.
            CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
          AM.Disp = Imm;
@@ -345,8 +331,8 @@ bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
                                             SystemZRRIAddressMode &AM) {
    // Is the base register already occupied?
    if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
-    // If so, check to see if the scale index register is set.
-    if (AM.IndexReg.getNode() == 0) {
+    // If so, check to see if the index register is set.
+    if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
        AM.IndexReg = N;
        return false;
      }
@@ -361,18 +347,199 @@ bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
    return false;
  }
  
+void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
+                                               SDValue &Base, SDValue &Disp) {
+  if (AM.BaseType == SystemZRRIAddressMode::RegBase)
+    Base = AM.Base.Reg;
+  else
+    Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
+  Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
+}
+/// getAddressOperands - As getAddressOperandsRI, plus the index register.
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
+                                             SDValue &Base, SDValue &Disp,
+                                             SDValue &Index) {
+  getAddressOperandsRI(AM, Base, Disp);
+  Index = AM.IndexReg; // copied unchanged; no default is substituted here
+}
+
+/// SelectAddrRI12Only - Match [base + unsigned 12-bit disp] only; forwards to
+/// SelectAddrRI12 with is12BitOnly set, so no 20-bit alternative is tried.
+bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue Op, SDValue& Addr,
+                                             SDValue &Base, SDValue &Disp) {
+  return SelectAddrRI12(Op, Addr, Base, Disp, /*is12BitOnly*/true);
+}
+/// SelectAddrRI12 - Match Addr as base + unsigned 12-bit disp (no index).
+bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue Op, SDValue& Addr,
+                                         SDValue &Base, SDValue &Disp,
+                                         bool is12BitOnly) { // true: skip 20-bit check
+  SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
+  bool Done = false; // set once Addr has been handed over whole
+
+  if (!Addr.hasOneUse()) {
+    unsigned Opcode = Addr.getOpcode();
+    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // Folding Addr into the addressing mode is allowed even when Addr has
+      // several uses, since an address computation is normally consumed as an
+      // address by each of them. A CopyToReg use is the exception: it means
+      // the value is live-out and will be computed by an LA anyway, so Addr is
+      // passed to MatchAddressBase whole to avoid computing the address twice.
+      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+        if (UI->getOpcode() == ISD::CopyToReg) {
+          MatchAddressBase(Addr, AM12);
+          Done = true;
+          break;
+        }
+      }
+    }
+  }
+  if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+    return false; // MatchAddress returns true when it cannot match
+
+  // Decline if the 20-bit form would fold a displacement that 12-bit did not.
+  if (!Done && !is12BitOnly &&
+      !MatchAddress(Addr, AM20, /* is12Bit */ false))
+    if (AM12.Disp == 0 && AM20.Disp != 0)
+      return false;
+
+  DOUT << "MatchAddress (final): "; DEBUG(AM12.dump());
+
+  MVT VT = Addr.getValueType();
+  if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+    if (!AM12.Base.Reg.getNode())
+      AM12.Base.Reg = CurDAG->getRegister(0, VT); // no base matched: register 0
+  }
+
+  assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+  getAddressOperandsRI(AM12, Base, Disp);
+
+  return true;
+}
+
+/// SelectAddrRI - Match Addr as base register plus signed 20-bit displacement
+/// [r+imm]; on success fills Base and Disp and returns true.
+bool SystemZDAGToDAGISel::SelectAddrRI(SDValue Op, SDValue& Addr,
+                                       SDValue &Base, SDValue &Disp) {
+  SystemZRRIAddressMode AM(/*isRI*/true);
+  bool Done = false; // set once Addr has been handed over whole
+
+  if (!Addr.hasOneUse()) {
+    unsigned Opcode = Addr.getOpcode();
+    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // Folding Addr into the addressing mode is allowed even when Addr has
+      // several uses, since an address computation is normally consumed as an
+      // address by each of them. A CopyToReg use is the exception: it means
+      // the value is live-out and will be computed by an LA anyway, so Addr is
+      // passed to MatchAddressBase whole to avoid computing the address twice.
+      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+        if (UI->getOpcode() == ISD::CopyToReg) {
+          MatchAddressBase(Addr, AM);
+          Done = true;
+          break;
+        }
+      }
+    }
+  }
+  if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
+    return false; // MatchAddress returns true when it cannot match
+
+  DOUT << "MatchAddress (final): "; DEBUG(AM.dump());
+
+  MVT VT = Addr.getValueType();
+  if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
+    if (!AM.Base.Reg.getNode())
+      AM.Base.Reg = CurDAG->getRegister(0, VT); // no base matched: register 0
+  }
+
+  assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
+
+  getAddressOperandsRI(AM, Base, Disp);
+
+  return true;
+}
+
+/// SelectAddrRRI12 - Match Addr as base + index + unsigned 12-bit displacement
+/// [base + idx + imm]; on success fills Base, Disp and Index and returns true.
+bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Op, SDValue Addr,
+                                SDValue &Base, SDValue &Disp, SDValue &Index) {
+  SystemZRRIAddressMode AM20, AM12;
+  bool Done = false; // set once Addr has been handed over whole
+
+  if (!Addr.hasOneUse()) {
+    unsigned Opcode = Addr.getOpcode();
+    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // Folding Addr into the addressing mode is allowed even when Addr has
+      // several uses, since an address computation is normally consumed as an
+      // address by each of them. A CopyToReg use is the exception: it means
+      // the value is live-out and will be computed by an LA anyway, so Addr is
+      // passed to MatchAddressBase whole to avoid computing the address twice.
+      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+        if (UI->getOpcode() == ISD::CopyToReg) {
+          MatchAddressBase(Addr, AM12);
+          Done = true;
+          break;
+        }
+      }
+    }
+  }
+  if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
+    return false; // MatchAddress returns true when it cannot match
+
+  // Decline if the 20-bit form would fold a displacement that 12-bit did not.
+  if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
+    if (AM12.Disp == 0 && AM20.Disp != 0)
+      return false;
+
+  DOUT << "MatchAddress (final): "; DEBUG(AM12.dump());
+
+  MVT VT = Addr.getValueType();
+  if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
+    if (!AM12.Base.Reg.getNode())
+      AM12.Base.Reg = CurDAG->getRegister(0, VT); // no base matched: register 0
+  }
+
+  if (!AM12.IndexReg.getNode())
+    AM12.IndexReg = CurDAG->getRegister(0, VT); // no index matched: register 0
+
+  getAddressOperands(AM12, Base, Disp, Index);
+
+  return true;
+}
+
  /// Returns true if the address can be represented by a base register plus
  /// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI(SDValue Op, SDValue Addr,
-                                SDValue &Base, SDValue &Index, SDValue &Disp) {
+bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Op, SDValue Addr,
+                                SDValue &Base, SDValue &Disp, SDValue &Index) {
    SystemZRRIAddressMode AM;
    bool Done = false;
  
-  // FIXME: Should we better use lay instruction for non-single uses?
-
-  if (!Done && MatchAddress(Addr, AM))
+  if (!Addr.hasOneUse()) {
+    unsigned Opcode = Addr.getOpcode();
+    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
+      // If we are able to fold N into addressing mode, then we'll allow it even
+      // if N has multiple uses. In general, addressing computation is used as
+      // addresses by all of its uses. But watch out for CopyToReg uses, that
+      // means the address computation is liveout. It will be computed by a LA
+      // so we want to avoid computing the address twice.
+      for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+             UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+        if (UI->getOpcode() == ISD::CopyToReg) {
+          MatchAddressBase(Addr, AM);
+          Done = true;
+          break;
+        }
+      }
+    }
+  }
+  if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
      return false;
  
+  DOUT << "MatchAddress (final): "; DEBUG(AM.dump());
+
    MVT VT = Addr.getValueType();
    if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
      if (!AM.Base.Reg.getNode())
@@ -382,14 +549,53 @@ bool SystemZDAGToDAGISel::SelectAddrRRI(SDValue Op, SDValue Addr,
    if (!AM.IndexReg.getNode())
      AM.IndexReg = CurDAG->getRegister(0, VT);
  
+  getAddressOperands(AM, Base, Disp, Index);
+
+  return true;
+}
+
+/// SelectLAAddr - Run MatchAddress on Addr and decide whether the addressing
+/// mode it matched is complex enough to be worth emitting as an LA/LAY.
+bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Op, SDValue Addr,
+                                  SDValue &Base, SDValue &Disp, SDValue &Index) {
+  SystemZRRIAddressMode AM;
+
+  if (MatchAddress(Addr, AM, false))
+    return false;
+
+  MVT VT = Addr.getValueType();
+  unsigned Complexity = 0;
    if (AM.BaseType == SystemZRRIAddressMode::RegBase)
-    Base = AM.Base.Reg;
+    if (AM.Base.Reg.getNode())
+      Complexity = 1;
+    else
+      AM.Base.Reg = CurDAG->getRegister(0, VT);
+  else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
+    Complexity = 4;
+
+  if (AM.IndexReg.getNode())
+    Complexity += 1;
    else
-    Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
-  Index = AM.IndexReg;
-  Disp = Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
+    AM.IndexReg = CurDAG->getRegister(0, VT);
  
-  return true;
+  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
+    Complexity += 1;
+
+  if (Complexity > 2) {
+    getAddressOperands(AM, Base, Disp, Index);
+    return true;
+  }
+
+  return false;
+}
+/// TryFoldLoad - If N is a foldable load used by P, match its address (RRI20).
+bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
+                                 SDValue &Base, SDValue &Disp, SDValue &Index) {
+  if (ISD::isNON_EXTLoad(N.getNode()) &&
+      N.hasOneUse() && // N's value must have no other user
+      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
+    return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); // operand 1 is passed as the address
+  return false;
  }
  
  /// InstructionSelect - This callback is invoked by
@@ -412,7 +618,9 @@ void SystemZDAGToDAGISel::InstructionSelect() {
  
  SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
    SDNode *Node = Op.getNode();
+  MVT NVT = Node->getValueType(0);
    DebugLoc dl = Op.getDebugLoc();
+  unsigned Opcode = Node->getOpcode();
  
    // Dump information about the Node being selected
    #ifndef NDEBUG
@@ -430,8 +638,193 @@ SDNode *SystemZDAGToDAGISel::Select(SDValue Op) {
      DOUT << "\n";
      Indent -= 2;
      #endif
+    return NULL; // Already selected.
+  }
+
+  switch (Opcode) {
+  default: break;
+  case ISD::SDIVREM: {
+    unsigned Opc, MOpc;
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
+    MVT ResVT;
+    bool is32Bit = false;
+    switch (NVT.getSimpleVT()) {
+      default: assert(0 && "Unsupported VT!");
+      case MVT::i32:
+        Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
+        ResVT = MVT::v2i64;
+        is32Bit = true;
+        break;
+      case MVT::i64:
+        Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
+        ResVT = MVT::v2i64;
+        break;
+    }
+
+    SDValue Tmp0, Tmp1, Tmp2;
+    bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);
+
+    // Prepare the dividend
+    SDNode *Dividend;
+    if (is32Bit)
+      Dividend = CurDAG->getTargetNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
+    else
+      Dividend = N0.getNode();
+
+    // Insert prepared dividend into suitable 'subreg'
+    SDNode *Tmp = CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
+                                        dl, ResVT);
+    Dividend =
+      CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
+                            SDValue(Tmp, 0), SDValue(Dividend, 0),
+                            CurDAG->getTargetConstant(subreg_odd, MVT::i32));
+
+    SDNode *Result;
+    SDValue DivVal = SDValue(Dividend, 0);
+    if (foldedLoad) {
+      SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+      Result = CurDAG->getTargetNode(MOpc, dl, ResVT, Ops, array_lengthof(Ops));
+      // Update the chain.
+      ReplaceUses(N1.getValue(1), SDValue(Result, 0));
+    } else {
+      Result = CurDAG->getTargetNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
+    }
+
+    // Copy the division (odd subreg) result, if it is needed.
+    if (!Op.getValue(0).use_empty()) {
+      unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+      SDNode *Div = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
+                                          dl, NVT,
+                                          SDValue(Result, 0),
+                                          CurDAG->getTargetConstant(SubRegIdx,
+                                                                    MVT::i32));
+
+      ReplaceUses(Op.getValue(0), SDValue(Div, 0));
+      #ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result->dump(CurDAG));
+      DOUT << "\n";
+      #endif
+    }
+
+    // Copy the remainder (even subreg) result, if it is needed.
+    if (!Op.getValue(1).use_empty()) {
+      unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+      SDNode *Rem = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
+                                          dl, NVT,
+                                          SDValue(Result, 0),
+                                          CurDAG->getTargetConstant(SubRegIdx,
+                                                                    MVT::i32));
+
+      ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
+      #ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result->dump(CurDAG));
+      DOUT << "\n";
+      #endif
+    }
+
+#ifndef NDEBUG
+    Indent -= 2;
+#endif
+
      return NULL;
    }
+  case ISD::UDIVREM: {
+    unsigned Opc, MOpc, ClrOpc;
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    MVT ResVT;
+
+    bool is32Bit = false;
+    switch (NVT.getSimpleVT()) {
+      default: assert(0 && "Unsupported VT!");
+      case MVT::i32:
+        Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
+        ClrOpc = SystemZ::MOV64Pr0_even;
+        ResVT = MVT::v2i32;
+        is32Bit = true;
+        break;
+      case MVT::i64:
+        Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
+        ClrOpc = SystemZ::MOV128r0_even;
+        ResVT = MVT::v2i64;
+        break;
+    }
+
+    SDValue Tmp0, Tmp1, Tmp2;
+    bool foldedLoad = TryFoldLoad(Op, N1, Tmp0, Tmp1, Tmp2);
+
+    // Prepare the dividend
+    SDNode *Dividend = N0.getNode();
+
+    // Insert prepared dividend into suitable 'subreg'
+    SDNode *Tmp = CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
+                                        dl, ResVT);
+    {
+      unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+      Dividend =
+        CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl, ResVT,
+                              SDValue(Tmp, 0), SDValue(Dividend, 0),
+                              CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
+    }
+
+    // Zero out even subreg
+    Dividend = CurDAG->getTargetNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
+
+    SDValue DivVal = SDValue(Dividend, 0);
+    SDNode *Result;
+    if (foldedLoad) {
+      SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
+      Result = CurDAG->getTargetNode(MOpc, dl,ResVT,
+                                     Ops, array_lengthof(Ops));
+      // Update the chain.
+      ReplaceUses(N1.getValue(1), SDValue(Result, 0));
+    } else {
+      Result = CurDAG->getTargetNode(Opc, dl, ResVT, DivVal, N1);
+    }
+
+    // Copy the division (odd subreg) result, if it is needed.
+    if (!Op.getValue(0).use_empty()) {
+      unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+      SDNode *Div = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
+                                          dl, NVT,
+                                          SDValue(Result, 0),
+                                          CurDAG->getTargetConstant(SubRegIdx,
+                                                                    MVT::i32));
+      ReplaceUses(Op.getValue(0), SDValue(Div, 0));
+      #ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result->dump(CurDAG));
+      DOUT << "\n";
+      #endif
+    }
+
+    // Copy the remainder (even subreg) result, if it is needed.
+    if (!Op.getValue(1).use_empty()) {
+      unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+      SDNode *Rem = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
+                                          dl, NVT,
+                                          SDValue(Result, 0),
+                                          CurDAG->getTargetConstant(SubRegIdx,
+                                                                    MVT::i32));
+      ReplaceUses(Op.getValue(1), SDValue(Rem, 0));
+      #ifndef NDEBUG
+      DOUT << std::string(Indent-2, ' ') << "=> ";
+      DEBUG(Result->dump(CurDAG));
+      DOUT << "\n";
+      #endif
+    }
+
+#ifndef NDEBUG
+    Indent -= 2;
+#endif
+
+    return NULL;
+  }
+  }
  
    // Select the default instruction
    SDNode *ResNode = SelectCode(Op);