//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "systemz-selectiondag-info"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
-SystemZSelectionDAGInfo::
-SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+#define DEBUG_TYPE "systemz-selectiondag-info"
+
+// Construct the SystemZ SelectionDAG info object, handing the address of
+// the supplied DataLayout on to the TargetSelectionDAGInfo base class.
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL)
+    : TargetSelectionDAGInfo(&DL) {
+}
// The destructor has no work of its own to do; its body is empty.
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {}
-// Use MVC to copy Size bytes from Src to Dest, deciding whether to use
-// a loop or straight-line code.
-static SDValue emitMVC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
- SDValue Dst, SDValue Src, uint64_t Size) {
+// Decide whether it is best to use a loop or straight-line code for
+// a block operation of Size bytes with source address Src and destination
+// address Dst. Sequence is the opcode to use for straight-line code
+// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
+// Return the chain for the completed operation.
+static SDValue emitMemMem(SelectionDAG &DAG, SDLoc DL, unsigned Sequence,
+ unsigned Loop, SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size) {
EVT PtrVT = Src.getValueType();
// The heuristic we use is to prefer loops for anything that would
// require 7 or more MVCs. With these kinds of sizes there isn't
// The next value up, 6 * 256, can be implemented in the same
// number of straight-line MVCs as 6 * 256 - 1.
if (Size > 6 * 256)
- return DAG.getNode(SystemZISD::MVC_LOOP, DL, MVT::Other, Chain, Dst, Src,
+ return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
DAG.getConstant(Size, PtrVT),
DAG.getConstant(Size / 256, PtrVT));
- return DAG.getNode(SystemZISD::MVC, DL, MVT::Other, Chain, Dst, Src,
+ return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
DAG.getConstant(Size, PtrVT));
}
if (IsVolatile)
return SDValue();
- if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size))
- return emitMVC(DAG, DL, Chain, Dst, Src, CSize->getZExtValue());
+ if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, Dst, Src, CSize->getZExtValue());
return SDValue();
}
if (IsVolatile)
return SDValue();
- if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
if (Bytes == 0)
return SDValue();
- if (ConstantSDNode *CByte = dyn_cast<ConstantSDNode>(Byte)) {
+ if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) {
// Handle cases that can be done using at most two of
// MVI, MVHI, MVHHI and MVGHI. The latter two can only be
// used if ByteVal is all zeros or all ones; in other cases,
}
}
assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
+
+ // Handle the special case of a memset of 0, which can use XC.
+ auto *CByte = dyn_cast<ConstantSDNode>(Byte);
+ if (CByte && CByte->getZExtValue() == 0)
+ return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
+ Chain, Dst, Dst, Bytes);
+
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo,
false, false, Align);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, PtrVT));
- return emitMVC(DAG, DL, Chain, DstPlus1, Dst, Bytes - 1);
+ return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
+ Chain, DstPlus1, Dst, Bytes - 1);
}
return SDValue();
}
+// Compare the Size bytes starting at Src1 with the Size bytes starting at
+// Src2 using CLC, choosing between straight-line code and a loop.
+// The node that is returned is created with a chain (MVT::Other) result
+// and a glue result.
+static SDValue emitCLC(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+                       SDValue Src1, SDValue Src2, uint64_t Size) {
+  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+  EVT PtrVT = Src1.getValueType();
+  // Straight-line code is preferred for up to three CLCs: a two-CLC
+  // sequence needs only one branch, and a three-CLC sequence needs the
+  // same two branches as a loop while being shorter. For anything longer
+  // than 768 bytes, it seems relatively likely that a difference will be
+  // found within the first 768 bytes, so just minimize the number of
+  // branch instructions to avoid polluting the prediction buffer too much:
+  // a loop only ever needs 2 branches, whereas a straight-line sequence
+  // would need 3 or more.
+  const uint64_t MaxStraightLineBytes = 3 * 256;
+  if (Size <= MaxStraightLineBytes)
+    return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
+                       DAG.getConstant(Size, PtrVT));
+  // Loop form: operands are the total byte count and the count of whole
+  // 256-byte blocks.
+  return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
+                     DAG.getConstant(Size, PtrVT),
+                     DAG.getConstant(Size / 256, PtrVT));
+}
+
// Convert the current CC value into an integer that is 0 if CC == 0,
// less than zero if CC == 1 and greater than zero if CC >= 2.
// The sequence starts with IPM, which puts CC into bits 29 and 28
static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
- DAG.getConstant(28, MVT::i32));
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
DAG.getConstant(31, MVT::i32));
return ROTL;
SDValue Src1, SDValue Src2, SDValue Size,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
- EVT PtrVT = Src1.getValueType();
- if (ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(Size)) {
+ if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
- if (Bytes >= 1 && Bytes <= 0x100) {
- // A single CLC.
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain,
- Src1, Src2, Size, DAG.getConstant(0, PtrVT));
- SDValue Glue = Chain.getValue(1);
- return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
- }
+ assert(Bytes > 0 && "Caller should have handled 0-size case");
+ Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes);
+ SDValue Glue = Chain.getValue(1);
+ return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
}
return std::make_pair(SDValue(), SDValue());
}
Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
Ops.push_back(Glue);
VTs = DAG.getVTList(PtrVT, MVT::Glue);
- End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
return std::make_pair(End, Chain);
}