1 //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the PTXSelectionDAGInfo class.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "ptx-selectiondag-info"
15 #include "PTXTargetMachine.h"
16 #include "llvm/DerivedTypes.h"
17 #include "llvm/CodeGen/SelectionDAG.h"
// Constructor. Forwards the target machine to the TargetSelectionDAGInfo base
// and caches the address of the PTXSubtarget obtained from TM, so later
// lowering code can query subtarget properties through Subtarget.
20 PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
21 : TargetSelectionDAGInfo(TM),
22 Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
// Destructor for PTXSelectionDAGInfo.
25 PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
// Target-specific memcpy lowering: expands a copy into explicit DAG loads and
// stores instead of a library call.
//
// Visible behavior:
//  - Size is inspected as a ConstantSDNode and its zero-extended value is split
//    into whole 4-byte operations (SizeVal >> 2) and 0-3 leftover bytes
//    (SizeVal & 3).
//  - Word copies are emitted in batches of at most MAX_LOADS_IN_LDM: a group of
//    loads whose output chains are merged by a TokenFactor, then the matching
//    stores merged by a second TokenFactor, which becomes the new Chain.
//  - Leftover bytes are handled by a further load group and store group; the
//    final return value is a TokenFactor over those trailing stores.
//
// Parameters used in the visible code: DAG (node factory), dl (debug location
// attached to every node), Dst / Src (base pointers), Size (byte count),
// isVolatile (forwarded to the word-sized loads and stores), DstPtrInfo /
// SrcPtrInfo (pointer info, offset per access via getWithOffset).
// NOTE(review): Chain is used below but does not appear in the parameter lines
// shown here; Align and AlwaysInline are not referenced in the visible lines.
// Confirm against the full declaration.
29 PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
31 SDValue Dst, SDValue Src,
32 SDValue Size, unsigned Align,
33 bool isVolatile, bool AlwaysInline,
34 MachinePointerInfo DstPtrInfo,
35 MachinePointerInfo SrcPtrInfo) const {
36 // Do repeated 4-byte loads and stores. To be improved.
37 // This requires 4-byte alignment.
40 // This requires the copy size to be a constant, preferably
41 // within a subtarget-specific limit.
// NOTE(review): dyn_cast yields a null pointer when Size is not a
// ConstantSDNode, and ConstantSize is dereferenced on the next line. A null
// guard is expected between these two statements; verify it is present.
42 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
45 uint64_t SizeVal = ConstantSize->getZExtValue();
46 // Always inline memcpys. In PTX, we do not have a C library that provides
// (the sentence above is cut off; presumably it ends "a memcpy routine" --
// TODO confirm and restore the full comment)
// Low two bits of the size: bytes that do not fill a whole 4-byte word.
51 unsigned BytesLeft = SizeVal & 3;
// Number of whole 4-byte load/store pairs needed.
52 unsigned NumMemOps = SizeVal >> 2;
// Running count of word-sized pairs already emitted.
53 unsigned EmittedNumMemOps = 0;
// Batch size: capacity of the per-batch scratch arrays below.
57 const unsigned MAX_LOADS_IN_LDM = 6;
// TFOps collects the chain results that feed each TokenFactor; Loads holds the
// loaded values so the store group can consume them.
58 SDValue TFOps[MAX_LOADS_IN_LDM];
59 SDValue Loads[MAX_LOADS_IN_LDM];
// Byte offsets from Src and Dst for the next access.
60 uint64_t SrcOff = 0, DstOff = 0;
// Integer type used for the address arithmetic, chosen by subtarget bitness.
61 EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
63 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
64 // same number of stores. The loads and stores will get combined into
// (the sentence above is cut off in this file -- TODO restore or reword)
66 while (EmittedNumMemOps < NumMemOps) {
// Load group: stop at the batch capacity or when all word copies are covered.
68 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
// Load from Src + SrcOff; every load in the group hangs off the same Chain.
69 Loads[i] = DAG.getLoad(VT, dl, Chain,
70 DAG.getNode(ISD::ADD, dl, PointerType, Src,
71 DAG.getConstant(SrcOff, PointerType)),
72 SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
// Result 1 of the load node is its output chain.
74 TFOps[i] = Loads[i].getValue(1);
// Merge the load chains so the stores below are ordered after all loads.
77 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
// Store group: same bounds as the load group above.
80 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
// Store the i-th loaded value to Dst + DstOff; TFOps is reused for the
// store nodes themselves.
81 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
82 DAG.getNode(ISD::ADD, dl, PointerType, Dst,
83 DAG.getConstant(DstOff, PointerType)),
84 DstPtrInfo.getWithOffset(DstOff),
85 isVolatile, false, 0);
// Merge the store chains; this becomes the Chain for the next batch.
88 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
// i is the number of operations emitted in this batch.
90 EmittedNumMemOps += i;
96 // Issue loads / stores for the trailing (1 - 3) bytes.
// Remember the leftover count so it can be restored before the store pass.
97 unsigned BytesLeftSave = BytesLeft;
100 if (BytesLeft >= 2) {
// NOTE(review): the trailing loads pass literal false in the argument
// position where the word-sized loads pass isVolatile -- confirm whether a
// volatile memcpy is meant to lose volatility on its last 1-3 bytes.
108 Loads[i] = DAG.getLoad(VT, dl, Chain,
109 DAG.getNode(ISD::ADD, dl, PointerType, Src,
110 DAG.getConstant(SrcOff, PointerType)),
111 SrcPtrInfo.getWithOffset(SrcOff), false, false,
113 TFOps[i] = Loads[i].getValue(1);
// Order the trailing stores after all trailing loads.
118 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
// Restore the leftover count for the store pass.
121 BytesLeft = BytesLeftSave;
123 if (BytesLeft >= 2) {
// NOTE(review): as with the trailing loads, literal false is passed where
// the word-sized stores pass isVolatile.
131 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
132 DAG.getNode(ISD::ADD, dl, PointerType, Dst,
133 DAG.getConstant(DstOff, PointerType)),
134 DstPtrInfo.getWithOffset(DstOff), false, false, 0);
// Result: a single chain that depends on every trailing store.
139 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
// Target-specific memset lowering hook. Not implemented for PTX: the only
// visible statement is an llvm_unreachable carrying that message, and none of
// the parameters (DAG, dl, Chain, Dst, Src, Size, Align, isVolatile,
// DstPtrInfo) are read in the visible body.
142 SDValue PTXSelectionDAGInfo::
143 EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
144 SDValue Chain, SDValue Dst,
145 SDValue Src, SDValue Size,
146 unsigned Align, bool isVolatile,
147 MachinePointerInfo DstPtrInfo) const {
// Reaching this call means a memset reached the PTX backend's custom hook.
148 llvm_unreachable("memset lowering not implemented for PTX yet");