//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the ARMSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
#include <cstring>
using namespace llvm;

#define DEBUG_TYPE "arm-selectiondag-info"
21 ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
22 : TargetSelectionDAGInfo(&DL) {}
24 ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
27 // Emit, if possible, a specialized version of the given Libcall. Typically this
28 // means selecting the appropriately aligned version, but we also convert memset
30 SDValue ARMSelectionDAGInfo::
31 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
33 SDValue Dst, SDValue Src,
34 SDValue Size, unsigned Align,
35 RTLIB::Libcall LC) const {
36 const ARMSubtarget &Subtarget =
37 DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
38 const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
40 // Only use a specialized AEABI function if the default version of this
41 // Libcall is an AEABI function.
42 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
45 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
46 // able to translate memset to memclr and use the value to index the function
56 AEABILibcall = AEABI_MEMCPY;
59 AEABILibcall = AEABI_MEMMOVE;
62 AEABILibcall = AEABI_MEMSET;
63 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
64 if (ConstantSrc->getZExtValue() == 0)
65 AEABILibcall = AEABI_MEMCLR;
71 // Choose the most-aligned libcall variant that we can
78 AlignVariant = ALIGN8;
79 else if ((Align & 3) == 0)
80 AlignVariant = ALIGN4;
82 AlignVariant = ALIGN1;
84 TargetLowering::ArgListTy Args;
85 TargetLowering::ArgListEntry Entry;
86 Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext());
88 Args.push_back(Entry);
89 if (AEABILibcall == AEABI_MEMCLR) {
91 Args.push_back(Entry);
92 } else if (AEABILibcall == AEABI_MEMSET) {
93 // Adjust parameters for memset, EABI uses format (ptr, size, value),
94 // GNU library uses (ptr, value, size)
95 // See RTABI section 4.3.4
97 Args.push_back(Entry);
99 // Extend or truncate the argument to be an i32 value for the call.
100 if (Src.getValueType().bitsGT(MVT::i32))
101 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
102 else if (Src.getValueType().bitsLT(MVT::i32))
103 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
106 Entry.Ty = Type::getInt32Ty(*DAG.getContext());
107 Entry.isSExt = false;
108 Args.push_back(Entry);
111 Args.push_back(Entry);
114 Args.push_back(Entry);
117 char const *FunctionNames[4][3] = {
118 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" },
119 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
120 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" },
121 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" }
123 TargetLowering::CallLoweringInfo CLI(DAG);
124 CLI.setDebugLoc(dl).setChain(Chain)
125 .setCallee(TLI->getLibcallCallingConv(LC),
126 Type::getVoidTy(*DAG.getContext()),
127 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
128 TLI->getPointerTy()), std::move(Args), 0)
130 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
132 return CallResult.second;
136 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
138 SDValue Dst, SDValue Src,
139 SDValue Size, unsigned Align,
140 bool isVolatile, bool AlwaysInline,
141 MachinePointerInfo DstPtrInfo,
142 MachinePointerInfo SrcPtrInfo) const {
143 const ARMSubtarget &Subtarget =
144 DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
145 // Do repeated 4-byte loads and stores. To be improved.
146 // This requires 4-byte alignment.
147 if ((Align & 3) != 0)
149 // This requires the copy size to be a constant, preferably
150 // within a subtarget-specific limit.
151 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
153 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
155 uint64_t SizeVal = ConstantSize->getZExtValue();
156 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
157 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
160 unsigned BytesLeft = SizeVal & 3;
161 unsigned NumMemOps = SizeVal >> 2;
162 unsigned EmittedNumMemOps = 0;
166 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
167 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
170 uint64_t SrcOff = 0, DstOff = 0;
172 // FIXME: We should invent a VMCOPY pseudo-instruction that lowers to
173 // VLDM/VSTM and make this code emit it when appropriate. This would reduce
174 // pressure on the general purpose registers. However this seems harder to map
175 // onto the register allocator's view of the world.
177 // The number of MCOPY pseudo-instructions to emit. We use up to MaxLoadsInLDM
178 // registers per mcopy, which will get lowered into ldm/stm later on. This is
179 // a lower bound on the number of MCOPY operations we must emit.
180 unsigned NumMCOPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
182 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
184 for (unsigned I = 0; I != NumMCOPYs; ++I) {
185 // Evenly distribute registers among MCOPY operations to reduce register
187 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMCOPYs;
188 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
190 Dst = DAG.getNode(ARMISD::MCOPY, dl, VTs, Chain, Dst, Src,
191 DAG.getConstant(NumRegs, dl, MVT::i32));
192 Src = Dst.getValue(1);
193 Chain = Dst.getValue(2);
195 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
196 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
198 EmittedNumMemOps = NextEmittedNumMemOps;
204 // Issue loads / stores for the trailing (1 - 3) bytes.
205 unsigned BytesLeftSave = BytesLeft;
208 if (BytesLeft >= 2) {
216 Loads[i] = DAG.getLoad(VT, dl, Chain,
217 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
218 DAG.getConstant(SrcOff, dl, MVT::i32)),
219 SrcPtrInfo.getWithOffset(SrcOff),
220 false, false, false, 0);
221 TFOps[i] = Loads[i].getValue(1);
226 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
227 makeArrayRef(TFOps, i));
230 BytesLeft = BytesLeftSave;
232 if (BytesLeft >= 2) {
240 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
241 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
242 DAG.getConstant(DstOff, dl, MVT::i32)),
243 DstPtrInfo.getWithOffset(DstOff), false, false, 0);
248 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
249 makeArrayRef(TFOps, i));
253 SDValue ARMSelectionDAGInfo::
254 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
256 SDValue Dst, SDValue Src,
257 SDValue Size, unsigned Align,
259 MachinePointerInfo DstPtrInfo,
260 MachinePointerInfo SrcPtrInfo) const {
261 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
266 SDValue ARMSelectionDAGInfo::
267 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
268 SDValue Chain, SDValue Dst,
269 SDValue Src, SDValue Size,
270 unsigned Align, bool isVolatile,
271 MachinePointerInfo DstPtrInfo) const {
272 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,