return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
}
-SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
SDOperand ChainOp = Op.getOperand(0);
SDOperand DestOp = Op.getOperand(1);
SDOperand SourceOp = Op.getOperand(2);
// The libc version is likely to be faster for the these cases. It can
// use the address value and run time information about the CPU.
// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
- // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. Change
- // this once Thumb ldmia / stmia support is added.
unsigned Size = I->getValue();
if (AlwaysInline ||
- (!ST->isThumb() && Size <= Subtarget->getMaxInlineSizeThreshold() &&
+ (Size <= Subtarget->getMaxInlineSizeThreshold() &&
(Align & 3) == 0))
return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
case ISD::RETURNADDR: break;
case ISD::FRAMEADDR: break;
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
- case ISD::MEMCPY: return LowerMEMCPY(Op, DAG, Subtarget);
+ case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
}
return SDOperand();
}
SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
- SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG,
- const ARMSubtarget *ST);
+ SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest,
SDOperand Source, SDOperand Count,
SelectionDAG &DAG);
///
ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
- unsigned getMaxInlineSizeThreshold() const { return 64; }
+ unsigned getMaxInlineSizeThreshold() const {
+ // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
+ // Change this once Thumb ldmia / stmia support is added.
+ return isThumb() ? 0 : 64;
+ }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
assert(!AlwaysInline && "Cannot inline copy of unknown size");
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
}
- unsigned Size = I->getValue();
-
- if (AlwaysInline)
- return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
+ // If not DWORD aligned or if size is more than threshold, then call memcpy.
// The libc version is likely to be faster for the following cases. It can
// use the address value and run time information about the CPU.
// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
-
- // If not DWORD aligned, call memcpy.
- if ((Align & 3) != 0)
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- // If size is more than the threshold, call memcpy.
- if (Size > Subtarget->getMaxInlineSizeThreshold())
- return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
-
- return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
+ unsigned Size = I->getValue();
+ if (AlwaysInline ||
+ (Size <= Subtarget->getMaxInlineSizeThreshold() &&
+ (Align & 3) == 0))
+ return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
+ return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
}
SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,