From: Mon P Wang Date: Sun, 4 Apr 2010 03:10:48 +0000 (+0000) Subject: Reapply address space patch after fixing an issue in MemCopyOptimizer. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=20adc9dc4650313f017b27d9818eb2176238113d;p=oota-llvm.git Reapply address space patch after fixing an issue in MemCopyOptimizer. Added support for address spaces and added a isVolatile field to memcpy, memmove, and memset, e.g., llvm.memcpy.i32(i8*, i8*, i32, i32) -> llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100304 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index ef5d7e2a895..8962bee02ee 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -532,17 +532,17 @@ public: SDValue getStackArgumentTokenFactor(SDValue Chain); SDValue getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool AlwaysInline, + SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); SDValue getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, + SDValue Size, unsigned Align, bool isVol, const Value *DstSV, uint64_t DstOSVff, const Value *SrcSV, uint64_t SrcSVOff); SDValue getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, + SDValue Size, unsigned Align, bool isVol, const Value *DstSV, uint64_t DstSVOff); /// getSetCC - Helper function to make it easier to build SetCC's if you just diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h index d86b33ef50a..bd8a8c4e9d3 100644 --- a/include/llvm/IntrinsicInst.h +++ b/include/llvm/IntrinsicInst.h @@ -133,6 +133,13 @@ namespace llvm { return getAlignmentCst()->getZExtValue(); } + ConstantInt *getVolatileCst() const { + return cast(const_cast(getOperand(5))); + } + bool isVolatile() const { + return getVolatileCst()->getZExtValue() != 0; + } + /// getDest - This is just like getRawDest, but it strips off any cast /// instructions that feed it, giving the original input. The returned /// value is guaranteed to be a pointer. @@ -155,7 +162,11 @@ namespace llvm { void setAlignment(Constant* A) { setOperand(4, A); } - + + void setVolatile(Constant* V) { + setOperand(5, V); + } + const Type *getAlignmentType() const { return getOperand(4)->getType(); } diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index d66e80fb22f..8bbfd774bdb 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -224,16 +224,16 @@ def int_stackprotector : Intrinsic<[], // def int_memcpy : Intrinsic<[], - [llvm_ptr_ty, llvm_ptr_ty, llvm_anyint_ty, - llvm_i32_ty], + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, + llvm_i32_ty, llvm_i1_ty], [IntrWriteArgMem, NoCapture<0>, NoCapture<1>]>; def int_memmove : Intrinsic<[], - [llvm_ptr_ty, llvm_ptr_ty, llvm_anyint_ty, - llvm_i32_ty], + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, + llvm_i32_ty, llvm_i1_ty], [IntrWriteArgMem, NoCapture<0>, NoCapture<1>]>; def int_memset : Intrinsic<[], - [llvm_ptr_ty, llvm_i8_ty, llvm_anyint_ty, - llvm_i32_ty], + [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, + llvm_i32_ty, llvm_i1_ty], [IntrWriteArgMem, NoCapture<0>]>; // These functions do not actually read memory, but they are sensitive to the diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h index faa8fa3aee2..660c9f8001b 100644 --- a/include/llvm/Support/IRBuilder.h +++ b/include/llvm/Support/IRBuilder.h @@ -917,6 +917,11 @@ public: Value *Args[] = { Arg1, Arg2, Arg3, Arg4 }; return Insert(CallInst::Create(Callee, Args, Args+4), Name); } + CallInst *CreateCall5(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3, + Value *Arg4, Value *Arg5, const Twine &Name = "") { + Value *Args[] = { Arg1, Arg2, Arg3, Arg4, Arg5 }; + return Insert(CallInst::Create(Callee, Args, Args+5), Name); + } template CallInst *CreateCall(Value *Callee, InputIterator ArgBegin, diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f040c9db38c..874f840d194 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1191,7 +1191,7 @@ public: EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, + SDValue Op3, unsigned Align, bool isVolatile, bool AlwaysInline, const Value *DstSV, uint64_t DstOff, const Value *SrcSV, uint64_t SrcOff) { @@ -1208,7 +1208,7 @@ public: EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, + SDValue Op3, unsigned Align, bool isVolatile, const Value *DstSV, uint64_t DstOff, const Value *SrcSV, uint64_t SrcOff) { return SDValue(); @@ -1224,7 +1224,7 @@ public: EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, + SDValue Op3, unsigned Align, bool isVolatile, const Value *DstSV, uint64_t DstOff) { return SDValue(); } diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index d278672a1f0..8e76f50bb21 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -46,8 +46,8 @@ namespace llvm { /// EmitMemCpy - Emit a call to the memcpy function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD); + Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, + bool isVolatile, IRBuilder<> &B, const TargetData *TD); /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src @@ -57,8 +57,8 @@ namespace llvm { /// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD); + Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align, + bool isVolatile, IRBuilder<> &B, const TargetData *TD); /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. @@ -70,8 +70,8 @@ namespace llvm { const TargetData *TD); /// EmitMemSet - Emit a call to the memset function - Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B, - const TargetData *TD); + Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile, + IRBuilder<> &B, const TargetData *TD); /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' /// (e.g. 'floor'). This function is known to take a single of type matching diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3643ea7c531..7291c5acbb3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3263,7 +3263,8 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, - unsigned Align, bool AlwaysInline, + unsigned Align, bool isVol, + bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff) { // Turn a memcpy of undef to nop. @@ -3322,7 +3323,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, false, Align); + DstSV, DstSVOff + DstOff, isVol, false, Align); } else { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3333,11 +3334,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, false, false, + SrcSV, SrcSVOff + SrcOff, VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, false, false, + DstSV, DstSVOff + DstOff, VT, isVol, false, Align); } OutChains.push_back(Store); @@ -3352,7 +3353,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, - unsigned Align,bool AlwaysInline, + unsigned Align, bool isVol, + bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff) { // Turn a memmove of undef to nop. @@ -3403,7 +3405,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign); + SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3418,7 +3420,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, false, Align); + DstSV, DstSVOff + DstOff, isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; } @@ -3430,7 +3432,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, - unsigned Align, + unsigned Align, bool isVol, const Value *DstSV, uint64_t DstSVOff) { // Turn a memset of undef to nop. if (Src.getOpcode() == ISD::UNDEF) @@ -3472,7 +3474,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, false, 0); + DstSV, DstSVOff + DstOff, isVol, false, 0); OutChains.push_back(Store); DstOff += VTSize; } @@ -3483,7 +3485,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool AlwaysInline, + unsigned Align, bool isVol, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff) { @@ -3497,7 +3499,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),Align, - false, DstSV, DstSVOff, SrcSV, SrcSVOff); + isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; } @@ -3506,7 +3508,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. SDValue Result = TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - AlwaysInline, + isVol, AlwaysInline, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; @@ -3516,11 +3518,12 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, true, - DstSV, DstSVOff, SrcSV, SrcSVOff); + ConstantSize->getZExtValue(), Align, isVol, + true, DstSV, DstSVOff, SrcSV, SrcSVOff); } // Emit a library call. + assert(!isVol && "library memcpy does not support volatile"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); @@ -3541,7 +3544,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, + unsigned Align, bool isVol, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff) { @@ -3555,8 +3558,8 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Result = getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), - Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff); + ConstantSize->getZExtValue(), Align, isVol, + false, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; } @@ -3564,12 +3567,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, + TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; // Emit a library call. + assert(!isVol && "library memmove does not support volatile"); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); @@ -3590,7 +3594,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, + unsigned Align, bool isVol, const Value *DstSV, uint64_t DstSVOff) { // Check to see if we should lower the memset to stores first. @@ -3601,9 +3605,10 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), - Align, DstSV, DstSVOff); + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, isVol, DstSV, DstSVOff); + if (Result.getNode()) return Result; } @@ -3611,12 +3616,13 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, + TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, DstSV, DstSVOff); if (Result.getNode()) return Result; // Emit a library call. + assert(!isVol && "library memset does not support volatile"); const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 879bdb2cbbb..b06239f4182 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3731,28 +3731,50 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return "_longjmp"+!TLI.usesUnderscoreLongJmp(); case Intrinsic::memcpy: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast(I.getOperand(1)->getType())->getAddressSpace() + < 256 && + cast(I.getOperand(2)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast(I.getOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, + bool isVol = cast(I.getOperand(5))->getZExtValue(); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } case Intrinsic::memset: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast(I.getOperand(1)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast(I.getOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, + bool isVol = cast(I.getOperand(5))->getZExtValue(); + DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, I.getOperand(1), 0)); return 0; } case Intrinsic::memmove: { + // Assert for address < 256 since we support only user defined address + // spaces. + assert(cast(I.getOperand(1)->getType())->getAddressSpace() + < 256 && + cast(I.getOperand(2)->getType())->getAddressSpace() + < 256 && + "Unknown address space"); SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); SDValue Op3 = getValue(I.getOperand(3)); unsigned Align = cast(I.getOperand(4))->getZExtValue(); + bool isVol = cast(I.getOperand(5))->getZExtValue(); // If the source and destination are known to not be aliases, we can // lower memmove as memcpy. @@ -3761,12 +3783,12 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { Size = C->getZExtValue(); if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == AliasAnalysis::NoAlias) { - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false, - I.getOperand(1), 0, I.getOperand(2), 0)); + DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + false, I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, + DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, I.getOperand(1), 0, I.getOperand(2), 0)); return 0; } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b6c81f6910a..77fb0c3cdbd 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -861,7 +861,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, DebugLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*AlwaysInline=*/false, NULL, 0, NULL, 0); + /*isVolatile=*/false, /*AlwaysInline=*/false, + NULL, 0, NULL, 0); } /// LowerMemOpCallTo - Store the argument to the stack. @@ -2053,7 +2054,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, - bool AlwaysInline, + bool isVolatile, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff){ // Do repeated 4-byte loads and stores. To be improved. @@ -2089,7 +2090,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, false, false, 0); + SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -2100,7 +2101,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, false, false, 0); + DstSV, DstSVOff + DstOff, isVolatile, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index f8f8adc70af..fa33ad30750 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -305,7 +305,7 @@ namespace llvm { SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, - bool AlwaysInline, + bool isVolatile, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); SDValue LowerCallResult(SDValue Chain, SDValue InFlag, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index dda530eef43..9cd01be54e4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2392,7 +2392,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, DebugLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, NULL, 0, NULL, 0); + false, false, NULL, 0, NULL, 0); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 527e19b6088..3eaf64d1de4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1434,7 +1434,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, DebugLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*AlwaysInline=*/true, NULL, 0, NULL, 0); + /*isVolatile*/false, /*AlwaysInline=*/true, + NULL, 0, NULL, 0); } /// IsTailCallConvention - Return true if the calling convention is one that @@ -6548,6 +6549,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool isVolatile, const Value *DstSV, uint64_t DstSVOff) { ConstantSDNode *ConstantSize = dyn_cast(Size); @@ -6676,7 +6678,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, DstSV, DstSVOff + Offset); + Align, isVolatile, DstSV, DstSVOff + Offset); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. @@ -6687,7 +6689,7 @@ SDValue X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, - bool AlwaysInline, + bool isVolatile, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff) { // This requires the copy size to be a constant, preferrably @@ -6746,7 +6748,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), - Align, AlwaysInline, + Align, isVolatile, AlwaysInline, DstSV, DstSVOff + Offset, SrcSV, SrcSVOff + Offset)); } @@ -6829,8 +6831,8 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24), 8, false, - DstSV, 0, SrcSV, 0); + DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, + false, DstSV, 0, SrcSV, 0); } SDValue diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2c2a5fbb803..9547f1927dc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -741,12 +741,13 @@ namespace llvm { SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, + bool isVolatile, const Value *DstSV, uint64_t DstSVOff); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, - bool AlwaysInline, + bool isVolatile, bool AlwaysInline, const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); @@ -756,7 +757,7 @@ namespace llvm { /// block, the number of args, and whether or not the second arg is /// in memory or not. MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, - unsigned argNum, bool inMem) const; + unsigned argNum, bool inMem) const; /// Utility function to emit atomic bitwise operations (and, or, xor). /// It takes the bitwise instruction to expand, the associated machine basic diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index bf1a4576279..27e52332466 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1443,7 +1443,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, MVT::i32), - Alignment, ST->getSrcValue(), + Alignment, false, ST->getSrcValue(), ST->getSrcValueOffset(), LD->getSrcValue(), LD->getSrcValueOffset()); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 76c815da862..e025b053765 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -136,8 +136,14 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { return 0; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. - Type *NewPtrTy = - PointerType::getUnqual(IntegerType::get(MI->getContext(), Size<<3)); + unsigned SrcAddrSp = + cast(MI->getOperand(2)->getType())->getAddressSpace(); + unsigned DstAddrSp = + cast(MI->getOperand(1)->getType())->getAddressSpace(); + + const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); + Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); + Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); // Memcpy forces the use of i8* for the source and destination. That means // that if you're using memcpy to move one double around, you'll get a cast @@ -167,8 +173,10 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { break; } - if (SrcETy->isSingleValueType()) - NewPtrTy = PointerType::getUnqual(SrcETy); + if (SrcETy->isSingleValueType()) { + NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp); + NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp); + } } } @@ -178,11 +186,12 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy); - Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign); + Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy); + Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign); InsertNewInstBefore(L, *MI); - InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI); + InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), + *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); @@ -275,10 +284,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[1]; - Tys[0] = CI.getOperand(3)->getType(); + const Type *Tys[3] = { CI.getOperand(1)->getType(), + CI.getOperand(2)->getType(), + CI.getOperand(3)->getType() }; CI.setOperand(0, - Intrinsic::getDeclaration(M, MemCpyID, Tys, 1)); + Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 62e29770588..3b305ae766e 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -413,7 +413,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // interesting as a small compile-time optimization. Ranges.addStore(0, SI); - Function *MemSetF = 0; // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. @@ -433,29 +432,40 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { // memset block. This ensure that the memset is dominated by any addressing // instruction needed by the start of the block. BasicBlock::iterator InsertPt = BI; - - if (MemSetF == 0) { - const Type *Ty = Type::getInt64Ty(Context); - MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1); - } - + // Get the starting pointer of the block. StartPtr = Range.StartPtr; - + + // Determine alignment + unsigned Alignment = Range.Alignment; + if (Alignment == 0) { + const Type *EltType = + cast(StartPtr->getType())->getElementType(); + Alignment = TD->getABITypeAlignment(EltType); + } + // Cast the start ptr to be i8* as memset requires. - const Type *i8Ptr = Type::getInt8PtrTy(Context); - if (StartPtr->getType() != i8Ptr) + const PointerType* StartPTy = cast(StartPtr->getType()); + const PointerType *i8Ptr = Type::getInt8PtrTy(Context, + StartPTy->getAddressSpace()); + if (StartPTy!= i8Ptr) StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(), InsertPt); - + Value *Ops[] = { StartPtr, ByteVal, // Start, value // size ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), // align - ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment) + ConstantInt::get(Type::getInt32Ty(Context), Alignment), + // volatile + ConstantInt::get(Type::getInt1Ty(Context), 0), }; - Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); + const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; + + Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); + + Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt); DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i]; @@ -680,16 +690,19 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { return false; // If all checks passed, then we can transform these memcpy's - const Type *Ty = M->getLength()->getType(); + const Type *ArgTys[3] = { M->getRawDest()->getType(), + MDep->getRawSource()->getType(), + M->getLength()->getType() }; Function *MemCpyFun = Intrinsic::getDeclaration( M->getParent()->getParent()->getParent(), - M->getIntrinsicID(), &Ty, 1); + M->getIntrinsicID(), ArgTys, 3); - Value *Args[4] = { - M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst() + Value *Args[5] = { + M->getRawDest(), MDep->getRawSource(), M->getLength(), + M->getAlignmentCst(), M->getVolatileCst() }; - CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M); + CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M); // If C and M don't interfere, then this is a valid transformation. If they @@ -728,8 +741,10 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { // If not, then we know we can transform this. Module *Mod = M->getParent()->getParent()->getParent(); - const Type *Ty = M->getLength()->getType(); - M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1)); + const Type *ArgTys[3] = { M->getRawDest()->getType(), + M->getRawSource()->getType(), + M->getLength()->getType() }; + M->setOperand(0,Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index bbe62706558..6211beb70ba 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -858,8 +858,17 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); // Cast the other pointer (if we have one) to BytePtrTy. - if (OtherElt && OtherElt->getType() != BytePtrTy) - OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI); + if (OtherElt && OtherElt->getType() != BytePtrTy) { + // Preserve address space of OtherElt + const PointerType* OtherPTy = cast(OtherElt->getType()); + const PointerType* PTy = cast(BytePtrTy); + if (OtherPTy->getElementType() != PTy->getElementType()) { + Type *NewOtherPTy = PointerType::get(PTy->getElementType(), + OtherPTy->getAddressSpace()); + OtherElt = new BitCastInst(OtherElt, NewOtherPTy, + OtherElt->getNameStr(), MI); + } + } unsigned EltSize = TD->getTypeAllocSize(EltTy); @@ -870,17 +879,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, SROADest ? OtherElt : EltPtr, // Src ptr ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size // Align - ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign) + ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), + MI->getVolatileCst() }; - CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + // In case we fold the address space overloaded memcpy of A to B + // with memcpy of B to C, change the function to be a memcpy of A to C. + const Type *Tys[] = { Ops[0]->getType(), Ops[1]->getType(), + Ops[2]->getType() }; + Module *M = MI->getParent()->getParent()->getParent(); + TheFn = Intrinsic::getDeclaration(M, MI->getIntrinsicID(), Tys, 3); + CallInst::Create(TheFn, Ops, Ops + 5, "", MI); } else { assert(isa(MI)); Value *Ops[] = { EltPtr, MI->getOperand(2), // Dest, Value, ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size - Zero // Align + Zero, // Align + ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile }; - CallInst::Create(TheFn, Ops, Ops + 4, "", MI); + const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() }; + Module *M = MI->getParent()->getParent()->getParent(); + TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2); + CallInst::Create(TheFn, Ops, Ops + 5, "", MI); } } DeadInsts.push_back(MI); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 5941ea6571b..b053cfc3b46 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -142,7 +142,8 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD); + ConstantInt::get(TD->getIntPtrType(*Context), Len+1), + 1, false, B, TD); } }; @@ -383,7 +384,8 @@ struct StrCpyOpt : public LibCallOptimization { CI->getOperand(3), B, TD); else EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + 1, false, B, TD); return Dst; } }; @@ -411,8 +413,8 @@ struct StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) - EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, - B, TD); + EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), + LenOp, false, B, TD); return Dst; } @@ -432,7 +434,8 @@ struct StrNCpyOpt : public LibCallOptimization { // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + 1, false, B, TD); return Dst; } @@ -593,7 +596,7 @@ struct MemCpyOpt : public LibCallOptimization { // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, B, TD); + CI->getOperand(3), 1, false, B, TD); return CI->getOperand(1); } }; @@ -615,7 +618,7 @@ struct MemMoveOpt : public LibCallOptimization { // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) EmitMemMove(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, B, TD); + CI->getOperand(3), 1, false, B, TD); return CI->getOperand(1); } }; @@ -637,8 +640,8 @@ struct MemSetOpt : public LibCallOptimization { // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), - false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); + false); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); return CI->getOperand(1); } }; @@ -999,7 +1002,7 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()+1), 1, B, TD); + FormatStr.size()+1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1013,11 +1016,11 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; Value *V = B.CreateTrunc(CI->getOperand(3), - Type::getInt8Ty(*Context), "char"); + Type::getInt8Ty(*Context), "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), - "nul"); + "nul"); B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr); return ConstantInt::get(CI->getType(), 1); @@ -1034,7 +1037,7 @@ struct SPrintFOpt : public LibCallOptimization { Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD); + EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 0afccf42f63..fff817928f3 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -109,15 +109,16 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// EmitMemCpy - Emit a call to the memcpy function to the builder. This always /// expects that Len has type 'intptr_t' and Dst/Src are pointers. -Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, + bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - const Type *Ty = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); - return B.CreateCall4(MemCpy, Dst, Src, Len, - ConstantInt::get(B.getInt32Ty(), Align)); + return B.CreateCall5(MemCpy, Dst, Src, Len, + ConstantInt::get(B.getInt32Ty(), Align), + ConstantInt::get(B.getInt1Ty(), isVolatile)); } /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. @@ -146,16 +147,18 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemMove - Emit a call to the memmove function to the builder. This /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, unsigned Align, + bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); LLVMContext &Context = B.GetInsertBlock()->getContext(); - const Type *Ty = TD->getIntPtrType(Context); - Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), + TD->getIntPtrType(Context) }; + Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); Value *A = ConstantInt::get(B.getInt32Ty(), Align); - return B.CreateCall4(MemMove, Dst, Src, Len, A); + Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile); + return B.CreateCall5(MemMove, Dst, Src, Len, A, Vol); } /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is @@ -206,15 +209,15 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, } /// EmitMemSet - Emit a call to the memset function -Value *llvm::EmitMemSet(Value *Dst, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitMemSet(Value *Dst, Value *Val, Value *Len, bool isVolatile, + IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); Intrinsic::ID IID = Intrinsic::memset; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); + const Type *Tys[2] = { Dst->getType(), Len->getType() }; + Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 2); Value *Align = ConstantInt::get(B.getInt32Ty(), 1); - return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); + Value *Vol = ConstantInt::get(B.getInt1Ty(), isVolatile); + return B.CreateCall5(MemSet, CastToCStr(Dst, B), Val, Len, Align, Vol); } /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. @@ -381,7 +384,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (Name == "__memcpy_chk") { if (isFoldable(4, 3, false)) { EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B, TD); + 1, false, B, TD); replaceCall(CI->getOperand(1)); return true; } @@ -396,7 +399,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (Name == "__memmove_chk") { if (isFoldable(4, 3, false)) { EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B, TD); + 1, false, B, TD); replaceCall(CI->getOperand(1)); return true; } @@ -407,7 +410,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (isFoldable(4, 3, false)) { Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); replaceCall(CI->getOperand(1)); return true; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 17f8827fd5c..75c9ccdd7a9 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -297,10 +297,10 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, I->getName(), &*Caller->begin()->begin()); // Emit a memcpy. - const Type *Tys[] = { Type::getInt64Ty(Context) }; + const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, - Tys, 1); + Tys, 3); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); @@ -309,17 +309,18 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::getInt64Ty(Context), - TD->getTypeStoreSize(AggTy)); + TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. Value *CallArgs[] = { DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1) + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::get(Type::getInt1Ty(Context), 0) }; CallInst *TheMemCpy = - CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall); + CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); // If we have a call graph, update it. if (CG) { diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index b9aa5c34675..4d06b666816 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -145,6 +145,54 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; + case 'm': { + // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the + // new format that allows overloading the pointer for different address + // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16) + const char* NewFnName = NULL; + if (Name.compare(5,8,"memcpy.i",8) == 0) { + if (Name[13] == '8') + NewFnName = "llvm.memcpy.p0i8.p0i8.i8"; + else if (Name.compare(13,2,"16") == 0) + NewFnName = "llvm.memcpy.p0i8.p0i8.i16"; + else if (Name.compare(13,2,"32") == 0) + NewFnName = "llvm.memcpy.p0i8.p0i8.i32"; + else if (Name.compare(13,2,"64") == 0) + NewFnName = "llvm.memcpy.p0i8.p0i8.i64"; + } else if (Name.compare(5,9,"memmove.i",9) == 0) { + if (Name[14] == '8') + NewFnName = "llvm.memmove.p0i8.p0i8.i8"; + else if (Name.compare(14,2,"16") == 0) + NewFnName = "llvm.memmove.p0i8.p0i8.i16"; + else if (Name.compare(14,2,"32") == 0) + NewFnName = "llvm.memmove.p0i8.p0i8.i32"; + else if (Name.compare(14,2,"64") == 0) + NewFnName = "llvm.memmove.p0i8.p0i8.i64"; + } + else if (Name.compare(5,8,"memset.i",8) == 0) { + if (Name[13] == '8') + NewFnName = "llvm.memset.p0i8.i8"; + else if (Name.compare(13,2,"16") == 0) + NewFnName = "llvm.memset.p0i8.i16"; + else if (Name.compare(13,2,"32") == 0) + NewFnName = "llvm.memset.p0i8.i32"; + else if (Name.compare(13,2,"64") == 0) + NewFnName = "llvm.memset.p0i8.i64"; + } + if (NewFnName) { + const FunctionType *FTy = F->getFunctionType(); + NewFn = cast(M->getOrInsertFunction(NewFnName, + FTy->getReturnType(), + FTy->getParamType(0), + FTy->getParamType(1), + FTy->getParamType(2), + FTy->getParamType(3), + Type::getInt1Ty(F->getContext()), + (Type *)0)); + return true; + } + break; + } case 'p': // This upgrades the llvm.part.select overloaded intrinsic names to only // use one type specifier in the name. We only care about the old format @@ -472,6 +520,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); } break; + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: { + // Add isVolatile + const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext()); + Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), CI->getOperand(4), + llvm::ConstantInt::get(I1Ty, 0) }; + CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5, + CI->getName(), CI); + NewCI->setTailCall(CI->isTailCall()); + NewCI->setCallingConv(CI->getCallingConv()); + // Handle any uses of the old CallInst. + if (!CI->use_empty()) + // Replace all uses of the old call with the new cast which has the + // correct type. + CI->replaceAllUsesWith(NewCI); + + // Clean up the old call now that it has been completely upgraded. + CI->eraseFromParent(); + break; + } } } diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index 4a616364b00..a2aabf135f6 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -103,7 +103,7 @@ define i32 @test4(i8* %P) { ret i32 %sub ; CHECK: @test4 ; CHECK: load i32* @G -; CHECK: memset.i32 +; CHECK: memset.p0i8.i32 ; CHECK-NOT: load ; CHECK: sub i32 %tmp, %tmp } @@ -118,7 +118,7 @@ define i32 @test5(i8* %P, i32 %Len) { ret i32 %sub ; CHECK: @test5 ; CHECK: load i32* @G -; CHECK: memcpy.i32 +; CHECK: memcpy.p0i8.p0i8.i32 ; CHECK-NOT: load ; CHECK: sub i32 %tmp, %tmp } diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll index 85b95fe5726..b6573b5f621 100644 --- a/test/Bitcode/memcpy.ll +++ b/test/Bitcode/memcpy.ll @@ -8,6 +8,7 @@ entry: tail call void @llvm.memcpy.i64( i8* %tmp.1, i8* %tmp.3, i64 100000, i32 1 ) tail call void @llvm.memset.i32( i8* %tmp.3, i8 14, i32 10000, i32 0 ) tail call void @llvm.memmove.i32( i8* %tmp.1, i8* %tmp.3, i32 123124, i32 1 ) + tail call void @llvm.memmove.i64( i8* %tmp.1, i8* %tmp.3, i64 123124, i32 1 ) ret void } @@ -19,3 +20,4 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) declare void @llvm.memmove.i32(i8*, i8*, i32, i32) +declare void @llvm.memmove.i64(i8*, i8*, i32, i32) diff --git a/test/Transforms/InstCombine/memset_chk.ll b/test/Transforms/InstCombine/memset_chk.ll index 5a4e6d9e84f..58ecda582fd 100644 --- a/test/Transforms/InstCombine/memset_chk.ll +++ b/test/Transforms/InstCombine/memset_chk.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @t() nounwind ssp { ; CHECK: @t -; CHECK: @llvm.memset.i64 +; CHECK: @llvm.memset.p0i8.i64 entry: %0 = alloca %struct.data, align 8 ; <%struct.data*> [#uses=1] %1 = bitcast %struct.data* %0 to i8* ; [#uses=1] diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll index f8b2ffca01e..d74312d2e7e 100644 --- a/test/Transforms/InstCombine/objsize.ll +++ b/test/Transforms/InstCombine/objsize.ll @@ -113,7 +113,7 @@ entry: %1 = bitcast %struct.data* %0 to i8* %2 = call i64 @llvm.objectsize.i64(i8* %1, i1 false) nounwind ; CHECK-NOT: @llvm.objectsize -; CHECK: @llvm.memset.i64(i8* %1, i8 0, i64 1824, i32 8) +; CHECK: @llvm.memset.p0i8.i64(i8* %1, i8 0, i64 1824, i32 8, i1 false) %3 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 %2) nounwind ret i32 0 } @@ -128,7 +128,7 @@ entry: %1 = tail call i32 @llvm.objectsize.i32(i8* %0, i1 false) %2 = load i8** @s, align 8 ; CHECK-NOT: @llvm.objectsize -; CHECK: @llvm.memcpy.i32(i8* %0, i8* %1, i32 10, i32 1) +; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false) %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind ret void } diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll index 47df380b2da..b0ae5f413a5 100644 --- a/test/Transforms/MemCpyOpt/align.ll +++ b/test/Transforms/MemCpyOpt/align.ll @@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; The resulting memset is only 4-byte aligned, despite containing ; a 16-byte alignmed store in the middle. -; CHECK: call void @llvm.memset.i64(i8* %a01, i8 0, i64 16, i32 4) +; CHECK: call void @llvm.memset.p0i8.i64(i8* %a01, i8 0, i64 16, i32 4, i1 false) define void @foo(i32* %p) { %a0 = getelementptr i32* %p, i64 0 diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll index c3cc58ce380..83406ff8f86 100644 --- a/test/Transforms/SimplifyLibCalls/StrCpy.ll +++ b/test/Transforms/SimplifyLibCalls/StrCpy.ll @@ -21,7 +21,7 @@ define i32 @t1() { %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 ) -; CHECK: @llvm.memcpy.i32 +; CHECK: @llvm.memcpy.p0i8.p0i8.i32 ret i32 0 } diff --git a/test/Verifier/2006-12-12-IntrinsicDefine.ll b/test/Verifier/2006-12-12-IntrinsicDefine.ll index b63ae65aa2e..8d09b512066 100644 --- a/test/Verifier/2006-12-12-IntrinsicDefine.ll +++ b/test/Verifier/2006-12-12-IntrinsicDefine.ll @@ -1,7 +1,7 @@ ; RUN: not llvm-as < %s |& grep {llvm intrinsics cannot be defined} ; PR1047 -define void @llvm.memcpy.i32(i8*, i8*, i32, i32) { +define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) { entry: ret void }