From 8b170f7f290843dc3849eaa75b6f74a87a7a2de6 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Wed, 18 Nov 2015 22:17:24 +0000 Subject: [PATCH] Change memcpy/memset/memmove to have dest and source alignments. Note: this was reviewed in (and more details are available at) http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html These intrinsics currently have an explicit alignment argument which is required to be a constant integer. It represents the alignment of the source and dest, and so must be the minimum of those. This change allows source and dest to each have their own alignments by using the alignment attribute on their arguments. The alignment argument itself is removed. There are a few places where the code needs to be checked by an expert as to whether using only the src or only the dest alignment is safe. For now, those places take the minimum of the src and dest alignments, which matches the previous behaviour. For example, code which used to read: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) will now read: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) For out-of-tree owners, I was able to strip alignment from calls using sed by replacing: (call.*llvm\.memset.*)i32\ [0-9]*\,\ i1 false\) with: $1i1 false) and similarly for memmove and memcpy. I then added alignment back in to the test cases which needed it. A similar commit will be made to clang, which actually has many differences in alignment, as IRBuilder can now generate different source/dest alignments on calls. In IRBuilder itself, a new argument was added. Instead of calling: CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, /* isVolatile */ false) you now call: CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, SrcAlign, /* isVolatile */ false) There is a temporary class (IntegerAlignment) which takes the source alignment and rejects implicit conversion from bool. 
This is to prevent isVolatile here from passing its default parameter to the source alignment. Note, changes in future can now be made to codegen. I didn't change anything here, but this change should enable better memcpy code sequences. Reviewed by Hal Finkel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253511 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IRBuilder.h | 45 +++- include/llvm/IR/Instructions.h | 17 ++ include/llvm/IR/IntrinsicInst.h | 32 +-- include/llvm/IR/Intrinsics.td | 6 +- lib/Analysis/Lint.cpp | 10 +- lib/CodeGen/CodeGenPrepare.cpp | 4 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 64 +++--- lib/IR/Attributes.cpp | 5 - lib/IR/AutoUpgrade.cpp | 55 +++++ lib/IR/IRBuilder.cpp | 25 ++- lib/IR/Verifier.cpp | 2 +- lib/Target/AArch64/AArch64FastISel.cpp | 7 +- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 6 +- lib/Target/ARM/ARMFastISel.cpp | 7 +- lib/Target/Mips/MipsFastISel.cpp | 4 +- lib/Target/X86/X86FastISel.cpp | 4 +- .../InstCombine/InstCombineCalls.cpp | 29 +-- .../InstCombine/InstCombineInternal.h | 2 +- .../Instrumentation/DataFlowSanitizer.cpp | 23 ++- .../Instrumentation/MemorySanitizer.cpp | 17 +- .../Scalar/AlignmentFromAssumptions.cpp | 17 +- .../Scalar/DeadStoreElimination.cpp | 2 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 6 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 31 +-- lib/Transforms/Scalar/SROA.cpp | 32 +-- .../Scalar/ScalarReplAggregates.cpp | 25 ++- lib/Transforms/Utils/InlineFunction.cpp | 2 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 21 +- test/Analysis/BasicAA/assume.ll | 12 +- test/Analysis/BasicAA/cs-cs.ll | 192 +++++++++--------- test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll | 10 +- test/Analysis/BasicAA/modref.ll | 20 +- test/Analysis/CallGraph/no-intrinsics.ll | 4 +- .../DependenceAnalysis/Preliminary.ll | 2 +- test/Analysis/GlobalsModRef/pr12351.ll | 4 +- .../Analysis/GlobalsModRef/volatile-instrs.ll | 6 +- test/Analysis/ScalarEvolution/avoid-smax-1.ll | 12 +- 
test/Analysis/ScalarEvolution/trip-count.ll | 4 +- test/Analysis/ScalarEvolution/trip-count3.ll | 6 +- .../TypeBasedAliasAnalysis/functionattrs.ll | 6 +- .../TypeBasedAliasAnalysis/memcpyopt.ll | 8 +- test/Bitcode/memintrinsics.3.7.ll | 33 +++ test/Bitcode/memintrinsics.3.7.ll.bc | Bin 0 -> 916 bytes test/Bitcode/standardCIntrinsic.3.2.ll | 7 +- test/CodeGen/AArch64/PBQP-csr.ll | 4 +- .../AArch64/aarch64-deferred-spilling.ll | 4 +- .../arm64-2012-05-07-MemcpyAlignBug.ll | 4 +- test/CodeGen/AArch64/arm64-abi-varargs.ll | 4 +- test/CodeGen/AArch64/arm64-abi_align.ll | 18 +- .../AArch64/arm64-fast-isel-intrinsic.ll | 24 +-- test/CodeGen/AArch64/arm64-memcpy-inline.ll | 20 +- test/CodeGen/AArch64/arm64-memset-inline.ll | 8 +- test/CodeGen/AArch64/arm64-memset-to-bzero.ll | 8 +- .../AArch64/arm64-misaligned-memcpy-inline.ll | 4 +- .../AArch64/arm64-misched-basic-A53.ll | 6 +- .../AArch64/arm64-misched-basic-A57.ll | 6 +- test/CodeGen/AArch64/arm64-stur.ll | 4 +- test/CodeGen/AArch64/arm64-virtual_base.ll | 4 +- test/CodeGen/AArch64/fast-isel-memcpy.ll | 4 +- test/CodeGen/AArch64/func-argpassing.ll | 4 +- test/CodeGen/AArch64/memcpy-f128.ll | 4 +- .../AArch64/tailcall-mem-intrinsics.ll | 12 +- test/CodeGen/AMDGPU/llvm.memcpy.ll | 22 +- test/CodeGen/ARM/2009-03-07-SpillerBug.ll | 4 +- .../CodeGen/ARM/2011-03-10-DAGCombineCrash.ll | 4 +- test/CodeGen/ARM/2011-10-26-memset-inline.ll | 4 +- .../ARM/2011-10-26-memset-with-neon.ll | 4 +- .../ARM/2012-04-24-SplitEHCriticalEdge.ll | 2 +- test/CodeGen/ARM/Windows/memset.ll | 4 +- test/CodeGen/ARM/Windows/no-aeabi.ll | 8 +- test/CodeGen/ARM/crash-O0.ll | 6 +- test/CodeGen/ARM/debug-info-blocks.ll | 6 +- test/CodeGen/ARM/dyn-stackalloc.ll | 4 +- test/CodeGen/ARM/fast-isel-intrinsic.ll | 20 +- test/CodeGen/ARM/machine-cse-cmp.ll | 4 +- test/CodeGen/ARM/memcpy-inline.ll | 20 +- test/CodeGen/ARM/memfunc.ll | 86 ++++---- test/CodeGen/ARM/memset-inline.ll | 8 +- .../ARM/stack-protector-bmovpcb_call.ll | 4 +- 
test/CodeGen/ARM/struct-byval-frame-index.ll | 8 +- test/CodeGen/BPF/byval.ll | 4 +- test/CodeGen/BPF/ex1.ll | 6 +- test/CodeGen/BPF/sanity.ll | 4 +- test/CodeGen/Generic/ForceStackAlign.ll | 4 +- test/CodeGen/Generic/invalid-memcpy.ll | 4 +- test/CodeGen/Hexagon/mem-fi-add.ll | 4 +- .../Hexagon/tail-call-mem-intrinsics.ll | 12 +- test/CodeGen/MSP430/memset.ll | 4 +- test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll | 4 +- test/CodeGen/Mips/Fast-ISel/memtest1.ll | 12 +- test/CodeGen/Mips/biggot.ll | 4 +- ...nts-small-structures-bigger-than-32bits.ll | 6 +- .../arguments-varargs-small-structs-byte.ll | 12 +- ...ents-varargs-small-structs-combinations.ll | 6 +- test/CodeGen/Mips/cconv/return-struct.ll | 6 +- test/CodeGen/Mips/largeimmprinting.ll | 4 +- test/CodeGen/Mips/memcpy.ll | 4 +- test/CodeGen/Mips/tailcall.ll | 4 +- test/CodeGen/NVPTX/lower-aggr-copies.ll | 16 +- .../PowerPC/2011-12-05-NoSpillDupCR.ll | 2 +- .../PowerPC/2011-12-06-SpillAndRestoreCR.ll | 2 +- test/CodeGen/PowerPC/ctrloop-reg.ll | 2 +- test/CodeGen/PowerPC/emptystruct.ll | 4 +- test/CodeGen/PowerPC/fsl-e500mc.ll | 4 +- test/CodeGen/PowerPC/fsl-e5500.ll | 4 +- test/CodeGen/PowerPC/glob-comp-aa-crash.ll | 4 +- test/CodeGen/PowerPC/isel-rc-nox0.ll | 4 +- test/CodeGen/PowerPC/memcpy-vec.ll | 12 +- test/CodeGen/PowerPC/memset-nc-le.ll | 4 +- test/CodeGen/PowerPC/memset-nc.ll | 6 +- test/CodeGen/PowerPC/ppc-empty-fs.ll | 4 +- test/CodeGen/PowerPC/resolvefi-basereg.ll | 28 +-- test/CodeGen/PowerPC/resolvefi-disp.ll | 16 +- test/CodeGen/PowerPC/structsinmem.ll | 30 +-- test/CodeGen/PowerPC/structsinregs.ll | 30 +-- test/CodeGen/PowerPC/stwu8.ll | 4 +- test/CodeGen/PowerPC/toc-load-sched-bug.ll | 14 +- test/CodeGen/SystemZ/memcpy-01.ll | 36 ++-- test/CodeGen/SystemZ/memset-01.ll | 32 +-- test/CodeGen/SystemZ/memset-02.ll | 36 ++-- test/CodeGen/SystemZ/memset-03.ll | 84 ++++---- test/CodeGen/SystemZ/memset-04.ll | 84 ++++---- .../SystemZ/tail-call-mem-intrinsics.ll | 12 +- 
test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll | 10 +- test/CodeGen/Thumb/dyn-stackalloc.ll | 4 +- .../Thumb/ldm-stm-base-materialization.ll | 10 +- .../Thumb/stack-coloring-without-frame-ptr.ll | 4 +- .../Thumb2/2009-08-04-SubregLoweringBug.ll | 4 +- test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll | 4 +- test/CodeGen/X86/2009-01-25-NoSSE.ll | 4 +- test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll | 4 +- test/CodeGen/X86/2010-04-08-CoalescerBug.ll | 4 +- test/CodeGen/X86/2010-04-21-CoalescerBug.ll | 4 +- .../X86/2010-06-25-CoalescerSubRegDefDead.ll | 4 +- .../X86/2010-09-17-SideEffectsInChain.ll | 4 +- .../X86/2012-01-10-UndefExceptionEdge.ll | 8 +- test/CodeGen/X86/alignment-2.ll | 4 +- test/CodeGen/X86/darwin-bzero.ll | 4 +- test/CodeGen/X86/fast-isel-call.ll | 8 +- test/CodeGen/X86/fast-isel-x86-64.ll | 4 +- test/CodeGen/X86/force-align-stack-alloca.ll | 4 +- test/CodeGen/X86/immediate_merging.ll | 4 +- test/CodeGen/X86/load-slice.ll | 2 +- test/CodeGen/X86/lsr-normalization.ll | 4 +- test/CodeGen/X86/mem-intrin-base-reg.ll | 10 +- test/CodeGen/X86/memcpy-2.ll | 10 +- test/CodeGen/X86/memcpy.ll | 20 +- test/CodeGen/X86/memset-2.ll | 10 +- test/CodeGen/X86/memset-3.ll | 4 +- .../X86/memset-sse-stack-realignment.ll | 6 +- test/CodeGen/X86/memset.ll | 6 +- test/CodeGen/X86/memset64-on-x86-32.ll | 4 +- test/CodeGen/X86/misaligned-memset.ll | 4 +- test/CodeGen/X86/misched-new.ll | 4 +- test/CodeGen/X86/optimize-max-0.ll | 20 +- test/CodeGen/X86/pr11985.ll | 4 +- test/CodeGen/X86/pr14333.ll | 4 +- test/CodeGen/X86/ragreedy-hoist-spill.ll | 4 +- test/CodeGen/X86/remat-fold-load.ll | 6 +- test/CodeGen/X86/small-byval-memcpy.ll | 4 +- test/CodeGen/X86/stack-protector.ll | 6 +- test/CodeGen/X86/tailcall-mem-intrinsics.ll | 12 +- test/CodeGen/X86/tlv-1.ll | 4 +- test/CodeGen/X86/unaligned-load.ll | 4 +- test/CodeGen/X86/unwindraise.ll | 6 +- .../X86/variable-sized-darwin-bzero.ll | 4 +- test/CodeGen/X86/x86-64-static-relo-movl.ll | 4 +- test/CodeGen/XCore/memcpy.ll | 8 +- 
test/DebugInfo/AArch64/frameindices.ll | 10 +- test/DebugInfo/X86/array.ll | 4 +- test/DebugInfo/X86/array2.ll | 4 +- test/DebugInfo/X86/debug-ranges-offset.ll | 2 +- test/DebugInfo/X86/pieces-2.ll | 2 +- test/DebugInfo/X86/pieces-3.ll | 2 +- test/DebugInfo/X86/sroasplit-1.ll | 4 +- test/DebugInfo/X86/sroasplit-2.ll | 4 +- test/DebugInfo/X86/sroasplit-4.ll | 6 +- test/DebugInfo/X86/sroasplit-5.ll | 8 +- .../Instrumentation/AddressSanitizer/basic.ll | 12 +- .../DataFlowSanitizer/memset.ll | 4 +- .../MemorySanitizer/byval-alignment.ll | 2 +- .../MemorySanitizer/check_access_address.ll | 4 +- .../MemorySanitizer/msan_basic.ll | 18 +- .../ThreadSanitizer/tsan_basic.ll | 12 +- test/Linker/type-unique-simple2-a.ll | 4 +- test/Linker/type-unique-type-array-a.ll | 4 +- test/Linker/type-unique-type-array-b.ll | 4 +- test/Object/mangle-ir.ll | 4 +- test/Other/lint.ll | 4 +- .../AlignmentFromAssumptions/simple.ll | 12 +- .../AlignmentFromAssumptions/simple32.ll | 12 +- test/Transforms/BBVectorize/X86/wr-aliases.ll | 8 +- .../X86/memset_chk-simplify-nobuiltin.ll | 2 +- .../CorrelatedValuePropagation/non-null.ll | 20 +- .../DeadStoreElimination/2011-09-06-MemCpy.ll | 6 +- .../DeadStoreElimination/OverwriteStoreEnd.ll | 32 +-- test/Transforms/DeadStoreElimination/crash.ll | 4 +- .../DeadStoreElimination/cs-cs-aliasing.ll | 10 +- .../DeadStoreElimination/lifetime.ll | 4 +- .../DeadStoreElimination/memintrinsics.ll | 12 +- .../DeadStoreElimination/no-targetdata.ll | 6 +- .../DeadStoreElimination/pr11390.ll | 6 +- .../Transforms/DeadStoreElimination/simple.ll | 28 +-- test/Transforms/GVN/nonescaping-malloc.ll | 4 +- test/Transforms/GVN/pr17732.ll | 6 +- test/Transforms/GVN/rle.ll | 22 +- test/Transforms/GlobalOpt/crash.ll | 4 +- test/Transforms/GlobalOpt/memcpy.ll | 4 +- test/Transforms/GlobalOpt/memset-null.ll | 6 +- test/Transforms/GlobalOpt/memset.ll | 14 +- test/Transforms/Inline/alloca-dbgdeclare.ll | 6 +- test/Transforms/Inline/inline-invoke-tail.ll | 4 +- 
test/Transforms/Inline/inline-vla.ll | 6 +- test/Transforms/Inline/noalias-calls.ll | 18 +- .../InstCombine/2007-10-10-EliminateMemCpy.ll | 4 +- .../2009-02-20-InstCombine-SROA.ll | 4 +- test/Transforms/InstCombine/addrspacecast.ll | 8 +- test/Transforms/InstCombine/align-addr.ll | 6 +- test/Transforms/InstCombine/alloca.ll | 6 +- .../Transforms/InstCombine/call-intrinsics.ll | 12 +- .../InstCombine/malloc-free-delete.ll | 22 +- .../InstCombine/memcpy-from-global.ll | 42 ++-- test/Transforms/InstCombine/memcpy-to-load.ll | 4 +- test/Transforms/InstCombine/memcpy.ll | 10 +- test/Transforms/InstCombine/memcpy_chk-1.ll | 4 +- test/Transforms/InstCombine/memmove.ll | 14 +- test/Transforms/InstCombine/memmove_chk-1.ll | 4 +- test/Transforms/InstCombine/memset.ll | 12 +- test/Transforms/InstCombine/memset2.ll | 4 +- test/Transforms/InstCombine/memset_chk-1.ll | 8 +- test/Transforms/InstCombine/objsize.ll | 4 +- .../InstCombine/simplify-libcalls.ll | 8 +- test/Transforms/InstCombine/sprintf-1.ll | 4 +- .../Transforms/InstCombine/stack-overalign.ll | 11 +- test/Transforms/InstCombine/stpcpy_chk-1.ll | 6 +- test/Transforms/InstCombine/strcpy_chk-1.ll | 6 +- test/Transforms/InstCombine/strncpy_chk-1.ll | 4 +- .../InstCombine/struct-assign-tbaa.ll | 6 +- .../LoopIdiom/basic-address-space.ll | 2 +- test/Transforms/LoopIdiom/basic.ll | 18 +- .../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 6 +- .../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 4 +- test/Transforms/MemCpyOpt/align.ll | 10 +- test/Transforms/MemCpyOpt/atomic.ll | 4 +- test/Transforms/MemCpyOpt/callslot_aa.ll | 8 +- test/Transforms/MemCpyOpt/callslot_deref.ll | 12 +- test/Transforms/MemCpyOpt/capturing-func.ll | 4 +- test/Transforms/MemCpyOpt/form-memset.ll | 40 ++-- .../memcpy-to-memset-with-lifetimes.ll | 26 +-- test/Transforms/MemCpyOpt/memcpy-to-memset.ll | 4 +- test/Transforms/MemCpyOpt/memcpy-undef.ll | 8 +- test/Transforms/MemCpyOpt/memcpy.ll | 36 ++-- test/Transforms/MemCpyOpt/memmove.ll | 8 +- 
.../memset-memcpy-redundant-memset.ll | 102 +++++----- .../MemCpyOpt/memset-memcpy-to-2x-memset.ll | 80 ++++---- test/Transforms/MemCpyOpt/smaller.ll | 8 +- test/Transforms/MemCpyOpt/sret.ll | 4 +- test/Transforms/MergeFunc/vector.ll | 2 +- test/Transforms/MetaRenamer/metarenamer.ll | 4 +- test/Transforms/ObjCARC/nested.ll | 24 +-- test/Transforms/PlaceSafepoints/memset.ll | 4 +- test/Transforms/SROA/address-spaces.ll | 22 +- test/Transforms/SROA/alignment.ll | 18 +- test/Transforms/SROA/basictest.ll | 166 +++++++-------- test/Transforms/SROA/big-endian.ll | 4 +- .../SROA/slice-order-independence.ll | 6 +- test/Transforms/SROA/slice-width.ll | 14 +- test/Transforms/SROA/vector-promotion.ll | 42 ++-- .../ScalarRepl/2007-05-29-MemcpyPreserve.ll | 6 +- .../ScalarRepl/2008-06-22-LargeArray.ll | 6 +- .../2008-08-22-out-of-range-array-promote.ll | 4 +- .../ScalarRepl/2008-09-22-vector-gep.ll | 4 +- .../ScalarRepl/2009-03-04-MemCpyAlign.ll | 4 +- .../ScalarRepl/2009-12-11-NeonTypes.ll | 10 +- .../ScalarRepl/2010-01-18-SelfCopy.ll | 4 +- .../ScalarRepl/2011-05-06-CapturedAlloca.ll | 4 +- .../2011-06-17-VectorPartialMemset.ll | 6 +- .../ScalarRepl/2011-10-11-VectorMemset.ll | 4 +- .../ScalarRepl/2011-11-11-EmptyStruct.ll | 4 +- test/Transforms/ScalarRepl/address-space.ll | 8 +- test/Transforms/ScalarRepl/badarray.ll | 4 +- test/Transforms/ScalarRepl/copy-aggregate.ll | 8 +- test/Transforms/ScalarRepl/crash.ll | 8 +- test/Transforms/ScalarRepl/inline-vector.ll | 6 +- test/Transforms/ScalarRepl/memcpy-align.ll | 6 +- .../memset-aggregate-byte-leader.ll | 4 +- .../Transforms/ScalarRepl/memset-aggregate.ll | 12 +- test/Transforms/ScalarRepl/negative-memset.ll | 6 +- .../Transforms/ScalarRepl/only-memcpy-uses.ll | 6 +- test/Transforms/ScalarRepl/vector_memcpy.ll | 8 +- .../Util/combine-alias-scope-metadata.ll | 10 +- test/Verifier/2006-12-12-IntrinsicDefine.ll | 2 +- test/Verifier/2008-08-22-MemCpyAlignment.ll | 12 -- test/Verifier/memcpy.ll | 6 +- 294 files changed, 1820 
insertions(+), 1653 deletions(-) create mode 100644 test/Bitcode/memintrinsics.3.7.ll create mode 100644 test/Bitcode/memintrinsics.3.7.ll.bc delete mode 100644 test/Verifier/2008-08-22-MemCpyAlignment.ll diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index a9e040b825c..4936ccb417d 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -362,34 +362,56 @@ public: /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. - CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, + CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, + unsigned DstAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { - return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, + return CreateMemSet(Ptr, Val, getInt64(Size), DstAlign, isVolatile, TBAATag, ScopeTag, NoAliasTag); } - CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, + CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned DstAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); - /// \brief Create and insert a memcpy between the specified pointers. + /// Create and insert a memcpy between the specified pointers. /// /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. - CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align, + /// + /// Note! This is very temporary. It is only intended to catch calls to + /// CreateMemCpy in out of tree code which would otherwise silently pass the + /// volatile flag to source alignment. 
+ class IntegerAlignment { + private: + uint64_t Align; + + IntegerAlignment() = delete; + IntegerAlignment(bool) = delete; + public: + IntegerAlignment(int Align) : Align(Align) { } + IntegerAlignment(long long Align) : Align(Align) { } + IntegerAlignment(unsigned Align) : Align(Align) { } + IntegerAlignment(uint64_t Align) : Align(Align) { } + + operator unsigned() { return Align; } + }; + CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, + unsigned DstAlign, IntegerAlignment SrcAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { - return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, + return CreateMemCpy(Dst, Src, getInt64(Size), DstAlign, SrcAlign, + isVolatile, TBAATag, TBAAStructTag, ScopeTag, NoAliasTag); } - CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, + CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, + unsigned DstAlign, IntegerAlignment SrcAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, @@ -401,15 +423,18 @@ public: /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. 
- CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align, + CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, + unsigned DstAlign, IntegerAlignment SrcAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { - return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, + return CreateMemMove(Dst, Src, getInt64(Size), DstAlign, SrcAlign, + isVolatile, TBAATag, ScopeTag, NoAliasTag); } - CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, + CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, + unsigned DstAlign, IntegerAlignment SrcAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index a5a48cb30b0..51b06133378 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1638,6 +1638,23 @@ public: return AttributeList.getParamAlignment(i); } + /// Set the alignment for a call or parameter (0=unknown). + void setParamAlignment(unsigned Index, unsigned Align) { + // It's not valid to change the parameter alignment. Instead we have to + // remove the old one if it's there, and add a new one. + if (AttributeList.hasAttribute(Index, Attribute::Alignment)) + AttributeList = AttributeList.removeAttribute(getContext(), + Index, + Attribute::Alignment); + + // Now add the new alignment. + llvm::AttrBuilder B; + B.addAlignmentAttr(Align); + AttributeList = AttributeList.addAttributes(getContext(), Index, + AttributeSet::get(getContext(), + Index, B)); + } + /// \brief Extract the number of dereferenceable bytes for a call or /// parameter (0=unknown). 
uint64_t getDereferenceableBytes(unsigned i) const { diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index 169bcc02198..ba75e4ca8de 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -150,16 +150,13 @@ namespace llvm { const Use &getLengthUse() const { return getArgOperandUse(2); } Use &getLengthUse() { return getArgOperandUse(2); } - ConstantInt *getAlignmentCst() const { - return cast(const_cast(getArgOperand(3))); - } - - unsigned getAlignment() const { - return getAlignmentCst()->getZExtValue(); + unsigned getDestAlignment() const { + // Note, param attributes start at 1, so offset dest index from 0 to 1. + return getParamAlignment(1); } ConstantInt *getVolatileCst() const { - return cast(const_cast(getArgOperand(4))); + return cast(const_cast(getArgOperand(3))); } bool isVolatile() const { return !getVolatileCst()->isZero(); @@ -188,16 +185,13 @@ namespace llvm { setArgOperand(2, L); } - void setAlignment(Constant* A) { - setArgOperand(3, A); + void setDestAlignment(unsigned Align) { + // Note, param attributes start at 1, so offset dest index from 0 to 1. + setParamAlignment(1, Align); } void setVolatile(Constant* V) { - setArgOperand(4, V); - } - - Type *getAlignmentType() const { - return getArgOperand(3)->getType(); + setArgOperand(3, V); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -259,12 +253,22 @@ namespace llvm { return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSrcAlignment() const { + // Note, param attributes start at 1, so offset src index from 1 to 2. + return getParamAlignment(2); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); setArgOperand(1, Ptr); } + void setSrcAlignment(unsigned Align) { + // Note, param attributes start at 1, so offset src index from 1 to 2. 
+ setParamAlignment(2, Align); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::memcpy || diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index e7b60ef3d41..5c14e7de4b0 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -333,17 +333,17 @@ def int_instrprof_value_profile : Intrinsic<[], def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>, ReadOnly<1>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>, ReadOnly<1>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrReadWriteArgMem, NoCapture<0>]>; let Properties = [IntrNoMem] in { diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 2dfb09c95ad..5fb5b8c2e9a 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -284,9 +284,9 @@ void Lint::visitCallSite(CallSite CS) { MemCpyInst *MCI = cast(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, - MCI->getAlignment(), nullptr, MemRef::Write); + MCI->getDestAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, - MCI->getAlignment(), nullptr, MemRef::Read); + MCI->getSrcAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial @@ -306,16 +306,16 @@ void Lint::visitCallSite(CallSite CS) { MemMoveInst *MMI = cast(&I); // TODO: If the size is known, use it. 
visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, - MMI->getAlignment(), nullptr, MemRef::Write); + MMI->getDestAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, - MMI->getAlignment(), nullptr, MemRef::Read); + MMI->getSrcAlignment(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, - MSI->getAlignment(), nullptr, MemRef::Write); + MSI->getDestAlignment(), nullptr, MemRef::Write); break; } diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index de5c68f7767..d0ff84fa6f3 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1665,8 +1665,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { unsigned Align = getKnownAlignment(MI->getDest(), *DL); if (MemTransferInst *MTI = dyn_cast(MI)) Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); - if (Align > MI->getAlignment()) - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + if (Align > MI->getDestAlignment()) + MI->setDestAlignment(Align); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index de0c0fba5f7..9d9b5dbb7d2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4365,69 +4365,73 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + const MemCpyInst &MemCpyI = cast(I); // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. 
- assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && + assert(MemCpyI.getDestAddressSpace() < 256 && + MemCpyI.getSourceAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + SDValue Op1 = getValue(MemCpyI.getDest()); + SDValue Op2 = getValue(MemCpyI.getSource()); + SDValue Op3 = getValue(MemCpyI.getLength()); + // FIXME: Support passing different dest/src alignments to the memcpy + // DAG node. + unsigned Align = std::min(MemCpyI.getDestAlignment(), + MemCpyI.getSrcAlignment()); if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MemCpyI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1))); + MachinePointerInfo(MemCpyI.getDest()), + MachinePointerInfo(MemCpyI.getSource())); updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset: { + const MemSetInst &MemSetI = cast(I); // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. 
- assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && + assert(MemSetI.getDestAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + SDValue Op1 = getValue(MemSetI.getDest()); + SDValue Op2 = getValue(MemSetI.getValue()); + SDValue Op3 = getValue(MemSetI.getLength()); + unsigned Align = MemSetI.getDestAlignment(); if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MemSetI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0))); + isTC, MachinePointerInfo(MemSetI.getDest())); updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { + const MemMoveInst &MemMoveI = cast(I); // FIXME: this definition of "user defined address space" is x86-specific // Assert for address < 256 since we support only user defined address // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && + assert(MemMoveI.getDestAddressSpace() < 256 && + MemMoveI.getSourceAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + SDValue Op1 = getValue(MemMoveI.getDest()); + SDValue Op2 = getValue(MemMoveI.getSource()); + SDValue Op3 = getValue(MemMoveI.getLength()); + // FIXME: Support passing different dest/src alignments to the memmove + // DAG node. 
+ unsigned Align = std::min(MemMoveI.getDestAlignment(), + MemMoveI.getSrcAlignment()); if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MemMoveI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1))); + isTC, MachinePointerInfo(MemMoveI.getDest()), + MachinePointerInfo(MemMoveI.getSource())); updateDAGForMaybeTailCall(MM); return nullptr; } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index bdefe5917fe..fe09c47fb48 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -830,11 +830,6 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index, if (!pImpl) return AttributeSet(); if (!Attrs.pImpl) return *this; - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && - "Attempt to change alignment!"); - // Add the attribute slots before the one we're trying to add. SmallVector AttrSet; uint64_t NumAttrs = pImpl->getNumAttributes(); diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 12c354c89b2..c83313fa654 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -144,6 +144,36 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; } + case 'm': { + if (Name.startswith("memcpy.") && F->arg_size() == 5) { + F->setName(Name + ".old"); + // Get the types of dest, src, and len. 
+ ArrayRef ParamTypes = F->getFunctionType()->params().slice(0, 3); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, + ParamTypes); + return true; + } + if (Name.startswith("memmove.") && F->arg_size() == 5) { + F->setName(Name + ".old"); + // Get the types of dest, src, and len. + ArrayRef ParamTypes = F->getFunctionType()->params().slice(0, 3); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, + ParamTypes); + return true; + } + if (Name.startswith("memset.") && F->arg_size() == 5) { + F->setName(Name + ".old"); + // Get the types of dest and len. + Type *ParamTypes[2] = { + F->getFunctionType()->getParamType(0), + F->getFunctionType()->getParamType(2) + }; + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, + ParamTypes); + return true; + } + break; + } case 'o': // We only need to change the name to match the mangling including the @@ -727,6 +757,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: { + // Remove alignment argument (3), and add alignment attributes to the + // dest/src pointers. + Value *Args[4] = { + CI->getArgOperand(0), + CI->getArgOperand(1), + CI->getArgOperand(2), + CI->getArgOperand(4) + }; + auto *MemCI = cast(Builder.CreateCall(NewFn, Args, Name)); + + // All mem intrinsics support dest alignment. + const ConstantInt *Align = cast(CI->getArgOperand(3)); + MemCI->setDestAlignment(Align->getZExtValue()); + + // Memcpy/Memmove also support source alignment. 
+ if (auto *MemTransferI = dyn_cast(MemCI)) + MemTransferI->setSrcAlignment(Align->getZExtValue()); + CI->replaceAllUsesWith(MemCI); + CI->eraseFromParent(); + return; + } + case Intrinsic::objectsize: CI->replaceAllUsesWith(Builder.CreateCall( NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name)); diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index 44741293633..b07f15515ad 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" @@ -79,11 +80,11 @@ static InvokeInst *createInvokeHelper(Value *Invokee, BasicBlock *NormalDest, } CallInst *IRBuilderBase:: -CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, +CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned DstAlign, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { Ptr = getCastedInt8PtrValue(Ptr); - Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = { Ptr, Val, Size, getInt1(isVolatile) }; Type *Tys[] = { Ptr->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); @@ -99,18 +100,21 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + cast(CI)->setDestAlignment(DstAlign); return CI; } CallInst *IRBuilderBase:: -CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, +CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned DstAlign, + IntegerAlignment SrcAlign, bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) { Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - Value *Ops[] = { Dst, Src, Size, getInt32(Align), 
getInt1(isVolatile) }; + Value *Ops[] = { Dst, Src, Size, getInt1(isVolatile) }; Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); @@ -130,18 +134,23 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + auto *MCI = cast(CI); + MCI->setDestAlignment(DstAlign); + MCI->setSrcAlignment(SrcAlign); return CI; } CallInst *IRBuilderBase:: -CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, +CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned DstAlign, + IntegerAlignment SrcAlign, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = { Dst, Src, Size, getInt1(isVolatile) }; Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys); @@ -157,6 +166,10 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + auto *MMI = cast(CI); + MMI->setDestAlignment(DstAlign); + MMI->setSrcAlignment(SrcAlign); return CI; } diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index cf7b4cac342..86e30620dec 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -3511,7 +3511,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { const APInt &AlignVal = AlignCI->getValue(); Assert(AlignCI->isZero() || AlignVal.isPowerOf2(), "alignment argument of memory intrinsics must be a power of 2", CS); - Assert(isa(CS.getArgOperand(4)), + Assert(isa(CS.getArgOperand(3)), "isvolatile argument of memory intrinsics must be a constant int", CS); break; diff 
--git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 284f5263f90..efab048e00a 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -3379,7 +3379,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // Small memcpy's are common enough that we want to do them without a call // if possible. uint64_t Len = cast(MTI->getLength())->getZExtValue(); - unsigned Alignment = MTI->getAlignment(); + unsigned Alignment = std::min(MTI->getDestAlignment(), + MTI->getSrcAlignment()); if (isMemCpySmall(Len, Alignment)) { Address Dest, Src; if (!computeAddress(MTI->getRawDest(), Dest) || @@ -3399,7 +3400,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; - return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -3415,7 +3416,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { // address spaces. 
return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } case Intrinsic::sin: case Intrinsic::cos: diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 87d50d58705..d8238f73e20 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -400,15 +400,15 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { case Intrinsic::memcpy: { MemCpyInst *MemCpy = cast(Intr); Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(), - MemCpy->getLength(), MemCpy->getAlignment(), - MemCpy->isVolatile()); + MemCpy->getLength(), MemCpy->getDestAlignment(), + MemCpy->getSrcAlignment(), MemCpy->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memset: { MemSetInst *MemSet = cast(Intr); Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), - MemSet->getLength(), MemSet->getAlignment(), + MemSet->getLength(), MemSet->getDestAlignment(), MemSet->isVolatile()); Intr->eraseFromParent(); continue; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 175107450fc..ce928289408 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2328,8 +2328,8 @@ bool ARMFastISel::SelectCall(const Instruction *I, for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { // If we're lowering a memory intrinsic instead of a regular call, skip the - // last two arguments, which shouldn't be passed to the underlying function. - if (IntrMemName && e-i <= 2) + // last argument, which shouldn't be passed to the underlying function. 
+ if (IntrMemName && e-i <= 1) break; ISD::ArgFlagsTy Flags; @@ -2527,7 +2527,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { if (!ARMComputeAddress(MTI.getRawDest(), Dest) || !ARMComputeAddress(MTI.getRawSource(), Src)) return false; - unsigned Alignment = MTI.getAlignment(); + unsigned Alignment = std::min(MTI.getDestAlignment(), + MTI.getSrcAlignment()); if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment)) return true; } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index e9eaf810637..f5bf36dea5e 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -1403,7 +1403,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (!MTI->getLength()->getType()->isIntegerTy(32)) return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; - return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -1412,7 +1412,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { return false; if (!MSI->getLength()->getType()->isIntegerTy(32)) return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } } return false; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 914fd04ad6b..886e6226e43 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2409,7 +2409,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255) return false; - return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -2424,7 +2424,7 @@ bool 
X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { if (MSI->getDestAddressSpace() > 255) return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } case Intrinsic::stackprotector: { // Emit code to store the stack guard onto the stack. diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index cde26cc24c2..2bc96cce42c 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -60,14 +60,18 @@ static Type *reduceToSingleValueType(Type *T) { return T; } -Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { +Instruction *InstCombiner::SimplifyMemTransfer(MemTransferInst *MI) { unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT); unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT); - unsigned MinAlign = std::min(DstAlign, SrcAlign); - unsigned CopyAlign = MI->getAlignment(); + unsigned CopyDestAlign = MI->getDestAlignment(); + unsigned CopySrcAlign = MI->getSrcAlignment(); - if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); + if (CopyDestAlign < DstAlign) { + MI->setDestAlignment(DstAlign); + return MI; + } + if (CopySrcAlign < SrcAlign) { + MI->setSrcAlignment(SrcAlign); return MI; } @@ -135,8 +139,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If the memcpy/memmove provides better alignment info than we can // infer, use it. 
- SrcAlign = std::max(SrcAlign, CopyAlign); - DstAlign = std::max(DstAlign, CopyAlign); + SrcAlign = std::max(SrcAlign, CopySrcAlign); + DstAlign = std::max(DstAlign, CopyDestAlign); Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); @@ -156,9 +160,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT); - if (MI->getAlignment() < Alignment) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - Alignment, false)); + if (MI->getDestAlignment() < Alignment) { + MI->setDestAlignment(Alignment); return MI; } @@ -168,7 +171,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return nullptr; uint64_t Len = LenC->getLimitedValue(); - Alignment = MI->getAlignment(); + Alignment = MI->getDestAlignment(); assert(Len && "0-sized memory setting should be removed already."); // memset(s,c,n) -> store s, c (for n=1,2,4,8) @@ -743,8 +746,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // If we can determine a pointer alignment that is bigger than currently // set, update the alignment. 
- if (isa(MI)) { - if (Instruction *I = SimplifyMemTransfer(MI)) + if (auto *MTI = dyn_cast(MI)) { + if (Instruction *I = SimplifyMemTransfer(MTI)) return I; } else if (MemSetInst *MSI = dyn_cast(MI)) { if (Instruction *I = SimplifyMemSet(MSI)) diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 1bb3ad6c534..4b167021307 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -558,7 +558,7 @@ private: Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); - Instruction *SimplifyMemTransfer(MemIntrinsic *MI); + Instruction *SimplifyMemTransfer(MemTransferInst *MI); Instruction *SimplifyMemSet(MemSetInst *MI); Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 21ef3207e89..2a9749d65c2 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1356,20 +1356,21 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { Value *LenShadow = IRB.CreateMul( I.getLength(), ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8)); - Value *AlignShadow; - if (ClPreserveAlignment) { - AlignShadow = IRB.CreateMul(I.getAlignmentCst(), - ConstantInt::get(I.getAlignmentCst()->getType(), - DFSF.DFS.ShadowWidth / 8)); - } else { - AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(), - DFSF.DFS.ShadowWidth / 8); - } Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr); SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); - IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow, - AlignShadow, I.getVolatileCst()}); + auto *MTI = 
cast(IRB.CreateCall(I.getCalledValue(), + { DestShadow, SrcShadow, + LenShadow, + I.getVolatileCst() })); + + if (ClPreserveAlignment) { + MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8)); + MTI->setSrcAlignment(I.getSrcAlignment() * (DFSF.DFS.ShadowWidth / 8)); + } else { + MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8); + MTI->setSrcAlignment(DFSF.DFS.ShadowWidth / 8); + } } void DFSanVisitor::visitReturnInst(ReturnInst &RI) { diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 218e3e96c23..836995fa3af 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1117,7 +1117,7 @@ struct MemorySanitizerVisitor : public InstVisitor { unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment); Value *Cpy = EntryIRB.CreateMemCpy( getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB), Base, Size, - CopyAlign); + CopyAlign, CopyAlign); DEBUG(dbgs() << " ByValCpy: " << *Cpy << "\n"); (void)Cpy; } @@ -2482,7 +2482,7 @@ struct MemorySanitizerVisitor : public InstVisitor { unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment); Store = IRB.CreateMemCpy(ArgShadowBase, getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB), - Size, Alignment); + Size, Alignment, Alignment); } else { Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; @@ -2834,7 +2834,7 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *Base = getShadowPtrForVAArgument(RealTy, IRB, OverflowOffset); OverflowOffset += RoundUpToAlignment(ArgSize, 8); IRB.CreateMemCpy(Base, MSV.getShadowPtr(A, IRB.getInt8Ty(), IRB), - ArgSize, kShadowTLSAlignment); + ArgSize, kShadowTLSAlignment, kShadowTLSAlignment); } else { ArgKind AK = classifyArgument(A); if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset) @@ -2912,7 +2912,7 @@ struct VarArgAMD64Helper : public VarArgHelper { IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 
AMD64FpEndOffset), VAArgOverflowSize); VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); - IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8, 8); } // Instrument va_start. @@ -2931,7 +2931,7 @@ struct VarArgAMD64Helper : public VarArgHelper { Value *RegSaveAreaShadowPtr = MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, - AMD64FpEndOffset, 16); + AMD64FpEndOffset, 16, 16); Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( @@ -2943,7 +2943,8 @@ struct VarArgAMD64Helper : public VarArgHelper { MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB); Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, AMD64FpEndOffset); - IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16); + IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, + 16, 16); } } }; @@ -3029,7 +3030,7 @@ struct VarArgMIPS64Helper : public VarArgHelper { // If there is a va_start in this function, make a backup copy of // va_arg_tls somewhere in the function entry block. VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); - IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8, 8); } // Instrument va_start. 
@@ -3044,7 +3045,7 @@ struct VarArgMIPS64Helper : public VarArgHelper { Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr); Value *RegSaveAreaShadowPtr = MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, 8, 8); } } }; diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index a13e552cbd0..f041a296684 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -347,6 +347,8 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { // instruction, but only for one operand, save it. If we reach the // other operand through another assumption later, then we may // change the alignment at that point. + // FIXME: The above statement is no longer true. Fix the code below + // to be able to reason about different dest/src alignments. 
if (MemTransferInst *MTI = dyn_cast(MI)) { unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MTI->getSource(), SE); @@ -376,20 +378,23 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment)) NewAlignment = std::max(NewAlignment, AltSrcAlignment); - if (NewAlignment > MI->getAlignment()) { - MI->setAlignment(ConstantInt::get(Type::getInt32Ty( - MI->getParent()->getContext()), NewAlignment)); + if (NewAlignment > MTI->getDestAlignment()) { + MTI->setDestAlignment(NewAlignment); + ++NumMemIntAlignChanged; + } + + if (NewAlignment > MTI->getSrcAlignment()) { + MTI->setSrcAlignment(NewAlignment); ++NumMemIntAlignChanged; } NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment)); NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment)); - } else if (NewDestAlignment > MI->getAlignment()) { + } else if (NewDestAlignment > MI->getDestAlignment()) { assert((!isa(MI) || isa(MI)) && "Unknown memory intrinsic"); - MI->setAlignment(ConstantInt::get(Type::getInt32Ty( - MI->getParent()->getContext()), NewDestAlignment)); + MI->setDestAlignment(NewDestAlignment); ++NumMemIntAlignChanged; } } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 36ad0a5f7b9..b0e6d19c505 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -611,7 +611,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // as any store/memset/memcpy is likely using vector instructions so // shortening it to not vector size is likely to be slower MemIntrinsic* DepIntrinsic = cast(DepWrite); - unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + unsigned DepWriteAlign = DepIntrinsic->getDestAlignment(); if (llvm::isPowerOf2_64(InstWriteOffset) || ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 
b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index c2fb8cd49b7..aed43b39d64 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -414,8 +414,8 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), MSI->getValue(), MSI, Ev, - BECount, /*NegStride=*/false); + MSI->getDestAlignment(), MSI->getValue(), MSI, + Ev, BECount, /*NegStride=*/false); } /// mayLoopAccessLocation - Return true if the specified loop might access the @@ -700,7 +700,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( CallInst *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); + SI->getAlignment(), LI->getAlignment()); NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index f80b07bf219..2db2b802f08 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -229,7 +229,8 @@ public: void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast(MSI->getLength())->getZExtValue(); - addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), + MSI); } void addRange(int64_t Start, int64_t Size, Value *Ptr, @@ -819,20 +820,17 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) { // If all checks passed, then we can transform M. - // Make sure to use the lesser of the alignment of the source and the dest - // since we're changing where we're reading from, but don't want to increase - // the alignment past what can be read from or written to. // TODO: Is this worth it if we're creating a less aligned memcpy? 
For // example we could be moving from movaps -> movq on x86. - unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); - IRBuilder<> Builder(M); if (UseMemMove) Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + M->getDestAlignment(), MDep->getSrcAlignment(), + M->isVolatile()); else Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + M->getDestAlignment(), MDep->getSrcAlignment(), + M->isVolatile()); // Remove the instruction we're replacing. MD->removeInstruction(M); @@ -878,7 +876,7 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy, // If Dest is aligned, and SrcSize is constant, use the minimum alignment // of the sum. const unsigned DestAlign = - std::max(MemSet->getAlignment(), MemCpy->getAlignment()); + std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); if (DestAlign > 1) if (ConstantInt *SrcSizeC = dyn_cast(SrcSize)) Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); @@ -935,7 +933,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, IRBuilder<> Builder(MemCpy); Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MemCpy->getAlignment()); + CopySize, MemCpy->getDestAlignment()); return true; } @@ -961,7 +959,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { IRBuilder<> Builder(M); Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - M->getAlignment(), false); + M->getDestAlignment(), false); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -990,8 +988,11 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { // d) memcpy from a just-memset'd source can be turned into memset. if (DepInfo.isClobber()) { if (CallInst *C = dyn_cast(DepInfo.getInst())) { + // FIXME: Can we pass in either of dest/src alignment here instead of + // convervatively taking the minimum? 
+ unsigned Align = std::min(M->getDestAlignment(), M->getSrcAlignment()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), M->getAlignment(), + CopySize->getZExtValue(), Align, C)) { MD->removeInstruction(M); M->eraseFromParent(); @@ -1108,7 +1109,11 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { getAnalysis().getAssumptionCache( *CS->getParent()->getParent()); DominatorTree &DT = getAnalysis().getDomTree(); - if (MDep->getAlignment() < ByValAlign && + // FIXME: Can we use either of dest/src alignment here instead of + // convervatively taking the minimum? + unsigned MinAlign = std::min(MDep->getDestAlignment(), + MDep->getSrcAlignment()); + if (MinAlign < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, CS.getInstruction(), &AC, &DT) < ByValAlign) return false; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index bfd48ff15f6..223c531b7d7 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2618,8 +2618,7 @@ private: assert(!IsSplit); assert(NewBeginOffset == BeginOffset); II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment(ConstantInt::get(CstTy, getSliceAlign())); + II.setDestAlignment(getSliceAlign()); deleteIfTriviallyDead(OldPtr); return false; @@ -2735,15 +2734,16 @@ private: // update both source and dest of a single call. 
if (!IsSplittable) { Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); - if (IsDest) + if (IsDest) { II.setDest(AdjustedPtr); - else + + if (II.getDestAlignment() > SliceAlign) + II.setDestAlignment(MinAlign(II.getDestAlignment(), SliceAlign)); + } else { II.setSource(AdjustedPtr); - if (II.getAlignment() > SliceAlign) { - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment( - ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign))); + if (II.getSrcAlignment() > SliceAlign) + II.setSrcAlignment(MinAlign(II.getSrcAlignment(), SliceAlign)); } DEBUG(dbgs() << " to: " << II << "\n"); @@ -2796,8 +2796,10 @@ private: // Compute the relative offset for the other pointer within the transfer. unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS); APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset); - unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1, - OtherOffset.zextOrTrunc(64).getZExtValue()); + unsigned OtherDestAlign = MinAlign(II.getDestAlignment() ? II.getDestAlignment() : 1, + OtherOffset.zextOrTrunc(64).getZExtValue()); + unsigned OtherSrcAlign = MinAlign(II.getSrcAlignment() ? II.getSrcAlignment() : 1, + OtherOffset.zextOrTrunc(64).getZExtValue()); if (EmitMemCpy) { // Compute the other pointer, folding as much as possible to produce @@ -2809,9 +2811,11 @@ private: Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); - CallInst *New = IRB.CreateMemCpy( - IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size, - MinAlign(SliceAlign, OtherAlign), II.isVolatile()); + CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, + IsDest ? 
OtherPtr : OurPtr, Size, + MinAlign(SliceAlign, OtherDestAlign), + MinAlign(SliceAlign, OtherSrcAlign), + II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return false; @@ -2843,7 +2847,7 @@ private: Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy, OtherPtr->getName() + "."); - unsigned SrcAlign = OtherAlign; + unsigned SrcAlign = OtherSrcAlign; Value *DstPtr = &NewAI; unsigned DstAlign = SliceAlign; if (!IsDest) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 114d22ddf2e..679e241d971 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -716,7 +716,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy); LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); - SrcVal->setAlignment(MTI->getAlignment()); + SrcVal->setAlignment(MTI->getSrcAlignment()); Builder.CreateStore(SrcVal, NewAI); } else if (GetUnderlyingObject(MTI->getDest(), DL, 0) != OrigAI) { // Src must be OrigAI, change this to be a load from NewAI then a store @@ -733,7 +733,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy); StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); - NewStore->setAlignment(MTI->getAlignment()); + NewStore->setAlignment(MTI->getDestAlignment()); } else { // Noop transfer. Src == Dst } @@ -2182,7 +2182,8 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. 
Value *OtherPtr = nullptr; - unsigned MemAlignment = MI->getAlignment(); + unsigned DestMemAlignment = MI->getDestAlignment(); + unsigned SrcMemAlignment = 0; if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy if (Inst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); @@ -2190,6 +2191,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, assert(Inst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } + SrcMemAlignment = MTI->getSrcAlignment(); } // If there is an other pointer, we want to convert it to the same pointer @@ -2235,7 +2237,8 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. Value *OtherElt = nullptr; - unsigned OtherEltAlign = MemAlignment; + unsigned OtherDestEltAlign = DestMemAlignment; + unsigned OtherSrcEltAlign = SrcMemAlignment; if (OtherPtr) { Value *Idx[2] = { Zero, @@ -2258,7 +2261,8 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // mem intrinsic and the alignment of the element. If the alignment of // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the // known alignment is just 4 bytes. - OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); + OtherDestEltAlign = (unsigned)MinAlign(OtherDestEltAlign, EltOffset); + OtherSrcEltAlign = (unsigned)MinAlign(OtherSrcEltAlign, EltOffset); } Value *EltPtr = NewElts[i]; @@ -2269,12 +2273,13 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, if (isa(MI)) { if (SROADest) { // From Other to Alloca. - Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); + Value *Elt = new LoadInst(OtherElt, "tmp", false, + OtherSrcEltAlign, MI); new StoreInst(Elt, EltPtr, MI); } else { // From Alloca to Other. 
Value *Elt = new LoadInst(EltPtr, "tmp", MI); - new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); + new StoreInst(Elt, OtherElt, false, OtherDestEltAlign, MI); } continue; } @@ -2337,9 +2342,11 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr if (isa(MI)) - Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); + Builder.CreateMemCpy(Dst, Src, EltSize, OtherDestEltAlign, + OtherSrcEltAlign, MI->isVolatile()); else - Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); + Builder.CreateMemMove(Dst, Src, EltSize, OtherDestEltAlign, + OtherSrcEltAlign, MI->isVolatile()); } } DeadInsts.push_back(MI); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index a31131bd4ac..7cb7c3ab54b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -851,7 +851,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. - Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); + Builder.CreateMemCpy(Dst, Src, Size, /*DestAlign=*/1, /*SrcAlign=*/1); } /// When inlining a call site that has a byval argument, diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 144e23522c1..8e2eeb9211c 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -238,7 +238,7 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
B.CreateMemCpy(CpyDst, Src, ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), - 1); + 1, 1); return Dst; } @@ -471,7 +471,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(Dst, Src, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1, + 1); return Dst; } @@ -498,7 +499,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, Src, LenV, 1); + B.CreateMemCpy(Dst, Src, LenV, 1, 1); return DstEnd; } @@ -538,7 +539,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); + B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1, 1); return Dst; } @@ -917,7 +918,7 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + CI->getArgOperand(2), 1, 1); return CI->getArgOperand(0); } @@ -929,7 +930,7 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + CI->getArgOperand(2), 1, 1); return CI->getArgOperand(0); } @@ -1758,7 +1759,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), 
ConstantInt::get(DL.getIntPtrType(CI->getContext()), FormatStr.size() + 1), - 1); // Copy the null byte. + 1, 1); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1792,7 +1793,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) { return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, 1); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -2329,7 +2330,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> & if (isFortifiedCallFoldable(CI, 3, 2, false)) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + CI->getArgOperand(2), 1, 1); return CI->getArgOperand(0); } return nullptr; @@ -2343,7 +2344,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> if (isFortifiedCallFoldable(CI, 3, 2, false)) { B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + CI->getArgOperand(2), 1, 1); return CI->getArgOperand(0); } return nullptr; diff --git a/test/Analysis/BasicAA/assume.ll b/test/Analysis/BasicAA/assume.ll index e163b5a4161..a3cf0c3687a 100644 --- a/test/Analysis/BasicAA/assume.ll +++ b/test/Analysis/BasicAA/assume.ll @@ -1,12 +1,12 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.assume(i1) #0 define void @test1(i8* %P, i8* %Q) nounwind ssp 
{ tail call void @llvm.assume(i1 true) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test1: @@ -14,10 +14,10 @@ define void @test1(i8* %P, i8* %Q) nounwind ssp { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true) ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true) -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.assume(i1 true) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.assume(i1 true) } attributes #0 = { nounwind } diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll index dc298f1668b..a69dc4aa680 100644 --- a/test/Analysis/BasicAA/cs-cs.ll +++ b/test/Analysis/BasicAA/cs-cs.ll @@ -5,8 +5,8 @@ target triple = "arm-apple-ios" declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, 
i8, i64, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind declare void @a_readonly_func(i8 *) noinline nounwind readonly @@ -37,41 +37,41 @@ entry: } define void @test2(i8* %P, i8* %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2a(i8* noalias %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2a: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: 
i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2b(i8* noalias %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 12 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2b: @@ -79,20 +79,20 @@ define void @test2b(i8* noalias %P, i8* noalias %Q) nounwind ssp { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2c(i8* noalias %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 11 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2c: @@ -100,20 +100,20 @@ define void @test2c(i8* 
noalias %P, i8* noalias %Q) nounwind ssp { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 
false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2d(i8* noalias %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 -12 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2d: @@ -121,20 +121,20 @@ define void @test2d(i8* noalias %P, i8* noalias %Q) nounwind ssp { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 
12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2e(i8* noalias %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 -11 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2e: @@ -142,67 +142,67 @@ define void @test2e(i8* noalias %P, i8* noalias %Q) nounwind ssp { ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test3(i8* %P, i8* %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) - tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test3: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void 
@test3a(i8* noalias %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test3a: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, 
i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test4(i8* %P, i8* noalias %Q) nounwind ssp { - tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test4: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) +; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just 
Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) } define void @test5(i8* %P, i8* %Q, i8* %R) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test5: @@ -210,27 +210,27 @@ define void @test5(i8* %P, i8* %Q, i8* %R) nounwind ssp { ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: MayAlias: i8* %P, i8* %R ; CHECK: MayAlias: i8* %Q, i8* %R -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; 
CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test6(i8* %P) nounwind ssp { - call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i1 false) call void @a_readonly_func(i8* %P) ret void ; CHECK-LABEL: Function: test6: -; CHECK: Just Mod: Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i1 false) ; CHECK: Just Ref: Ptr: i8* %P <-> call void @a_readonly_func(i8* %P) -; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) <-> call void @a_readonly_func(i8* %P) -; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) +; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i1 false) <-> call void @a_readonly_func(i8* %P) +; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i1 false) } 
attributes #0 = { nounwind readonly argmemonly } diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll index f0f1a631d08..755a9ccb23c 100644 --- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll +++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -12,15 +12,15 @@ define void @test0() { ret void } -; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) -; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @A = external global i8 @B = external global i8 define void @test1() { - call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) - call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) ret void } diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index e42793936c3..f3fb4a8a455 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -11,7 +11,7 @@ define i32 @test0(i8* %P) { store i32 0, i32* %A - call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i1 false) %B = load i32, i32* %A ret i32 %B @@ -27,7 +27,7 @@ define i8 @test1() { store i8 2, i8* %B ;; Not written to by 
memcpy - call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false) %C = load i8, i8* %B ret i8 %C @@ -38,7 +38,7 @@ define i8 @test2(i8* %P) { ; CHECK-LABEL: @test2 %P2 = getelementptr i8, i8* %P, i32 127 store i8 1, i8* %P2 ;; Not dead across memset - call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false) + call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false) %A = load i8, i8* %P2 ret i8 %A ; CHECK: ret i8 1 @@ -51,7 +51,7 @@ define i8 @test2a(i8* %P) { ;; FIXME: DSE isn't zapping this dead store. store i8 1, i8* %P2 ;; Dead, clobbered by memset. - call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false) + call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false) %A = load i8, i8* %P2 ret i8 %A ; CHECK-NOT: load @@ -91,7 +91,7 @@ define void @test3a(i8* %P, i8 %X) { define i32 @test4(i8* %P) { %tmp = load i32, i32* @G1 - call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i1 false) %tmp2 = load i32, i32* @G1 %sub = sub i32 %tmp2, %tmp ret i32 %sub @@ -106,7 +106,7 @@ define i32 @test4(i8* %P) { ; write to G1. 
define i32 @test5(i8* %P, i32 %Len) { %tmp = load i32, i32* @G1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i1 false) %tmp2 = load i32, i32* @G1 %sub = sub i32 %tmp2, %tmp ret i32 %sub @@ -227,7 +227,7 @@ define i32 @test13(i32* %P, i32* %P2) { ; CHECK: ret i32 0 } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/test/Analysis/CallGraph/no-intrinsics.ll b/test/Analysis/CallGraph/no-intrinsics.ll index d858907d724..bebcc624b9f 100644 --- a/test/Analysis/CallGraph/no-intrinsics.ll +++ b/test/Analysis/CallGraph/no-intrinsics.ll @@ -2,10 +2,10 @@ ; Check that intrinsics aren't added to the call graph -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @f(i8* %out, i8* %in) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %in, i32 100, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %in, i32 100, i1 false) ret void } diff --git a/test/Analysis/DependenceAnalysis/Preliminary.ll b/test/Analysis/DependenceAnalysis/Preliminary.ll index d6500cc0336..31bd5712af8 100644 --- a/test/Analysis/DependenceAnalysis/Preliminary.ll +++ 
b/test/Analysis/DependenceAnalysis/Preliminary.ll @@ -696,4 +696,4 @@ while.end: ; preds = %while.end.loopexit, ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll index 5d299cd2e91..65f3cc06b2b 100644 --- a/test/Analysis/GlobalsModRef/pr12351.ll +++ b/test/Analysis/GlobalsModRef/pr12351.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -basicaa -globals-aa -gvn -S | FileCheck %s -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @foo(i8* %x, i8* %y) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i32 1, i1 false); + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i1 false); ret void } diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll index 5dd47bca3a0..d20ad6fc2d7 100644 --- a/test/Analysis/GlobalsModRef/volatile-instrs.ll +++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.8.0" @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 declare i32 @printf(i8* nocapture, ...) 
nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind ; Make sure that the initial memcpy call does not go away @@ -21,10 +21,10 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, define i32 @main() nounwind uwtable ssp { main_entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i1 false) %0 = load volatile i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0), align 4 store i32 %0, i32* @c, align 4 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i1 false) nounwind %call = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind ret i32 0 } diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll index e6c62ee6b47..50c30431af5 100644 --- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -172,7 +172,7 @@ bb23: ; preds = %bb24, %bb.nph %55 = mul i32 %y.21, %w ; [#uses=1] %.sum5 = add i32 %55, %.sum3 ; [#uses=1] %56 = getelementptr i8, i8* %j, i32 %.sum5 ; [#uses=1] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i1 false) %57 = add i32 %y.21, 1 ; [#uses=2] br label %bb24 @@ -189,7 +189,7 @@ bb26: ; preds = %bb24.bb26_crit_edge, %bb22 %60 = getelementptr i8, i8* %j, i32 %.sum4 ; [#uses=1] %61 = mul i32 %x, %w ; [#uses=1] %62 = sdiv i32 %61, 2 ; [#uses=1] - tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i1 false) ret void bb29: ; preds = %bb20, %entry @@ -207,7 +207,7 @@ bb30: ; preds = %bb31, %bb.nph11 %67 = getelementptr i8, i8* %r, i32 %66 ; [#uses=1] %68 = mul i32 %y.310, %w ; [#uses=1] %69 = getelementptr i8, i8* %j, i32 %68 ; [#uses=1] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i1 false) %70 = add i32 %y.310, 1 ; [#uses=2] br label %bb31 @@ -223,12 +223,12 @@ bb33: ; preds = %bb31.bb33_crit_edge, %bb29 %73 = getelementptr i8, i8* %j, i32 %72 ; [#uses=1] %74 = mul i32 %x, %w ; [#uses=1] %75 = sdiv i32 %74, 2 ; [#uses=1] - tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i1 false) ret void return: ; preds = %bb20 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* 
nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll index 89750810d1b..edd9be058f8 100644 --- a/test/Analysis/ScalarEvolution/trip-count.ll +++ b/test/Analysis/ScalarEvolution/trip-count.ll @@ -41,7 +41,7 @@ define i32 @test2() { entry: %bins = alloca [16 x i64], align 16 %0 = bitcast [16 x i64]* %bins to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i1 false) br label %preheader preheader: ; preds = %for.inc.1, %entry @@ -88,4 +88,4 @@ for.inc.1: ; preds = %for.body.1, %for.in } ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll index cce0182d649..df6637a4ced 100644 --- a/test/Analysis/ScalarEvolution/trip-count3.ll +++ b/test/Analysis/ScalarEvolution/trip-count3.ll @@ -50,7 +50,7 @@ sha_update.exit.exitStub: ; preds = %bb3.i bb2.i: ; preds = %bb3.i %1 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3 %2 = bitcast [16 x i32]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i1 false) %3 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0 %4 = bitcast i32* %3 to i8* br label %codeRepl @@ -74,7 +74,7 @@ bb3.i: ; preds = %byte_reverse.exit.i declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, 
i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index fe2fdd74b41..7a0c6289eaf 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -45,13 +45,13 @@ define void @test1_no(i32* %p) nounwind { ; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) #0 { define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !1 ret void } ; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture readonly %q, i64 %n) #1 { define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !2 ret void } @@ -70,7 +70,7 @@ define i32 @test3_no(i8* %p) nounwind { } declare void @callee(i32* %p) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind ; CHECK: attributes #0 = { norecurse nounwind readnone } ; CHECK: attributes #1 = { norecurse nounwind } diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll index 9fc9e42fc6c..19927aaa43b 100644 --- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -6,17 +6,17 @@ target datalayout = "e-p:64:64:64" ; it has a TBAA tag which declares that it is unrelated. 
; CHECK: @foo -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %p, i8* align 1 %q, i64 16, i1 false), !tbaa !0 ; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]] ; CHECK-NEXT: ret void define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i1 false), !tbaa !2 store i8 2, i8* %s, align 1, !tbaa !1 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i1 false), !tbaa !2 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind ; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0} ; CHECK: [[TYPEA]] = !{!"A", !{{.*}}} diff --git a/test/Bitcode/memintrinsics.3.7.ll b/test/Bitcode/memintrinsics.3.7.ll new file mode 100644 index 00000000000..242aad6dec9 --- /dev/null +++ b/test/Bitcode/memintrinsics.3.7.ll @@ -0,0 +1,33 @@ +; RUN: llvm-dis < %s.bc| FileCheck %s + +; memintrinsics.3.7.ll.bc was generated by passing this file to llvm-as-3.7. +; The test checks that LLVM does not misread memcpy/memmove/memset intrinsic functions +; of older bitcode files. 
+ +define void @memcpyintrinsic(i8* %dest, i8* %src, i32 %len) { +entry: + +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 %len, i1 true) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 4, i1 true) + ret void +} + +define void @memmoveintrinsic(i8* %dest, i8* %src, i32 %len) { +entry: + +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 %len, i1 true) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 8, i1 true) + ret void +} + +define void @memsetintrinsic(i8* %dest, i8* %src, i32 %len) { +entry: + +; CHECK: call void @llvm.memset.p0i8.i32(i8* align 16 %dest, i8 0, i32 %len, i1 true) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 %len, i32 16, i1 true) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 %align, i1 %isvolatile) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 %align, i1 %isvolatile) +declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %src, i32 %len, i32 %align, i1 %isvolatile) \ No newline at end of file diff --git a/test/Bitcode/memintrinsics.3.7.ll.bc b/test/Bitcode/memintrinsics.3.7.ll.bc new file mode 100644 index 0000000000000000000000000000000000000000..187ef044db8ae9ed644b9e2021eeec00416564ae GIT binary patch literal 916 zcmb7?Z%7ki9LJySZo9Kxw^J9)ysf*9!WTw!8?>>FZg);5N+^NvqD^A>#yB;W%QyRT z?qbKkY4!0xhX?d7(^I|WUgMBjVKU`?AdhGi(b?Zp5Jr#`~B|o{qo$y z3rkxp0l<3zfF{lWCEQ!#!eZuo-V)lYrmW66g+!cI;cZ6r_HdN)Dfu}KFHu&V1K0N* zv+^l_M#l&}sVAV#%2)hkrrA^+@D9e(+lddiP_{=ss0Tm|ne8bCJUE_YfM#HnNglvB z1)5;J7xwGANlt$T&x08T04=OrO?VS?bsB9oJ(ghFk#ZSbOMfb@k)(dgVg?f^zCtVX zI%RiqWsVM6RQ0h$ zdl~VR1Wy|Alqb{IMQjSkB7RvEv6~_io|HvKZYwvFyBTuZqP=F(?o`R@0BNI&hAo!f zU=7PbW5_b&T1>ds1?Mxwy_j%Ih^t)Hg#*~s2zGu%9)~$9iy2uAVBrodysv`T8&UZR zE|2Nu@vI!l#ya<^ @neon4xfloat(<4 x float> %A, <4 x float> %B) { } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* 
nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll index fac5f8ad2e9..231c2cff154 100644 --- a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll +++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll @@ -32,9 +32,9 @@ entry: %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -106,7 +106,7 @@ for.end: ; preds = %for.cond ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } diff --git a/test/CodeGen/AArch64/arm64-stur.ll b/test/CodeGen/AArch64/arm64-stur.ll index 
5f4cb9f3d95..f8675cb6cda 100644 --- a/test/CodeGen/AArch64/arm64-stur.ll +++ b/test/CodeGen/AArch64/arm64-stur.ll @@ -55,11 +55,11 @@ define void @foo(%struct.X* nocapture %p) nounwind optsize ssp { ; CHECK-NEXT: ret %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1 %val = bitcast i64* %B to i8* - call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; Unaligned 16b stores are split into 8b stores for performance. ; radar://15424193 diff --git a/test/CodeGen/AArch64/arm64-virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll index 703d81a8d4f..be864b7cc26 100644 --- a/test/CodeGen/AArch64/arm64-virtual_base.ll +++ b/test/CodeGen/AArch64/arm64-virtual_base.ll @@ -43,9 +43,9 @@ entry: %tmp14 = bitcast double* %arraydecay5.3.1 to i8* %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0 %tmp15 = bitcast double* %arraydecay11.3.1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i1 false) ret void } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) diff --git a/test/CodeGen/AArch64/fast-isel-memcpy.ll b/test/CodeGen/AArch64/fast-isel-memcpy.ll index 07595a954db..f8e41cb7f3b 100644 --- a/test/CodeGen/AArch64/fast-isel-memcpy.ll +++ b/test/CodeGen/AArch64/fast-isel-memcpy.ll @@ -8,8 +8,8 @@ define void @test(i64 %a, i8* %b) { %1 = and i64 %a, 9223372036854775807 %2 = inttoptr i64 %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 
false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 9100ae39282..f4930130b82 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -186,11 +186,11 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3, ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define i32 @test_extern() { ; CHECK-LABEL: test_extern: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i1 0) ; CHECK: bl memcpy ret i32 0 } diff --git a/test/CodeGen/AArch64/memcpy-f128.ll b/test/CodeGen/AArch64/memcpy-f128.ll index 76db2974ab4..f61593a1bd2 100644 --- a/test/CodeGen/AArch64/memcpy-f128.ll +++ b/test/CodeGen/AArch64/memcpy-f128.ll @@ -12,8 +12,8 @@ define void @test1() { ; CHECK: str q0 ; CHECK: ret entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) diff --git a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll index b970fb12415..c780d15b58d 100644 --- a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll +++ b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll @@ -4,7 +4,7 @@ ; CHECK: b memcpy define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, 
i32 %n) #0 { entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -12,7 +12,7 @@ entry: ; CHECK: b memmove define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -20,12 +20,12 @@ entry: ; CHECK: b memset define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { entry: - tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll index e491732cf9c..55b447ba07b 100644 --- a/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, 
i32, i1) nounwind +declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: @@ -82,7 +82,7 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 1 %bcout, i8 addrspace(3)* align 1 %bcin, i32 32, i1 false) nounwind ret void } @@ -127,7 +127,7 @@ define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias % define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind ret void } @@ -163,7 +163,7 @@ define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias % define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind + 
call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind ret void } @@ -201,7 +201,7 @@ define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias % define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind ret void } @@ -278,7 +278,7 @@ define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias % define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 1 %bcout, i8 addrspace(1)* align 1 %bcin, i64 32, i1 false) nounwind ret void } @@ -321,7 +321,7 @@ define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noa define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind ret void } @@ -334,7 +334,7 
@@ define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noa define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind ret void } @@ -347,7 +347,7 @@ define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noa define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind ret void } @@ -360,6 +360,6 @@ define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noa define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind ret void } diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll index 567400318ee..0f2f3fecb6f 100644 --- 
a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll +++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll @@ -59,7 +59,7 @@ bb3: ; preds = %entry %34 = fadd double %31, 0.000000e+00 %35 = fadd double %32, 0.000000e+00 %36 = bitcast %struct.ggPoint3* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i1 false) store double %33, double* null, align 8 br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i @@ -76,4 +76,4 @@ bb7: ; preds = %entry ret i32 0 } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll index c447a1f25b6..73497b2112b 100644 --- a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll +++ b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -16,7 +16,7 @@ bb: ; preds = %entry bb1: ; preds = %entry %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind - call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i1 false) %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0 store %struct.mo* undef, %struct.mo** %1, align 4 %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5 @@ -40,7 +40,7 @@ bb6: ; preds = %bb3 declare %struct.ui* @vn_pp_to_ui(i32*) -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll index 5df439389cd..d2d3116cbd0 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll 
@@ -14,8 +14,8 @@ target triple = "thumbv7-apple-ios5.0.0" ; CHECK-UNALIGNED: str define void @foo(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll index c8e08c22ab1..7024a653b6c 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -5,8 +5,8 @@ ; CHECK: vst1.64 define void @f_0_40(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 %c, i8 0, i64 40, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll index ce0dcc70952..ef33b2f5018 100644 --- a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll +++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll @@ -19,7 +19,7 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone declare i8* @__cxa_begin_catch(i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind declare void @__cxa_end_catch() diff --git a/test/CodeGen/ARM/Windows/memset.ll b/test/CodeGen/ARM/Windows/memset.ll index 500e25e259c..c9b22f47a15 100644 --- a/test/CodeGen/ARM/Windows/memset.ll +++ b/test/CodeGen/ARM/Windows/memset.ll @@ -2,11 +2,11 @@ @source = common global [512 x i8] zeroinitializer, align 4 -declare void 
@llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @function() { entry: - call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i1 false) unreachable } diff --git a/test/CodeGen/ARM/Windows/no-aeabi.ll b/test/CodeGen/ARM/Windows/no-aeabi.ll index 3971b9ccf58..35cc7d5032d 100644 --- a/test/CodeGen/ARM/Windows/no-aeabi.ll +++ b/test/CodeGen/ARM/Windows/no-aeabi.ll @@ -1,14 +1,14 @@ ; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -o - %s | FileCheck %s -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind @source = common global [512 x i8] zeroinitializer, align 4 @target = common global [512 x i8] zeroinitializer, align 4 define void @move() nounwind { entry: - call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } @@ -16,7 +16,7 @@ entry: define void @copy() nounwind { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } diff --git a/test/CodeGen/ARM/crash-O0.ll b/test/CodeGen/ARM/crash-O0.ll index 
f92af999be5..8f3b5f10cd6 100644 --- a/test/CodeGen/ARM/crash-O0.ll +++ b/test/CodeGen/ARM/crash-O0.ll @@ -12,7 +12,7 @@ entry: } @.str523 = private constant [256 x i8] calign 4 ; <[256 x i8]*> [#uses=1] -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind ; This function uses the scavenger for an ADDri instruction. ; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit. @@ -21,8 +21,8 @@ entry: %letter = alloca i8 ; [#uses=0] %prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1] %buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false) %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false) unreachable } diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 2a8898651f0..07fb4ef9ba9 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -25,7 +25,7 @@ declare i8* @objc_msgSend(i8*, i8*, ...) 
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 { %1 = alloca %0*, align 4 @@ -67,7 +67,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load %24 = bitcast i8* %23 to %struct.CR*, !dbg !143 %25 = bitcast %struct.CR* %24 to i8*, !dbg !143 %26 = bitcast %struct.CR* %data to i8*, !dbg !143 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i1 false), !dbg !143 %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144 %28 = load %3*, %3** %27, align 4, !dbg !144 %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 @@ -76,7 +76,7 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load %32 = bitcast i8* %31 to %struct.CR*, !dbg !144 %33 = bitcast %struct.CR* %32 to i8*, !dbg !144 %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i1 false), !dbg !144 %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145 %36 = load %3*, %3** %35, align 4, !dbg !145 %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145 diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll index 5b963fd64de..06d8f8dc179 100644 --- a/test/CodeGen/ARM/dyn-stackalloc.ll +++ b/test/CodeGen/ARM/dyn-stackalloc.ll @@ -51,7 +51,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) { %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag) %tmp6.len = call i32 
@strlen(i8* %tmp6) %tmp6.indexed = getelementptr i8, i8* %tmp6, i32 %tmp6.len - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i1 false) %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents) call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6) ret void @@ -65,4 +65,4 @@ declare fastcc void @comment_add(%struct.comment*, i8*) declare i8* @strcpy(i8*, i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 1c7ff687938..131023e7ab6 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -39,11 +39,11 @@ define void @t1() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i1 false) ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM-LABEL: t2: @@ -78,11 +78,11 @@ define void @t2() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, 
i32 16), i32 17, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @t3() nounwind ssp { ; ARM-LABEL: t3: @@ -115,7 +115,7 @@ define void @t3() nounwind ssp { ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } @@ -142,11 +142,11 @@ define void @t4() nounwind ssp { ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @t5() nounwind ssp { ; ARM-LABEL: t5: @@ -179,7 +179,7 @@ define void @t5() nounwind ssp { ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } @@ -234,14 +234,14 @@ define void @t6() nounwind ssp { ; THUMB: ldrb r1, [r0, #25] ; THUMB: strb r1, [r0, #13] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } ; rdar://13202135 define void @t7() nounwind ssp { ; Just make sure this doesn't assert when we have an odd length and an alignment of 2. 
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i1 false) ret void } diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll index 611cba6ed1f..10e56a346a2 100644 --- a/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/test/CodeGen/ARM/machine-cse-cmp.ll @@ -37,14 +37,14 @@ entry: for.body.lr.ph: ; preds = %entry %1 = icmp sgt i32 %0, 1 %smax = select i1 %1, i32 %0, i32 1 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i1 false) unreachable for.cond1.preheader: ; preds = %entry ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind ; rdar://12462006 define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind { diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index d874884dcb3..a5c525c062f 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -23,7 +23,7 @@ entry: ; CHECK-T1: strb [[TREG1]], ; CHECK-T1: ldrh [[TREG2:r[0-9]]], ; CHECK-T1: strh [[TREG2]] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr 
inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) ret i32 0 } @@ -36,7 +36,7 @@ entry: ; CHECK: adds r1, #15 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -50,7 +50,7 @@ entry: ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false) ret void } @@ -61,7 +61,7 @@ entry: ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! ; CHECK: vldr d{{[0-9]+}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } @@ -71,7 +71,7 @@ entry: ; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] ; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]! 
; CHECK: strh [[REG5:r[0-9]+]], [r0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false) ret void } @@ -90,7 +90,7 @@ entry: ; CHECK-T1: strb [[TREG3]], ; CHECK-T1: movs [[TREG4:r[0-9]]], ; CHECK-T1: strb [[TREG4]], - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false) ret void } @@ -108,7 +108,7 @@ entry: ; CHECK-T1: strh [[TREG5]], ; CHECK-T1: ldr [[TREG6:r[0-9]]], ; CHECK-T1: str [[TREG6]] - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false) ret void } @@ -124,9 +124,9 @@ entry: ; CHECK-T1: str %0 = bitcast %struct.Foo* %a to i8* %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind diff --git 
a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll index 66743f3e9d5..1edf53b19d3 100644 --- a/test/CodeGen/ARM/memfunc.ll +++ b/test/CodeGen/ARM/memfunc.ll @@ -14,13 +14,13 @@ entry: ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; EABI memset swaps arguments ; CHECK-IOS: mov r1, #1 @@ -31,7 +31,7 @@ entry: ; CHECK-EABI: bl __aeabi_memset ; CHECK-GNUEABI: mov r1, #1 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false) ; EABI uses memclr if value set to 0 ; CHECK-IOS: mov r1, #0 @@ -40,7 +40,7 @@ entry: ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i1 false) ; EABI uses aligned function variants if possible @@ -48,49 +48,49 @@ entry: ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove4 ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy4 ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false) ; 
CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memset4 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 1, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr4 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 0, i32 500, i1 false) ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove8 ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy8 ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memset8 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 1, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr8 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 0, i32 500, i1 false) unreachable } @@ -111,7 +111,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = bitcast [9 x i8]* %arr0 to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: add r1, sp, #16 ; CHECK-IOS: bl _memcpy 
@@ -120,7 +120,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [9 x i8], align 1 %1 = bitcast [9 x i8]* %arr1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK-IOS: mov r0, sp ; CHECK-IOS: mov r1, #1 @@ -136,7 +136,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [9 x i8], align 1 %2 = bitcast [9 x i8]* %arr2 to i8* - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -153,7 +153,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [7 x i8], align 1 %0 = bitcast [7 x i8]* %arr0 to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r1, sp, #10}} ; CHECK-IOS: bl _memcpy @@ -162,7 +162,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [7 x i8], align 1 %1 = bitcast [7 x i8]* %arr1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r0, sp, #3}} ; CHECK-IOS: mov r1, #1 @@ -175,7 +175,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [7 x i8], align 1 %2 = bitcast [7 x i8]* %arr2 to i8* - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -192,7 +192,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? 
r., sp, #(10|14)}} ; CHECK-IOS: bl _memcpy @@ -201,7 +201,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [9 x i8], align 1 %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)}} ; CHECK-IOS: mov r1, #1 @@ -214,7 +214,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [9 x i8], align 1 %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -231,7 +231,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)}} ; CHECK-IOS: bl _memcpy @@ -240,7 +240,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? 
r., sp, #(1|5)}} ; CHECK-IOS: mov r1, #1 @@ -253,7 +253,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -270,7 +270,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)}} ; CHECK-IOS: bl _memcpy @@ -279,7 +279,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)}} ; CHECK-IOS: mov r1, #1 @@ -292,7 +292,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -309,7 +309,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? 
r., sp, #(10|14)}} ; CHECK-IOS: bl _memcpy @@ -318,7 +318,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)}} ; CHECK-IOS: mov r1, #1 @@ -331,7 +331,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -348,7 +348,7 @@ entry: ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)}} ; CHECK-IOS: bl _memcpy @@ -357,7 +357,7 @@ entry: ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? 
r., sp, #(1|5)}} ; CHECK-IOS: mov r1, #1 @@ -370,7 +370,7 @@ entry: ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -386,13 +386,13 @@ entry: @arr7 = external global [7 x i8], align 1 define void @f9(i8* %dest, i32 %n) { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* 
@arr4, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i1 false) unreachable } @@ -417,6 +417,6 @@ entry: ; CHECK: arr6: ; CHECK-NOT: arr7: -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll index f6f8d562350..98334600ddb 100644 --- a/test/CodeGen/ARM/memset-inline.ll +++ b/test/CodeGen/ARM/memset-inline.ll @@ -6,7 +6,7 @@ entry: ; CHECK: movs r1, #0 ; CHECK: strd r1, r1, [r0] ; CHECK: str r1, [r0, #8] - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } @@ -19,11 +19,11 @@ entry: ; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 1 %0, i8 0, i32 26, i1 false) call void @something(i8* %0) nounwind ret void } declare void @something(i8*) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll index 2a7a82da8f6..e2fe0cfaabb 100644 --- a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll +++ b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll @@ -15,13 +15,13 @@ define i32 @main() #0 { entry: %title = alloca [15 x i8], align 1 %0 = getelementptr inbounds [15 x i8], [15 x i8]* %title, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i1 false) %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3 ret i32 0 } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 ; Function Attrs: nounwind optsize declare i32 @printf(i8* nocapture readonly, ...) 
#2 diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll index 52f70fe1e0f..a2e2c32f50e 100644 --- a/test/CodeGen/ARM/struct-byval-frame-index.ll +++ b/test/CodeGen/ARM/struct-byval-frame-index.ll @@ -61,10 +61,10 @@ target triple = "armv7l-unknown-linux-gnueabihf" @brefframe = external global [4 x [4 x i8]], align 1 ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) #0 ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 ; Function Attrs: nounwind declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1 @@ -123,10 +123,10 @@ for.cond210.preheader: ; preds = %if.then169 unreachable if.end230: ; preds = %if.end164 - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i1 false) %b8pdir = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15 %3 = bitcast [4 x i32]* %b8pdir to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i1 false) br i1 undef, label %if.end236, label %if.then233 if.then233: ; preds = %if.end230 diff --git a/test/CodeGen/BPF/byval.ll b/test/CodeGen/BPF/byval.ll index 25ba909d9cd..2a09cf1376d 100644 --- a/test/CodeGen/BPF/byval.ll +++ b/test/CodeGen/BPF/byval.ll @@ -16,7 +16,7 @@ entry: store i32 3, i32* %arrayinit.element2, align 8 %arrayinit.start = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 3 %scevgep4 = bitcast i32* %arrayinit.start to i8* - call void 
@llvm.memset.p0i8.i64(i8* %scevgep4, i8 0, i64 28, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* %scevgep4, i8 0, i64 28, i1 false) call void @foo(i32 %a, %struct.S* byval align 8 %.compoundliteral) #3 ret void } @@ -24,4 +24,4 @@ entry: declare void @foo(i32, %struct.S* byval align 8) #1 ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3 diff --git a/test/CodeGen/BPF/ex1.ll b/test/CodeGen/BPF/ex1.ll index 546e5d49da6..4aa90ef13df 100644 --- a/test/CodeGen/BPF/ex1.ll +++ b/test/CodeGen/BPF/ex1.ll @@ -12,7 +12,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne %devname = alloca [3 x i8], align 1 %fmt = alloca [15 x i8], align 1 %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i1 false) %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0 %3 = load i64, i64* %2, align 8 %4 = inttoptr i64 %3 to %struct.sk_buff* @@ -25,7 +25,7 @@ define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/ne ;