From a59adc40153f3e0f9843952c127d179b5ebe6c4c Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Mon, 14 Dec 2009 05:11:02 +0000
Subject: [PATCH] revert r91184, because it causes a crash on a .bc file I
 just sent to Bob.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@91268 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../Scalar/ScalarReplAggregates.cpp           | 756 +++++++++---------
 .../ScalarRepl/2009-12-11-NeonTypes.ll        |  68 --
 2 files changed, 390 insertions(+), 434 deletions(-)

diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 4b686ccd297..b040a2711b4 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -102,27 +102,25 @@ namespace {
     int isSafeAllocaToScalarRepl(AllocaInst *AI);

-    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                             uint64_t ArrayOffset, AllocaInfo &Info);
-    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
-                   uint64_t &ArrayOffset, AllocaInfo &Info);
-    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t ArrayOffset,
-                         uint64_t MemSize, const Type *MemOpType, bool isStore,
-                         AllocaInfo &Info);
-    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
-    unsigned FindElementAndOffset(const Type *&T, uint64_t &Offset);
+    void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
+                               AllocaInfo &Info);
+    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
+                          AllocaInfo &Info);
+    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
+                                        unsigned OpNo, AllocaInfo &Info);
+    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
+                                        AllocaInfo &Info);

     void DoScalarReplacement(AllocaInst *AI,
                              std::vector &WorkList);
     void CleanupGEP(GetElementPtrInst *GEP);
-    void CleanupAllocaUsers(Value *V);
+    void CleanupAllocaUsers(AllocaInst *AI);
     AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);

-    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
-                              SmallVector &NewElts);
-    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
-                    SmallVector &NewElts);
-    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
+    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
+                                    SmallVector &NewElts);
+
+    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                       AllocaInst *AI,
                                       SmallVector &NewElts);
     void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
@@ -362,12 +360,176 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
     }
   }

-  // Now that we have created the new alloca instructions, rewrite all the
-  // uses of the old alloca.
-  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
+  // Now that we have created the alloca instructions that we want to use,
+  // expand the getelementptr instructions to use them.
+  while (!AI->use_empty()) {
+    Instruction *User = cast(AI->use_back());
+    if (BitCastInst *BCInst = dyn_cast(User)) {
+      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
+      BCInst->eraseFromParent();
+      continue;
+    }
+
+    // Replace:
+    //   %res = load { i32, i32 }* %alloc
+    // with:
+    //   %load.0 = load i32* %alloc.0
+    //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
+    //   %load.1 = load i32* %alloc.1
+    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
+    // (Also works for arrays instead of structs)
+    if (LoadInst *LI = dyn_cast(User)) {
+      Value *Insert = UndefValue::get(LI->getType());
+      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
+        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
+      }
+      LI->replaceAllUsesWith(Insert);
+      LI->eraseFromParent();
+      continue;
+    }
+
+    // Replace:
+    //   store { i32, i32 } %val, { i32, i32 }* %alloc
+    // with:
+    //   %val.0 = extractvalue { i32, i32 } %val, 0
+    //   store i32 %val.0, i32* %alloc.0
+    //   %val.1 = extractvalue { i32, i32 } %val, 1
+    //   store i32 %val.1, i32* %alloc.1
+    // (Also works for arrays instead of structs)
+    if (StoreInst *SI = dyn_cast(User)) {
+      Value *Val = SI->getOperand(0);
+      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
+        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
+        new StoreInst(Extract, ElementAllocas[i], SI);
+      }
+      SI->eraseFromParent();
+      continue;
+    }
+
+    GetElementPtrInst *GEPI = cast(User);
+    // We now know that the GEP is of the form:  GEP , 0,
+    unsigned Idx =
+      (unsigned)cast(GEPI->getOperand(2))->getZExtValue();
+
+    assert(Idx < ElementAllocas.size() && "Index out of range?");
+    AllocaInst *AllocaToUse = ElementAllocas[Idx];
+
+    Value *RepValue;
+    if (GEPI->getNumOperands() == 3) {
+      // Do not insert a new getelementptr instruction with zero indices, only
+      // to have it optimized out later.
+      RepValue = AllocaToUse;
+    } else {
+      // We are indexing deeply into the structure, so we still need a
+      // getelement ptr instruction to finish the indexing.  This may be
+      // expanded itself once the worklist is rerun.
+      //
+      SmallVector NewArgs;
+      NewArgs.push_back(Constant::getNullValue(
+                                          Type::getInt32Ty(AI->getContext())));
+      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
+      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
+                                           NewArgs.end(), "", GEPI);
+      RepValue->takeName(GEPI);
+    }
+
+    // If this GEP is to the start of the aggregate, check for memcpys.
+    if (Idx == 0 && GEPI->hasAllZeroIndices())
+      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);
+
+    // Move all of the users over to the new GEP.
+    GEPI->replaceAllUsesWith(RepValue);
+    // Delete the old GEP
+    GEPI->eraseFromParent();
+  }
+
+  // Finally, delete the Alloca instruction
+  AI->eraseFromParent();
   NumReplaced++;
 }
-
+
+/// isSafeElementUse - Check to see if this use is an allowed use for a
+/// getelementptr instruction of an array aggregate allocation.  isFirstElt
+/// indicates whether Ptr is known to the start of the aggregate.
+void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); + I != E; ++I) { + Instruction *User = cast(*I); + switch (User->getOpcode()) { + case Instruction::Load: break; + case Instruction::Store: + // Store is ok if storing INTO the pointer, not storing the pointer + if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); + break; + case Instruction::GetElementPtr: { + GetElementPtrInst *GEP = cast(User); + bool AreAllZeroIndices = isFirstElt; + if (GEP->getNumOperands() > 1 && + (!isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isZero())) + // Using pointer arithmetic to navigate the array. + return MarkUnsafe(Info); + + // Verify that any array subscripts are in range. + for (gep_type_iterator GEPIt = gep_type_begin(GEP), + E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { + // Ignore struct elements, no extra checking needed for these. + if (isa(*GEPIt)) + continue; + + // This GEP indexes an array. Verify that this is an in-range + // constant integer. Specifically, consider A[0][i]. We cannot know that + // the user isn't doing invalid things like allowing i to index an + // out-of-range subscript that accesses A[1]. Because of this, we have + // to reject SROA of any accesses into structs where any of the + // components are variables. + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); + + // Are all indices still zero? + AreAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= AT->getNumElements()) + return MarkUnsafe(Info); + } else if (const VectorType *VT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= VT->getNumElements()) + return MarkUnsafe(Info); + } + } + + isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); + if (Info.isUnsafe) return; + break; + } + case Instruction::BitCast: + if (isFirstElt) { + isSafeUseOfBitCastedAllocation(cast(User), AI, Info); + if (Info.isUnsafe) return; + break; + } + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + case Instruction::Call: + if (MemIntrinsic *MI = dyn_cast(User)) { + if (isFirstElt) { + isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); + if (Info.isUnsafe) return; + break; + } + } + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + default: + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + return MarkUnsafe(Info); + } + } + return; // All users look ok :) +} + /// AllUsersAreLoads - Return true if all users of this value are loads. static bool AllUsersAreLoads(Value *Ptr) { for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); @@ -377,116 +539,72 @@ static bool AllUsersAreLoads(Value *Ptr) { return true; } -/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to -/// performing scalar replacement of alloca AI. The results are flagged in -/// the Info parameter. Offset and ArrayOffset indicate the position within -/// AI that is referenced by this instruction. 
-void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, AllocaInfo &Info) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { - Instruction *User = cast(*UI); - - if (BitCastInst *BC = dyn_cast(User)) { - isSafeForScalarRepl(BC, AI, Offset, ArrayOffset, Info); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - uint64_t GEPArrayOffset = ArrayOffset; - uint64_t GEPOffset = Offset; - isSafeGEP(GEPI, AI, GEPOffset, GEPArrayOffset, Info); - if (!Info.isUnsafe) - isSafeForScalarRepl(GEPI, AI, GEPOffset, GEPArrayOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - if (Length) - isSafeMemAccess(AI, Offset, ArrayOffset, Length->getZExtValue(), 0, - UI.getOperandNo() == 1, Info); - else - MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(User)) { - if (!LI->isVolatile()) { - const Type *LIType = LI->getType(); - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(LIType), - LIType, false, Info); - } else - MarkUnsafe(Info); - } else if (StoreInst *SI = dyn_cast(User)) { - // Store is ok if storing INTO the pointer, not storing the pointer - if (!SI->isVolatile() && SI->getOperand(0) != I) { - const Type *SIType = SI->getOperand(0)->getType(); - isSafeMemAccess(AI, Offset, ArrayOffset, TD->getTypeAllocSize(SIType), - SIType, true, Info); - } else - MarkUnsafe(Info); - } else if (isa(UI)) { - // If one user is DbgInfoIntrinsic then check if all users are - // DbgInfoIntrinsics. - if (OnlyUsedByDbgInfoIntrinsics(I)) { - Info.needsCleanup = true; - return; - } - MarkUnsafe(Info); - } else { - DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); - MarkUnsafe(Info); - } - if (Info.isUnsafe) return; - } -} +/// isSafeUseOfAllocation - Check if this user is an allowed use for an +/// aggregate allocation. +void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI, + AllocaInfo &Info) { + if (BitCastInst *C = dyn_cast(User)) + return isSafeUseOfBitCastedAllocation(C, AI, Info); + + if (LoadInst *LI = dyn_cast(User)) + if (!LI->isVolatile()) + return;// Loads (returning a first class aggregrate) are always rewritable + + if (StoreInst *SI = dyn_cast(User)) + if (!SI->isVolatile() && SI->getOperand(0) != AI) + return;// Store is ok if storing INTO the pointer, not storing the pointer + + GetElementPtrInst *GEPI = dyn_cast(User); + if (GEPI == 0) + return MarkUnsafe(Info); -/// isSafeGEP - Check if a GEP instruction can be handled for scalar -/// replacement. It is safe when all the indices are constant, in-bounds -/// references, and when the resulting offset corresponds to an element within -/// the alloca type. The results are flagged in the Info parameter. Upon -/// return, Offset is adjusted as specified by the GEP indices. For the -/// special case of a variable index to a 2-element array, ArrayOffset is set -/// to the array element size. -void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, - uint64_t &Offset, uint64_t &ArrayOffset, - AllocaInfo &Info) { - gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); - if (GEPIt == E) - return; + gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); - // The first GEP index must be zero. - if (!isa(GEPIt.getOperand()) || - !cast(GEPIt.getOperand())->isZero()) + // The GEP is not safe to transform if not of the form "GEP , 0, ". 
+ if (I == E || + I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { return MarkUnsafe(Info); - if (++GEPIt == E) - return; + } + + ++I; + if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? + bool IsAllZeroIndices = true; + // If the first index is a non-constant index into an array, see if we can // handle it as a special case. - const Type *ArrayEltTy = 0; - if (ArrayOffset == 0 && Offset == 0) { - if (const ArrayType *AT = dyn_cast(*GEPIt)) { - if (!isa(GEPIt.getOperand())) { - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements - // in it, in which case we CAN promote it, but we have to canonicalize - // this out if this is the only problem. - if ((NumElements != 1 && NumElements != 2) || !AllUsersAreLoads(GEPI)) - return MarkUnsafe(Info); + if (const ArrayType *AT = dyn_cast(*I)) { + if (!isa(I.getOperand())) { + IsAllZeroIndices = 0; + uint64_t NumElements = AT->getNumElements(); + + // If this is an array index and the index is not constant, we cannot + // promote... that is unless the array has exactly one or two elements in + // it, in which case we CAN promote it, but we have to canonicalize this + // out if this is the only problem. + if ((NumElements == 1 || NumElements == 2) && + AllUsersAreLoads(GEPI)) { Info.needsCleanup = true; - ArrayOffset = TD->getTypeAllocSizeInBits(AT->getElementType()); - ArrayEltTy = AT->getElementType(); - ++GEPIt; + return; // Canonicalization required! } + return MarkUnsafe(Info); } } - + // Walk through the GEP type indices, checking the types that this indexes // into. - for (; GEPIt != E; ++GEPIt) { + for (; I != E; ++I) { // Ignore struct elements, no extra checking needed for these. - if (isa(*GEPIt)) + if (isa(*I)) continue; + + ConstantInt *IdxVal = dyn_cast(I.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); - ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); - if (!IdxVal) - return MarkUnsafe(Info); - - if (const ArrayType *AT = dyn_cast(*GEPIt)) { + // Are all indices still zero? + IsAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*I)) { // This GEP indexes an array. Verify that this is an in-range constant // integer. Specifically, consider A[0][i]. We cannot know that the user // isn't doing invalid things like allowing i to index an out-of-range @@ -494,254 +612,144 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, // of any accesses into structs where any of the components are variables. if (IdxVal->getZExtValue() >= AT->getNumElements()) return MarkUnsafe(Info); - } else { - const VectorType *VT = dyn_cast(*GEPIt); - assert(VT && "unexpected type in GEP type iterator"); + } else if (const VectorType *VT = dyn_cast(*I)) { if (IdxVal->getZExtValue() >= VT->getNumElements()) return MarkUnsafe(Info); } } - - // All the indices are safe. Now compute the offset due to this GEP and - // check if the alloca has a component element at that offset. - if (ArrayOffset == 0) { - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - } else { - // Both array elements have the same type, so it suffices to check one of - // them. Copy the GEP indices starting from the array index, but replace - // that variable index with a constant zero. 
- SmallVector Indices(GEPI->op_begin() + 2, GEPI->op_end()); - Indices[0] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - const Type *ArrayEltPtr = PointerType::getUnqual(ArrayEltTy); - Offset += TD->getIndexedOffset(ArrayEltPtr, &Indices[0], Indices.size()); - } - if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) - MarkUnsafe(Info); -} - -/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI -/// alloca or has an offset and size that corresponds to a component element -/// within it. The offset checked here may have been formed from a GEP with a -/// pointer bitcasted to a different type. -void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, - uint64_t ArrayOffset, uint64_t MemSize, - const Type *MemOpType, bool isStore, - AllocaInfo &Info) { - // Check if this is a load/store of the entire alloca. - if (Offset == 0 && ArrayOffset == 0 && - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { - bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); - // This is safe for MemIntrinsics (where MemOpType is 0), integer types - // (which are essentially the same as the MemIntrinsics, especially with - // regard to copying padding between elements), or references using the - // aggregate type of the alloca. - if (!MemOpType || isa(MemOpType) || UsesAggregateType) { - if (!UsesAggregateType) { - if (isStore) - Info.isMemCpyDst = true; - else - Info.isMemCpySrc = true; - } - return; - } - } - // Check if the offset/size correspond to a component within the alloca type. - const Type *T = AI->getAllocatedType(); - if (TypeHasComponent(T, Offset, MemSize) && - (ArrayOffset == 0 || TypeHasComponent(T, Offset + ArrayOffset, MemSize))) - return; - - return MarkUnsafe(Info); + + // If there are any non-simple uses of this getelementptr, make sure to reject + // them. + return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); } -/// TypeHasComponent - Return true if T has a component type with the -/// specified offset and size. If Size is zero, do not check the size. -bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { - const Type *EltTy; - uint64_t EltSize; - if (const StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = TD->getStructLayout(ST); - unsigned EltIdx = Layout->getElementContainingOffset(Offset); - EltTy = ST->getContainedType(EltIdx); - EltSize = TD->getTypeAllocSize(EltTy); - Offset -= Layout->getElementOffset(EltIdx); - } else if (const ArrayType *AT = dyn_cast(T)) { - EltTy = AT->getElementType(); - EltSize = TD->getTypeAllocSize(EltTy); - Offset %= EltSize; - } else { - return false; +/// isSafeMemIntrinsicOnAllocation - Check if the specified memory +/// intrinsic can be promoted by SROA. At this point, we know that the operand +/// of the memintrinsic is a pointer to the beginning of the allocation. +void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI, + unsigned OpNo, AllocaInfo &Info) { + // If not constant length, give up. + ConstantInt *Length = dyn_cast(MI->getLength()); + if (!Length) return MarkUnsafe(Info); + + // If not the whole aggregate, give up. + if (Length->getZExtValue() != + TD->getTypeAllocSize(AI->getType()->getElementType())) + return MarkUnsafe(Info); + + // We only know about memcpy/memset/memmove. + if (!isa(MI)) + return MarkUnsafe(Info); + + // Otherwise, we can transform it. Determine whether this is a memcpy/set + // into or out of the aggregate. 
+ if (OpNo == 1) + Info.isMemCpyDst = true; + else { + assert(OpNo == 2); + Info.isMemCpySrc = true; } - if (Offset == 0 && (Size == 0 || EltSize == Size)) - return true; - // Check if the component spans multiple elements. - if (Offset + Size > EltSize) - return false; - return TypeHasComponent(EltTy, Offset, Size); } -/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite -/// the instruction I, which references it, to use the separate elements. -/// Offset indicates the position within AI that is referenced by this -/// instruction. -void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ) { - Instruction *User = cast(*UI++); +/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast +/// from an alloca are safe for SROA of that alloca. +void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI, + AllocaInfo &Info) { + for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCU = dyn_cast(UI)) { + isSafeUseOfBitCastedAllocation(BCU, AI, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); + } else if (StoreInst *SI = dyn_cast(UI)) { + if (SI->isVolatile()) + return MarkUnsafe(Info); + + // If storing the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and store through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa(SI->getOperand(0)->getType()) && + TD->getTypeAllocSize(SI->getOperand(0)->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpyDst = true; + continue; + } + return MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(UI)) { + if (LI->isVolatile()) + return MarkUnsafe(Info); - if (BitCastInst *BC = dyn_cast(User)) { - if (BC->getOperand(0) == AI) - BC->setOperand(0, NewElts[0]); - // If the bitcast type now matches the operand type, it will be removed - // after processing its uses. 
- RewriteForScalarRepl(BC, AI, Offset, NewElts); - } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { - RewriteGEP(GEPI, AI, Offset, NewElts); - } else if (MemIntrinsic *MI = dyn_cast(User)) { - ConstantInt *Length = dyn_cast(MI->getLength()); - uint64_t MemSize = Length->getZExtValue(); - if (Offset == 0 && - MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) - RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); - } else if (LoadInst *LI = dyn_cast(User)) { - const Type *LIType = LI->getType(); - if (LIType == AI->getAllocatedType()) { - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - Value *Insert = UndefValue::get(LIType); - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Load = new LoadInst(NewElts[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - LI->eraseFromParent(); - } else if (isa(LIType) && - TD->getTypeAllocSize(LIType) == - TD->getTypeAllocSize(AI->getAllocatedType())) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + // If loading the entire alloca in one chunk through a bitcasted pointer + // to integer, we can transform it. This happens (for example) when you + // cast a {i32,i32}* to i64* and load through it. This is similar to the + // memcpy case and occurs in various "byval" cases and emulated memcpys. + if (isa(LI->getType()) && + TD->getTypeAllocSize(LI->getType()) == + TD->getTypeAllocSize(AI->getType()->getElementType())) { + Info.isMemCpySrc = true; + continue; } - } else if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - const Type *SIType = Val->getType(); - if (SIType == AI->getAllocatedType()) { - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, NewElts[i], SI); - } - SI->eraseFromParent(); - } else if (isa(SIType) && - TD->getTypeAllocSize(SIType) == - TD->getTypeAllocSize(AI->getAllocatedType())) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + return MarkUnsafe(Info); + } else if (isa(UI)) { + // If one user is DbgInfoIntrinsic then check if all users are + // DbgInfoIntrinsics. + if (OnlyUsedByDbgInfoIntrinsics(BC)) { + Info.needsCleanup = true; + return; } + else + MarkUnsafe(Info); } - } - // Delete unused instructions and identity bitcasts. - if (I->use_empty()) - I->eraseFromParent(); - else if (BitCastInst *BC = dyn_cast(I)) { - if (BC->getDestTy() == BC->getSrcTy()) { - BC->replaceAllUsesWith(BC->getOperand(0)); - BC->eraseFromParent(); + else { + return MarkUnsafe(Info); } + if (Info.isUnsafe) return; } } -/// FindElementAndOffset - Return the index of the element containing Offset -/// within the specified type, which must be either a struct or an array. 
-/// Sets T to the type of the element and Offset to the offset within that -/// element. -unsigned SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset) { - unsigned Idx = 0; - if (const StructType *ST = dyn_cast(T)) { - const StructLayout *Layout = TD->getStructLayout(ST); - Idx = Layout->getElementContainingOffset(Offset); - T = ST->getContainedType(Idx); - Offset -= Layout->getElementOffset(Idx); - } else { - const ArrayType *AT = dyn_cast(T); - assert(AT && "unexpected type for scalar replacement"); - T = AT->getElementType(); - uint64_t EltSize = TD->getTypeAllocSize(T); - Idx = (unsigned)(Offset / EltSize); - Offset -= Idx * EltSize; - } - return Idx; -} +/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes +/// to its first element. Transform users of the cast to use the new values +/// instead. +void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI, + SmallVector &NewElts) { + Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); + while (UI != UE) { + Instruction *User = cast(*UI++); + if (BitCastInst *BCU = dyn_cast(User)) { + RewriteBitCastUserOfAlloca(BCU, AI, NewElts); + if (BCU->use_empty()) BCU->eraseFromParent(); + continue; + } -/// RewriteGEP - Check if this GEP instruction moves the pointer across -/// elements of the alloca that are being split apart, and if so, rewrite -/// the GEP to be relative to the new element. -void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, - SmallVector &NewElts) { - Instruction *Val = GEPI; - - uint64_t OldOffset = Offset; - SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); - Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), - &Indices[0], Indices.size()); - - const Type *T = AI->getAllocatedType(); - unsigned OldIdx = FindElementAndOffset(T, OldOffset); - if (GEPI->getOperand(0) == AI) - OldIdx = ~0U; // Force the GEP to be rewritten. - - T = AI->getAllocatedType(); - uint64_t EltOffset = Offset; - unsigned Idx = FindElementAndOffset(T, EltOffset); - - // If this GEP moves the pointer across elements of the alloca that are - // being split, then it needs to be rewritten. - if (Idx != OldIdx) { - const Type *i32Ty = Type::getInt32Ty(AI->getContext()); - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue(i32Ty)); - while (EltOffset != 0) { - unsigned EltIdx = FindElementAndOffset(T, EltOffset); - NewArgs.push_back(ConstantInt::get(i32Ty, EltIdx)); + if (MemIntrinsic *MI = dyn_cast(User)) { + // This must be memcpy/memmove/memset of the entire aggregate. + // Split into one per element. + RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); + continue; } - if (NewArgs.size() > 1) { - Val = GetElementPtrInst::CreateInBounds(NewElts[Idx], NewArgs.begin(), - NewArgs.end(), "", GEPI); - Val->takeName(GEPI); - if (Val->getType() != GEPI->getType()) - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); - } else { - Val = NewElts[Idx]; - // Insert a new bitcast. If the types match, it will be removed after - // handling all of its uses. - Val = new BitCastInst(Val, GEPI->getType(), Val->getNameStr(), GEPI); - Val->takeName(GEPI); + + if (StoreInst *SI = dyn_cast(User)) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); + continue; } - GEPI->replaceAllUsesWith(Val); - GEPI->eraseFromParent(); + if (LoadInst *LI = dyn_cast(User)) { + // If this is a load of the entire alloca to an integer, rewrite it. 
+ RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); + continue; + } + + // Otherwise it must be some other user of a gep of the first pointer. Just + // leave these alone. + continue; } - - RewriteForScalarRepl(Val, AI, Offset, NewElts); } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, AllocaInst *AI, SmallVector &NewElts) { @@ -753,10 +761,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, LLVMContext &Context = MI->getContext(); unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy - if (Inst == MTI->getRawDest()) + if (BCInst == MTI->getRawDest()) OtherPtr = MTI->getRawSource(); else { - assert(Inst == MTI->getRawSource()); + assert(BCInst == MTI->getRawSource()); OtherPtr = MTI->getRawDest(); } } @@ -790,7 +798,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == Inst; + bool SROADest = MI->getRawDest() == BCInst; Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); @@ -802,9 +810,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, if (OtherPtr) { Value *Idx[2] = { Zero, ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; - OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, OtherPtr->getNameStr()+"."+Twine(i), - MI); + MI); uint64_t EltOffset; const PointerType *OtherPtrTy = cast(OtherPtr->getType()); if (const StructType *ST = @@ -929,9 +937,15 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getAllocatedType(); + const Type *AllocaEltTy = AI->getType()->getElementType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + // If this isn't a store of an integer to the whole alloca, it may be a store + // to the first element. Just ignore the store in this case and normal SROA + // will handle it. + if (!isa(SrcVal->getType()) || + TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) + return; // Handle tail padding by extending the operand if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) SrcVal = new ZExtInst(SrcVal, @@ -1045,9 +1059,16 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getAllocatedType(); + const Type *AllocaEltTy = AI->getType()->getElementType(); uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); + // If this isn't a load of the whole alloca to an integer, it may be a load + // of the first element. Just ignore the load in this case and normal SROA + // will handle it. 
+ if (!isa(LI->getType()) || + TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) + return; + DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI << '\n'); @@ -1121,6 +1142,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, LI->eraseFromParent(); } + /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1170,10 +1192,14 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // the users are safe to transform. AllocaInfo Info; - isSafeForScalarRepl(AI, AI, 0, 0, Info); - if (Info.isUnsafe) { - DEBUG(errs() << "Cannot transform: " << *AI << '\n'); - return 0; + for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); + I != E; ++I) { + isSafeUseOfAllocation(cast(*I), AI, Info); + if (Info.isUnsafe) { + DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " + << **I << '\n'); + return 0; + } } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1182,7 +1208,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getAllocatedType(), *TD)) + HasPadding(AI->getType()->getElementType(), *TD)) return 0; // If we require cleanup, return 1, otherwise return 3. @@ -1219,15 +1245,15 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) { // Insert the new GEP instructions, which are properly indexed. SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); - Value *ZeroIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0",GEPI); + Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".0", GEPI); Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); - Value *OneIdx = GetElementPtrInst::CreateInBounds(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); + Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), + Indices.begin(), + Indices.end(), + GEPI->getName()+".1", GEPI); // Replace all loads of the variable index GEP with loads from both // indexes and a select. while (!GEPI->use_empty()) { @@ -1238,24 +1264,22 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) { LI->replaceAllUsesWith(R); LI->eraseFromParent(); } + GEPI->eraseFromParent(); } + /// CleanupAllocaUsers - If SROA reported that it can promote the specified /// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CleanupAllocaUsers(Value *V) { +void SROA::CleanupAllocaUsers(AllocaInst *AI) { // At this point, we know that the end result will be SROA'd and promoted, so // we can insert ugly code if required so long as sroa+mem2reg will clean it // up. 
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { User *U = *UI++; - if (isa(U)) { - CleanupAllocaUsers(U); - } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GetElementPtrInst *GEPI = dyn_cast(U)) CleanupGEP(GEPI); - CleanupAllocaUsers(GEPI); - if (GEPI->use_empty()) GEPI->eraseFromParent(); - } else { + else { Instruction *I = cast(U); SmallVector DbgInUses; if (!isa(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { @@ -1371,7 +1395,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), &Indices[0], Indices.size()); // See if all uses can be converted. if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, @@ -1433,7 +1457,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll index a8f5cf71bf8..e69de29bb2d 100644 --- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll +++ b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll @@ -1,68 +0,0 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s -; Radar 7441282 - -target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" -target triple = "thumbv7-apple-darwin10" - -%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } -%struct.int16x8_t = type { <8 x i16> } -%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } -%union..0anon = type { %struct.int16x8x2_t } - -define arm_apcscc void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind { -; CHECK: @test -; CHECK-NOT: alloca -; CHECK: "alloca point" -entry: - %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3] - %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2] - %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2] - %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] - %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2] - %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] - %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - store <8 x i16> %tmp.0, <8 x i16>* %1 - store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr - %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1] - %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1] - store <8 x i16> %4, <8 x i16>* %2, align 16 - %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, 
i32 0 ; <<8 x i16>*> [#uses=1]
-  %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1]
-  store <8 x i16> %7, <8 x i16>* %5, align 16
-  %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2]
-  %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2]
-  %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2]
-  %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32>  ; <<8 x i16>> [#uses=1]
-  %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  store <8 x i16> %14, <8 x i16>* %15
-  %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32>  ; <<8 x i16>> [#uses=1]
-  %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1]
-  store <8 x i16> %16, <8 x i16>* %17
-  %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
-  %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1]
-  %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; [#uses=1]
-  call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16)
-  %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1]
-  %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; [#uses=1]
-  call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16)
-  %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1]
-  %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; [#uses=1]
-  %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; [#uses=1]
-  call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16)
-  br label %return
-
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
-return: ; preds = %entry
-  ret void
-}
-
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-- 
2.34.1
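
Editor's note (not part of the patch): for readers unfamiliar with scalarrepl, the following standalone .ll sketch illustrates the transformation that the reinstated DoScalarReplacement code performs on a first-class aggregate load/store, as described in that function's comments: the { i32, i32 } alloca is split into one alloca per element, the store becomes extractvalue plus per-element stores, and the load becomes per-element loads stitched back together with insertvalue. The type, function, and value names below are invented for illustration, and the CHECK lines state the expected outcome rather than an output verified against this tree.

; RUN: opt < %s -scalarrepl -S | FileCheck %s
%pair = type { i32, i32 }

define i32 @roundtrip(%pair %v) {
entry:
  ; Whole-aggregate store and reload of a local.  scalarrepl is expected to
  ; replace %p with scalar values, so no alloca of %pair should remain.
  %p = alloca %pair
  store %pair %v, %pair* %p
  %r = load %pair* %p
  %a = extractvalue %pair %r, 0
  %b = extractvalue %pair %r, 1
  %sum = add i32 %a, %b
  ret i32 %sum
}
; CHECK: @roundtrip
; CHECK-NOT: alloca %pair

The deleted 2009-12-11-NeonTypes.ll test above exercises the same machinery on much larger NEON vector aggregates, which is the case the reverted r91184 rework was targeting.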