X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FScalarReplAggregates.cpp;h=bbe6270655845ca2fee421dff1775ea02996c0fb;hb=040056fd11693ffc41ce9b777281c71705d0dc1f;hp=83572d65dae3507f7d6989add2fc21b69fda0cfa;hpb=3d730f7453af5ecb1be4b8c5d48843ad5637de37;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 83572d65dae..bbe62706558 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -27,17 +27,20 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); @@ -46,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { - struct VISIBILITY_HIDDEN SROA : public FunctionPass { + struct SROA : public FunctionPass { static char ID; // Pass identification, replacement for typeid explicit SROA(signed T = -1) : FunctionPass(&ID) { if (T == -1) @@ -65,13 +68,16 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.setPreservesCFG(); } private: TargetData *TD; + /// DeadInsts - Keep track of instructions we have made dead, so that + /// we can remove them after we are done working. + SmallVector DeadInsts; + /// AllocaInfo - When analyzing uses of an alloca instruction, this captures /// information about the uses. All these fields are initialized to false /// and set to true when something is learned. @@ -79,10 +85,6 @@ namespace { /// isUnsafe - This is set to true if the alloca cannot be SROA'd. bool isUnsafe : 1; - /// needsCanon - This is set to true if there is some use of the alloca - /// that requires canonicalization. - bool needsCanon : 1; - /// isMemCpySrc - This is true if this aggregate is memcpy'd from. 
bool isMemCpySrc : 1; @@ -90,49 +92,52 @@ namespace { bool isMemCpyDst : 1; AllocaInfo() - : isUnsafe(false), needsCanon(false), - isMemCpySrc(false), isMemCpyDst(false) {} + : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {} }; unsigned SRThreshold; void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; } - int isSafeAllocaToScalarRepl(AllocationInst *AI); - - void isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, - AllocaInfo &Info); - void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, - AllocaInfo &Info); - void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, - unsigned OpNo, AllocaInfo &Info); - void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocationInst *AI, - AllocaInfo &Info); - - void DoScalarReplacement(AllocationInst *AI, - std::vector &WorkList); - void CanonicalizeAllocaUsers(AllocationInst *AI); - AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocationInst *Base); + bool isSafeAllocaToScalarRepl(AllocaInst *AI); + + void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + AllocaInfo &Info); + void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset, + AllocaInfo &Info); + void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, AllocaInfo &Info); + bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size); + uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset, + const Type *&IdxTy); - void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, - SmallVector &NewElts); + void DoScalarReplacement(AllocaInst *AI, + std::vector &WorkList); + void DeleteDeadInstructions(); + AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base); - void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, - AllocationInst *AI, + void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts); + void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, + AllocaInst *AI, SmallVector &NewElts); - void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocationInst *AI, + void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SmallVector &NewElts); - void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, + void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector &NewElts); bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, - uint64_t Offset, unsigned AllocaSize); + bool &SawVec, uint64_t Offset, unsigned AllocaSize); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); - Value *ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, - uint64_t Offset); - Value *ConvertUsesOfStoreToScalar(Value *StoredVal, AllocaInst *NewAI, - uint64_t Offset, Instruction *InsertPt); - static Instruction *isOnlyCopiedFromConstantGlobal(AllocationInst *AI); + Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder); + Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal, + uint64_t Offset, IRBuilder<> &Builder); + static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI); }; } @@ -146,9 +151,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) { bool 
SROA::runOnFunction(Function &F) { - TD = &getAnalysis(); - + TD = getAnalysisIfAvailable(); + bool Changed = performPromotion(F); + + // FIXME: ScalarRepl currently depends on TargetData more than it + // theoretically needs to. It should be refactored in order to support + // target-independent IR. Until this is done, just skip the actual + // scalar-replacement portion of this pass. + if (!TD) return Changed; + while (1) { bool LocalChange = performScalarRepl(F); if (!LocalChange) break; // No need to repromote if no scalarrepl @@ -190,12 +202,18 @@ bool SROA::performPromotion(Function &F) { return Changed; } -/// getNumSAElements - Return the number of elements in the specific struct or -/// array. -static uint64_t getNumSAElements(const Type *T) { +/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for +/// SROA. It must be a struct or array type with a small number of elements. +static bool ShouldAttemptScalarRepl(AllocaInst *AI) { + const Type *T = AI->getAllocatedType(); + // Do not promote any struct into more than 32 separate vars. if (const StructType *ST = dyn_cast(T)) - return ST->getNumElements(); - return cast(T)->getNumElements(); + return ST->getNumElements() <= 32; + // Arrays are much less likely to be safe for SROA; only consider + // them if they are very small. + if (const ArrayType *AT = dyn_cast(T)) + return AT->getNumElements() <= 8; + return false; } // performScalarRepl - This algorithm is a simple worklist driven algorithm, @@ -203,18 +221,18 @@ static uint64_t getNumSAElements(const Type *T) { // them if they are only used by getelementptr instructions. // bool SROA::performScalarRepl(Function &F) { - std::vector WorkList; + std::vector WorkList; // Scan the entry basic block, adding any alloca's and mallocs to the worklist BasicBlock &BB = F.getEntryBlock(); for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) - if (AllocationInst *A = dyn_cast(I)) + if (AllocaInst *A = dyn_cast(I)) WorkList.push_back(A); // Process the worklist bool Changed = false; while (!WorkList.empty()) { - AllocationInst *AI = WorkList.back(); + AllocaInst *AI = WorkList.back(); WorkList.pop_back(); // Handle dead allocas trivially. These can be formed by SROA'ing arrays @@ -234,8 +252,8 @@ bool SROA::performScalarRepl(Function &F) { // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' // is only subsequently read. if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { - DOUT << "Found alloca equal to global: " << *AI; - DOUT << " memcpy = " << *TheCopy; + DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n'); Constant *TheSrc = cast(TheCopy->getOperand(2)); AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); TheCopy->eraseFromParent(); // Don't mutate the global. @@ -249,30 +267,23 @@ bool SROA::performScalarRepl(Function &F) { // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar // value cannot be decomposed at all. - uint64_t AllocaSize = TD->getTypePaddedSize(AI->getAllocatedType()); - - if ((isa(AI->getAllocatedType()) || - isa(AI->getAllocatedType())) && - // Do not promote any struct whose size is too big. - AllocaSize < SRThreshold && - // Do not promote any struct into more than "32" separate vars. 
- getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) { - // Check that all of the users of the allocation are capable of being - // transformed. - switch (isSafeAllocaToScalarRepl(AI)) { - default: assert(0 && "Unexpected value!"); - case 0: // Not safe to scalar replace. - break; - case 1: // Safe, but requires cleanup/canonicalizations first - CanonicalizeAllocaUsers(AI); - // FALL THROUGH. - case 3: // Safe to scalar replace. - DoScalarReplacement(AI, WorkList); - Changed = true; - continue; - } + uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + + // Do not promote [0 x %struct]. + if (AllocaSize == 0) continue; + + // If the alloca looks like a good candidate for scalar replacement, and if + // all its users can be transformed, then split up the aggregate into its + // separate elements. + if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { + DoScalarReplacement(AI, WorkList); + Changed = true; + continue; } + // Do not promote any struct whose size is too big. + if (AllocaSize > SRThreshold) continue; + // If we can turn this aggregate value (potentially with casts) into a // simple scalar value that can be mem2reg'd into a register value. // IsNotTrivial tracks whether this is something that mem2reg could have @@ -281,20 +292,28 @@ bool SROA::performScalarRepl(Function &F) { // but that has pointer arithmetic to set byte 3 of it or something. bool IsNotTrivial = false; const Type *VectorTy = 0; - if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, + bool HadAVector = false; + if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector, 0, unsigned(AllocaSize)) && IsNotTrivial) { AllocaInst *NewAI; - if (VectorTy && isa(VectorTy)) { - DOUT << "CONVERT TO VECTOR: " << *AI << " TYPE = " << *VectorTy <<"\n"; + // If we were able to find a vector type that can handle this with + // insert/extract elements, and if there was at least one use that had + // a vector type, promote this to a vector. We don't want to promote + // random stuff that doesn't use vectors (e.g. <9 x double>) because then + // we just get a lot of insert/extracts. If at least one vector is + // involved, then we probably really do have a union of vector/array. + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { + DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " + << *VectorTy << '\n'); // Create and insert the vector alloca. - NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); + NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } else { - DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - const Type *NewTy = IntegerType::get(AllocaSize*8); + const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); } @@ -313,16 +332,16 @@ bool SROA::performScalarRepl(Function &F) { /// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl /// predicate, do SROA now. 
-void SROA::DoScalarReplacement(AllocationInst *AI, - std::vector &WorkList) { - DOUT << "Found inst to SROA: " << *AI; +void SROA::DoScalarReplacement(AllocaInst *AI, + std::vector &WorkList) { + DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); SmallVector ElementAllocas; if (const StructType *ST = dyn_cast(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } @@ -332,402 +351,398 @@ void SROA::DoScalarReplacement(AllocationInst *AI, const Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), - AI->getName() + "." + utostr(i), AI); + AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing } } - // Now that we have created the alloca instructions that we want to use, - // expand the getelementptr instructions to use them. - // - while (!AI->use_empty()) { - Instruction *User = cast(AI->use_back()); - if (BitCastInst *BCInst = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas); - BCInst->eraseFromParent(); - continue; - } - - // Replace: - // %res = load { i32, i32 }* %alloc - // with: - // %load.0 = load i32* %alloc.0 - // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 - // %load.1 = load i32* %alloc.1 - // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 - // (Also works for arrays instead of structs) - if (LoadInst *LI = dyn_cast(User)) { - Value *Insert = UndefValue::get(LI->getType()); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Load = new LoadInst(ElementAllocas[i], "load", LI); - Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); - } - LI->replaceAllUsesWith(Insert); - LI->eraseFromParent(); - continue; - } - - // Replace: - // store { i32, i32 } %val, { i32, i32 }* %alloc - // with: - // %val.0 = extractvalue { i32, i32 } %val, 0 - // store i32 %val.0, i32* %alloc.0 - // %val.1 = extractvalue { i32, i32 } %val, 1 - // store i32 %val.1, i32* %alloc.1 - // (Also works for arrays instead of structs) - if (StoreInst *SI = dyn_cast(User)) { - Value *Val = SI->getOperand(0); - for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) { - Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); - new StoreInst(Extract, ElementAllocas[i], SI); - } - SI->eraseFromParent(); - continue; - } - - GetElementPtrInst *GEPI = cast(User); - // We now know that the GEP is of the form: GEP , 0, - unsigned Idx = - (unsigned)cast(GEPI->getOperand(2))->getZExtValue(); - - assert(Idx < ElementAllocas.size() && "Index out of range?"); - AllocaInst *AllocaToUse = ElementAllocas[Idx]; - - Value *RepValue; - if (GEPI->getNumOperands() == 3) { - // Do not insert a new getelementptr instruction with zero indices, only - // to have it optimized out later. - RepValue = AllocaToUse; - } else { - // We are indexing deeply into the structure, so we still need a - // getelement ptr instruction to finish the indexing. This may be - // expanded itself once the worklist is rerun. 
- // - SmallVector NewArgs; - NewArgs.push_back(Constant::getNullValue(Type::Int32Ty)); - NewArgs.append(GEPI->op_begin()+3, GEPI->op_end()); - RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(), - NewArgs.end(), "", GEPI); - RepValue->takeName(GEPI); - } - - // If this GEP is to the start of the aggregate, check for memcpys. - if (Idx == 0 && GEPI->hasAllZeroIndices()) - RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas); - - // Move all of the users over to the new GEP. - GEPI->replaceAllUsesWith(RepValue); - // Delete the old GEP - GEPI->eraseFromParent(); - } + // Now that we have created the new alloca instructions, rewrite all the + // uses of the old alloca. + RewriteForScalarRepl(AI, AI, 0, ElementAllocas); - // Finally, delete the Alloca instruction + // Now erase any instructions that were made dead while rewriting the alloca. + DeleteDeadInstructions(); AI->eraseFromParent(); + NumReplaced++; } - -/// isSafeElementUse - Check to see if this use is an allowed use for a -/// getelementptr instruction of an array aggregate allocation. isFirstElt -/// indicates whether Ptr is known to the start of the aggregate. -/// -void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) { - Instruction *User = cast(*I); - switch (User->getOpcode()) { - case Instruction::Load: break; - case Instruction::Store: - // Store is ok if storing INTO the pointer, not storing the pointer - if (User->getOperand(0) == Ptr) return MarkUnsafe(Info); - break; - case Instruction::GetElementPtr: { - GetElementPtrInst *GEP = cast(User); - bool AreAllZeroIndices = isFirstElt; - if (GEP->getNumOperands() > 1) { - if (!isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero()) - // Using pointer arithmetic to navigate the array. - return MarkUnsafe(Info); - - if (AreAllZeroIndices) - AreAllZeroIndices = GEP->hasAllZeroIndices(); - } - isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); - if (Info.isUnsafe) return; - break; - } - case Instruction::BitCast: - if (isFirstElt) { - isSafeUseOfBitCastedAllocation(cast(User), AI, Info); - if (Info.isUnsafe) return; - break; - } - DOUT << " Transformation preventing inst: " << *User; - return MarkUnsafe(Info); - case Instruction::Call: - if (MemIntrinsic *MI = dyn_cast(User)) { - if (isFirstElt) { - isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info); - if (Info.isUnsafe) return; - break; - } +/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, +/// recursively including all their operands that become trivially dead. +void SROA::DeleteDeadInstructions() { + while (!DeadInsts.empty()) { + Instruction *I = cast(DeadInsts.pop_back_val()); + + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast(*OI)) { + // Zero out the operand and see if it becomes trivially dead. + // (But, don't add allocas to the dead instruction list -- they are + // already on the worklist and will be deleted separately.) + *OI = 0; + if (isInstructionTriviallyDead(U) && !isa(U)) + DeadInsts.push_back(U); } - DOUT << " Transformation preventing inst: " << *User; - return MarkUnsafe(Info); - default: - DOUT << " Transformation preventing inst: " << *User; - return MarkUnsafe(Info); - } - } - return; // All users look ok :) -} -/// AllUsersAreLoads - Return true if all users of this value are loads. 
-static bool AllUsersAreLoads(Value *Ptr) { - for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end(); - I != E; ++I) - if (cast(*I)->getOpcode() != Instruction::Load) - return false; - return true; + I->eraseFromParent(); + } } + +/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to +/// performing scalar replacement of alloca AI. The results are flagged in +/// the Info parameter. Offset indicates the position within AI that is +/// referenced by this instruction. +void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + AllocaInfo &Info) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); -/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an -/// aggregate allocation. -/// -void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI, - AllocaInfo &Info) { - if (BitCastInst *C = dyn_cast(User)) - return isSafeUseOfBitCastedAllocation(C, AI, Info); - - if (LoadInst *LI = dyn_cast(User)) - if (!LI->isVolatile()) - return;// Loads (returning a first class aggregrate) are always rewritable - - if (StoreInst *SI = dyn_cast(User)) - if (!SI->isVolatile() && SI->getOperand(0) != AI) - return;// Store is ok if storing INTO the pointer, not storing the pointer - - GetElementPtrInst *GEPI = dyn_cast(User); - if (GEPI == 0) - return MarkUnsafe(Info); - - gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI); - - // The GEP is not safe to transform if not of the form "GEP , 0, ". - if (I == E || - I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) { - return MarkUnsafe(Info); + if (BitCastInst *BC = dyn_cast(User)) { + isSafeForScalarRepl(BC, AI, Offset, Info); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + uint64_t GEPOffset = Offset; + isSafeGEP(GEPI, AI, GEPOffset, Info); + if (!Info.isUnsafe) + isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); + } else if (MemIntrinsic *MI = dyn_cast(UI)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + if (Length) + isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, + UI.getOperandNo() == 1, Info); + else + MarkUnsafe(Info); + } else if (LoadInst *LI = dyn_cast(User)) { + if (!LI->isVolatile()) { + const Type *LIType = LI->getType(); + isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType), + LIType, false, Info); + } else + MarkUnsafe(Info); + } else if (StoreInst *SI = dyn_cast(User)) { + // Store is ok if storing INTO the pointer, not storing the pointer + if (!SI->isVolatile() && SI->getOperand(0) != I) { + const Type *SIType = SI->getOperand(0)->getType(); + isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType), + SIType, true, Info); + } else + MarkUnsafe(Info); + } else { + DEBUG(errs() << " Transformation preventing inst: " << *User << '\n'); + MarkUnsafe(Info); + } + if (Info.isUnsafe) return; } +} - ++I; - if (I == E) return MarkUnsafe(Info); // ran out of GEP indices?? +/// isSafeGEP - Check if a GEP instruction can be handled for scalar +/// replacement. It is safe when all the indices are constant, in-bounds +/// references, and when the resulting offset corresponds to an element within +/// the alloca type. The results are flagged in the Info parameter. Upon +/// return, Offset is adjusted as specified by the GEP indices. 
+void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, + uint64_t &Offset, AllocaInfo &Info) { + gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); + if (GEPIt == E) + return; - bool IsAllZeroIndices = true; - - // If the first index is a non-constant index into an array, see if we can - // handle it as a special case. - if (const ArrayType *AT = dyn_cast(*I)) { - if (!isa(I.getOperand())) { - IsAllZeroIndices = 0; - uint64_t NumElements = AT->getNumElements(); - - // If this is an array index and the index is not constant, we cannot - // promote... that is unless the array has exactly one or two elements in - // it, in which case we CAN promote it, but we have to canonicalize this - // out if this is the only problem. - if ((NumElements == 1 || NumElements == 2) && - AllUsersAreLoads(GEPI)) { - Info.needsCanon = true; - return; // Canonicalization required! - } - return MarkUnsafe(Info); - } - } - // Walk through the GEP type indices, checking the types that this indexes // into. - for (; I != E; ++I) { + for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa(*I)) + if ((*GEPIt)->isStructTy()) continue; - - ConstantInt *IdxVal = dyn_cast(I.getOperand()); - if (!IdxVal) return MarkUnsafe(Info); - // Are all indices still zero? - IsAllZeroIndices &= IdxVal->isZero(); - - if (const ArrayType *AT = dyn_cast(*I)) { - // This GEP indexes an array. Verify that this is an in-range constant - // integer. Specifically, consider A[0][i]. We cannot know that the user - // isn't doing invalid things like allowing i to index an out-of-range - // subscript that accesses A[1]. Because of this, we have to reject SROA - // of any accesses into structs where any of the components are variables. - if (IdxVal->getZExtValue() >= AT->getNumElements()) - return MarkUnsafe(Info); - } else if (const VectorType *VT = dyn_cast(*I)) { - if (IdxVal->getZExtValue() >= VT->getNumElements()) - return MarkUnsafe(Info); + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) + return MarkUnsafe(Info); + } + + // Compute the offset due to this GEP and check if the alloca has a + // component element at that offset. + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0)) + MarkUnsafe(Info); +} + +/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI +/// alloca or has an offset and size that corresponds to a component element +/// within it. The offset checked here may have been formed from a GEP with a +/// pointer bitcasted to a different type. +void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, + const Type *MemOpType, bool isStore, + AllocaInfo &Info) { + // Check if this is a load/store of the entire alloca. + if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) { + bool UsesAggregateType = (MemOpType == AI->getAllocatedType()); + // This is safe for MemIntrinsics (where MemOpType is 0), integer types + // (which are essentially the same as the MemIntrinsics, especially with + // regard to copying padding between elements), or references using the + // aggregate type of the alloca. 
+ if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { + if (!UsesAggregateType) { + if (isStore) + Info.isMemCpyDst = true; + else + Info.isMemCpySrc = true; + } + return; } } - - // If there are any non-simple uses of this getelementptr, make sure to reject - // them. - return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info); + // Check if the offset/size correspond to a component within the alloca type. + const Type *T = AI->getAllocatedType(); + if (TypeHasComponent(T, Offset, MemSize)) + return; + + return MarkUnsafe(Info); } -/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory -/// intrinsic can be promoted by SROA. At this point, we know that the operand -/// of the memintrinsic is a pointer to the beginning of the allocation. -void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocationInst *AI, - unsigned OpNo, AllocaInfo &Info) { - // If not constant length, give up. - ConstantInt *Length = dyn_cast(MI->getLength()); - if (!Length) return MarkUnsafe(Info); - - // If not the whole aggregate, give up. - if (Length->getZExtValue() != - TD->getTypePaddedSize(AI->getType()->getElementType())) - return MarkUnsafe(Info); - - // We only know about memcpy/memset/memmove. - if (!isa(MI) && !isa(MI) && !isa(MI)) - return MarkUnsafe(Info); - - // Otherwise, we can transform it. Determine whether this is a memcpy/set - // into or out of the aggregate. - if (OpNo == 1) - Info.isMemCpyDst = true; - else { - assert(OpNo == 2); - Info.isMemCpySrc = true; +/// TypeHasComponent - Return true if T has a component type with the +/// specified offset and size. If Size is zero, do not check the size. +bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) { + const Type *EltTy; + uint64_t EltSize; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + unsigned EltIdx = Layout->getElementContainingOffset(Offset); + EltTy = ST->getContainedType(EltIdx); + EltSize = TD->getTypeAllocSize(EltTy); + Offset -= Layout->getElementOffset(EltIdx); + } else if (const ArrayType *AT = dyn_cast(T)) { + EltTy = AT->getElementType(); + EltSize = TD->getTypeAllocSize(EltTy); + if (Offset >= AT->getNumElements() * EltSize) + return false; + Offset %= EltSize; + } else { + return false; } + if (Offset == 0 && (Size == 0 || EltSize == Size)) + return true; + // Check if the component spans multiple elements. + if (Offset + Size > EltSize) + return false; + return TypeHasComponent(EltTy, Offset, Size); } -/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast -/// are -void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocationInst *AI, - AllocaInfo &Info) { - for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end(); - UI != E; ++UI) { - if (BitCastInst *BCU = dyn_cast(UI)) { - isSafeUseOfBitCastedAllocation(BCU, AI, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { - isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info); - } else if (StoreInst *SI = dyn_cast(UI)) { - if (SI->isVolatile()) - return MarkUnsafe(Info); - - // If storing the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and store through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. 
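            // Illustration (annotation, not part of the patch): the pattern
            // described above looks roughly like this, with %a and %v as
            // hypothetical names:
            //   %a = alloca { i32, i32 }
            //   %p = bitcast { i32, i32 }* %a to i64*
            //   store i64 %v, i64* %p
            // Because the i64 store covers the whole aggregate, it is treated
            // like a memcpy into the alloca (Info.isMemCpyDst).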
- if (isa(SI->getOperand(0)->getType()) && - TD->getTypePaddedSize(SI->getOperand(0)->getType()) == - TD->getTypePaddedSize(AI->getType()->getElementType())) { - Info.isMemCpyDst = true; - continue; +/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite +/// the instruction I, which references it, to use the separate elements. +/// Offset indicates the position within AI that is referenced by this +/// instruction. +void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { + Instruction *User = cast(*UI); + + if (BitCastInst *BC = dyn_cast(User)) { + RewriteBitCast(BC, AI, Offset, NewElts); + } else if (GetElementPtrInst *GEPI = dyn_cast(User)) { + RewriteGEP(GEPI, AI, Offset, NewElts); + } else if (MemIntrinsic *MI = dyn_cast(User)) { + ConstantInt *Length = dyn_cast(MI->getLength()); + uint64_t MemSize = Length->getZExtValue(); + if (Offset == 0 && + MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) + RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); + // Otherwise the intrinsic can only touch a single element and the + // address operand will be updated, so nothing else needs to be done. + } else if (LoadInst *LI = dyn_cast(User)) { + const Type *LIType = LI->getType(); + if (LIType == AI->getAllocatedType()) { + // Replace: + // %res = load { i32, i32 }* %alloc + // with: + // %load.0 = load i32* %alloc.0 + // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 + // %load.1 = load i32* %alloc.1 + // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 + // (Also works for arrays instead of structs) + Value *Insert = UndefValue::get(LIType); + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Load = new LoadInst(NewElts[i], "load", LI); + Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); + } + LI->replaceAllUsesWith(Insert); + DeadInsts.push_back(LI); + } else if (LIType->isIntegerTy() && + TD->getTypeAllocSize(LIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a load of the entire alloca to an integer, rewrite it. + RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); } - return MarkUnsafe(Info); - } else if (LoadInst *LI = dyn_cast(UI)) { - if (LI->isVolatile()) - return MarkUnsafe(Info); - - // If loading the entire alloca in one chunk through a bitcasted pointer - // to integer, we can transform it. This happens (for example) when you - // cast a {i32,i32}* to i64* and load through it. This is similar to the - // memcpy case and occurs in various "byval" cases and emulated memcpys. 
- if (isa(LI->getType()) && - TD->getTypePaddedSize(LI->getType()) == - TD->getTypePaddedSize(AI->getType()->getElementType())) { - Info.isMemCpySrc = true; - continue; + } else if (StoreInst *SI = dyn_cast(User)) { + Value *Val = SI->getOperand(0); + const Type *SIType = Val->getType(); + if (SIType == AI->getAllocatedType()) { + // Replace: + // store { i32, i32 } %val, { i32, i32 }* %alloc + // with: + // %val.0 = extractvalue { i32, i32 } %val, 0 + // store i32 %val.0, i32* %alloc.0 + // %val.1 = extractvalue { i32, i32 } %val, 1 + // store i32 %val.1, i32* %alloc.1 + // (Also works for arrays instead of structs) + for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { + Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI); + new StoreInst(Extract, NewElts[i], SI); + } + DeadInsts.push_back(SI); + } else if (SIType->isIntegerTy() && + TD->getTypeAllocSize(SIType) == + TD->getTypeAllocSize(AI->getAllocatedType())) { + // If this is a store of the entire alloca from an integer, rewrite it. + RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); } - return MarkUnsafe(Info); - } else { - return MarkUnsafe(Info); } - if (Info.isUnsafe) return; } } -/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes -/// to its first element. Transform users of the cast to use the new values -/// instead. -void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocationInst *AI, - SmallVector &NewElts) { - Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end(); - while (UI != UE) { - Instruction *User = cast(*UI++); - if (BitCastInst *BCU = dyn_cast(User)) { - RewriteBitCastUserOfAlloca(BCU, AI, NewElts); - if (BCU->use_empty()) BCU->eraseFromParent(); - continue; - } +/// RewriteBitCast - Update a bitcast reference to the alloca being replaced +/// and recursively continue updating all of its uses. +void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + RewriteForScalarRepl(BC, AI, Offset, NewElts); + if (BC->getOperand(0) != AI) + return; - if (MemIntrinsic *MI = dyn_cast(User)) { - // This must be memcpy/memmove/memset of the entire aggregate. - // Split into one per element. - RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts); - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - // If this is a store of the entire alloca from an integer, rewrite it. - RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); - continue; - } + // The bitcast references the original alloca. Replace its uses with + // references to the first new element alloca. + Instruction *Val = NewElts[0]; + if (Val->getType() != BC->getDestTy()) { + Val = new BitCastInst(Val, BC->getDestTy(), "", BC); + Val->takeName(BC); + } + BC->replaceAllUsesWith(Val); + DeadInsts.push_back(BC); +} - if (LoadInst *LI = dyn_cast(User)) { - // If this is a load of the entire alloca to an integer, rewrite it. - RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); - continue; - } - - // Otherwise it must be some other user of a gep of the first pointer. Just - // leave these alone. - continue; +/// FindElementAndOffset - Return the index of the element containing Offset +/// within the specified type, which must be either a struct or an array. +/// Sets T to the type of the element and Offset to the offset within that +/// element. IdxTy is set to the type of the index result to be used in a +/// GEP instruction. 
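// Worked example (annotation, not part of the patch): for T = [10 x i32] and
// Offset = 13, the element size is 4, so this returns Idx = 3, updates T to
// i32 and Offset to 1, and sets IdxTy to i64. For struct types the index and
// remaining offset come from the StructLayout instead, and IdxTy is i32.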
+uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset, + const Type *&IdxTy) { + uint64_t Idx = 0; + if (const StructType *ST = dyn_cast(T)) { + const StructLayout *Layout = TD->getStructLayout(ST); + Idx = Layout->getElementContainingOffset(Offset); + T = ST->getContainedType(Idx); + Offset -= Layout->getElementOffset(Idx); + IdxTy = Type::getInt32Ty(T->getContext()); + return Idx; + } + const ArrayType *AT = cast(T); + T = AT->getElementType(); + uint64_t EltSize = TD->getTypeAllocSize(T); + Idx = Offset / EltSize; + Offset -= Idx * EltSize; + IdxTy = Type::getInt64Ty(T->getContext()); + return Idx; +} + +/// RewriteGEP - Check if this GEP instruction moves the pointer across +/// elements of the alloca that are being split apart, and if so, rewrite +/// the GEP to be relative to the new element. +void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, + SmallVector &NewElts) { + uint64_t OldOffset = Offset; + SmallVector Indices(GEPI->op_begin() + 1, GEPI->op_end()); + Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), + &Indices[0], Indices.size()); + + RewriteForScalarRepl(GEPI, AI, Offset, NewElts); + + const Type *T = AI->getAllocatedType(); + const Type *IdxTy; + uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy); + if (GEPI->getOperand(0) == AI) + OldIdx = ~0ULL; // Force the GEP to be rewritten. + + T = AI->getAllocatedType(); + uint64_t EltOffset = Offset; + uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy); + + // If this GEP does not move the pointer across elements of the alloca + // being split, then it does not needs to be rewritten. + if (Idx == OldIdx) + return; + + const Type *i32Ty = Type::getInt32Ty(AI->getContext()); + SmallVector NewArgs; + NewArgs.push_back(Constant::getNullValue(i32Ty)); + while (EltOffset != 0) { + uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); + NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); + } + Instruction *Val = NewElts[Idx]; + if (NewArgs.size() > 1) { + Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(), + NewArgs.end(), "", GEPI); + Val->takeName(GEPI); } + if (Val->getType() != GEPI->getType()) + Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); + GEPI->replaceAllUsesWith(Val); + DeadInsts.push_back(GEPI); } /// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI. /// Rewrite it to copy or set the elements of the scalarized memory. -void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, - AllocationInst *AI, +void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, + AllocaInst *AI, SmallVector &NewElts) { - // If this is a memcpy/memmove, construct the other pointer as the - // appropriate type. + // appropriate type. The "Other" pointer is the pointer that goes to memory + // that doesn't have anything to do with the alloca that we are promoting. For + // memset, this Value* stays null. 
Value *OtherPtr = 0; - if (MemCpyInst *MCI = dyn_cast(MI)) { - if (BCInst == MCI->getRawDest()) - OtherPtr = MCI->getRawSource(); - else { - assert(BCInst == MCI->getRawSource()); - OtherPtr = MCI->getRawDest(); - } - } else if (MemMoveInst *MMI = dyn_cast(MI)) { - if (BCInst == MMI->getRawDest()) - OtherPtr = MMI->getRawSource(); + LLVMContext &Context = MI->getContext(); + unsigned MemAlignment = MI->getAlignment(); + if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcopy + if (Inst == MTI->getRawDest()) + OtherPtr = MTI->getRawSource(); else { - assert(BCInst == MMI->getRawSource()); - OtherPtr = MMI->getRawDest(); + assert(Inst == MTI->getRawSource()); + OtherPtr = MTI->getRawDest(); } } - + // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { - // It is likely that OtherPtr is a bitcast, if so, remove it. - if (BitCastInst *BC = dyn_cast(OtherPtr)) - OtherPtr = BC->getOperand(0); - // All zero GEPs are effectively bitcasts. - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) - if (GEP->hasAllZeroIndices()) - OtherPtr = GEP->getOperand(0); + + // Remove bitcasts and all-zero GEPs from OtherPtr. This is an + // optimization, but it's also required to detect the corner case where + // both pointer operands are referencing the same memory, and where + // OtherPtr may be a bitcast or GEP that currently being rewritten. (This + // function is only called for mem intrinsics that access the whole + // aggregate, so non-zero GEPs are not an issue here.) + while (1) { + if (BitCastInst *BC = dyn_cast(OtherPtr)) { + OtherPtr = BC->getOperand(0); + continue; + } + if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { + // All zero GEPs are effectively bitcasts. + if (GEP->hasAllZeroIndices()) { + OtherPtr = GEP->getOperand(0); + continue; + } + } + break; + } + // Copying the alloca to itself is a no-op: just delete it. + if (OtherPtr == AI || OtherPtr == NewElts[0]) { + // This code will run twice for a no-op memcpy -- once for each operand. + // Put only one reference to MI on the DeadInsts list. + for (SmallVector::const_iterator I = DeadInsts.begin(), + E = DeadInsts.end(); I != E; ++I) + if (*I == MI) return; + DeadInsts.push_back(MI); + return; + } if (ConstantExpr *BCE = dyn_cast(OtherPtr)) if (BCE->getOpcode() == Instruction::BitCast) @@ -743,29 +758,55 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // Process each element of the aggregate. Value *TheFn = MI->getOperand(0); const Type *BytePtrTy = MI->getRawDest()->getType(); - bool SROADest = MI->getRawDest() == BCInst; + bool SROADest = MI->getRawDest() == Inst; - Constant *Zero = Constant::getNullValue(Type::Int32Ty); + Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. 
Value *OtherElt = 0; + unsigned OtherEltAlign = MemAlignment; + if (OtherPtr) { - Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) }; - OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, - OtherPtr->getNameStr()+"."+utostr(i), - MI); + Value *Idx[2] = { Zero, + ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; + OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2, + OtherPtr->getName()+"."+Twine(i), + MI); + uint64_t EltOffset; + const PointerType *OtherPtrTy = cast(OtherPtr->getType()); + if (const StructType *ST = + dyn_cast(OtherPtrTy->getElementType())) { + EltOffset = TD->getStructLayout(ST)->getElementOffset(i); + } else { + const Type *EltTy = + cast(OtherPtr->getType())->getElementType(); + EltOffset = TD->getTypeAllocSize(EltTy)*i; + } + + // The alignment of the other pointer is the guaranteed alignment of the + // element, which is affected by both the known alignment of the whole + // mem intrinsic and the alignment of the element. If the alignment of + // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the + // known alignment is just 4 bytes. + OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); } Value *EltPtr = NewElts[i]; - const Type *EltTy =cast(EltPtr->getType())->getElementType(); + const Type *EltTy = cast(EltPtr->getType())->getElementType(); // If we got down to a scalar, insert a load or store as appropriate. if (EltTy->isSingleValueType()) { - if (isa(MI) || isa(MI)) { - Value *Elt = new LoadInst(SROADest ? OtherElt : EltPtr, "tmp", - MI); - new StoreInst(Elt, SROADest ? EltPtr : OtherElt, MI); + if (isa(MI)) { + if (SROADest) { + // From Other to Alloca. + Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); + new StoreInst(Elt, EltPtr, MI); + } else { + // From Alloca to Other. + Value *Elt = new LoadInst(EltPtr, "tmp", MI); + new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); + } continue; } assert(isa(MI)); @@ -778,10 +819,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { // If EltTy is a vector type, get the element type. - const Type *ValTy = EltTy; - if (const VectorType *VTy = dyn_cast(ValTy)) - ValTy = VTy->getElementType(); - + const Type *ValTy = EltTy->getScalarType(); + // Construct an integer with the right value. unsigned EltSize = TD->getTypeSizeInBits(ValTy); APInt OneVal(EltSize, CI->getZExtValue()); @@ -793,10 +832,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, } // Convert the integer value to the appropriate type. - StoreVal = ConstantInt::get(TotalVal); - if (isa(ValTy)) + StoreVal = ConstantInt::get(Context, TotalVal); + if (ValTy->isPointerTy()) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPoint()) + else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); @@ -816,22 +855,22 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, // Cast the element pointer to BytePtrTy. if (EltPtr->getType() != BytePtrTy) - EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); + EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI); // Cast the other pointer (if we have one) to BytePtrTy. 
if (OtherElt && OtherElt->getType() != BytePtrTy) - OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), - MI); + OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI); - unsigned EltSize = TD->getTypePaddedSize(EltTy); + unsigned EltSize = TD->getTypeAllocSize(EltTy); // Finally, insert the meminst for this element. - if (isa(MI) || isa(MI)) { + if (isa(MI)) { Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size - Zero // Align + // Align + ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign) }; CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } else { @@ -844,29 +883,28 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst, CallInst::Create(TheFn, Ops, Ops + 4, "", MI); } } - MI->eraseFromParent(); + DeadInsts.push_back(MI); } -/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that +/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that /// overwrites the entire allocation. Extract out the pieces of the stored /// integer and store them individually. -void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, - AllocationInst *AI, +void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, SmallVector &NewElts){ // Extract each element out of the integer according to its structure offset // and store the element value to the individual alloca. Value *SrcVal = SI->getOperand(0); - const Type *AllocaEltTy = AI->getType()->getElementType(); - uint64_t AllocaSizeBits = TD->getTypePaddedSizeInBits(AllocaEltTy); + const Type *AllocaEltTy = AI->getAllocatedType(); + uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a store of an integer to the whole alloca, it may be a store - // to the first element. Just ignore the store in this case and normal SROA - // will handle it. - if (!isa(SrcVal->getType()) || - TD->getTypePaddedSizeInBits(SrcVal->getType()) != AllocaSizeBits) - return; + // Handle tail padding by extending the operand + if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) + SrcVal = new ZExtInst(SrcVal, + IntegerType::get(SI->getContext(), AllocaSizeBits), + "", SI); - DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; + DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. @@ -879,7 +917,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, uint64_t Shift = Layout->getElementOffsetInBits(i); if (TD->isBigEndian()) - Shift = AllocaSizeBits-Shift-TD->getTypePaddedSizeInBits(FieldTy); + Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); Value *EltVal = SrcVal; if (Shift) { @@ -895,17 +933,19 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, if (FieldSizeBits == 0) continue; if (FieldSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, IntegerType::get(FieldSizeBits), "", SI); + EltVal = new TruncInst(EltVal, + IntegerType::get(SI->getContext(), FieldSizeBits), + "", SI); Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPoint() || isa(FieldTy)) { + } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). 
EltVal = new BitCastInst(EltVal, FieldTy, "", SI); } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -914,7 +954,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } else { const ArrayType *ATy = cast(AllocaEltTy); const Type *ArrayEltTy = ATy->getElementType(); - uint64_t ElementOffset = TD->getTypePaddedSizeInBits(ArrayEltTy); + uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); uint64_t Shift; @@ -937,17 +977,20 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, // Truncate down to an integer of the right size. if (ElementSizeBits != AllocaSizeBits) - EltVal = new TruncInst(EltVal, IntegerType::get(ElementSizeBits),"",SI); + EltVal = new TruncInst(EltVal, + IntegerType::get(SI->getContext(), + ElementSizeBits),"",SI); Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPoint() || isa(ArrayEltTy)) { + } else if (ArrayEltTy->isFloatingPointTy() || + ArrayEltTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); } else { // Otherwise, bitcast the dest pointer (for aggregates). DestField = new BitCastInst(DestField, - PointerType::getUnqual(EltVal->getType()), + PointerType::getUnqual(EltVal->getType()), "", SI); } new StoreInst(EltVal, DestField, SI); @@ -959,26 +1002,20 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, } } - SI->eraseFromParent(); + DeadInsts.push_back(SI); } -/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to +/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to /// an integer. Load the individual pieces to form the aggregate value. -void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, +void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector &NewElts) { // Extract each element out of the NewElts according to its structure offset // and form the result value. - const Type *AllocaEltTy = AI->getType()->getElementType(); - uint64_t AllocaSizeBits = TD->getTypePaddedSizeInBits(AllocaEltTy); + const Type *AllocaEltTy = AI->getAllocatedType(); + uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); - // If this isn't a load of the whole alloca to an integer, it may be a load - // of the first element. Just ignore the load in this case and normal SROA - // will handle it. - if (!isa(LI->getType()) || - TD->getTypePaddedSizeInBits(LI->getType()) != AllocaSizeBits) - return; - - DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; + DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI + << '\n'); // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. 
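As a rough, stand-alone sketch of what RewriteLoadUserOfWholeAlloca builds (plain C++ with invented names, not part of the patch; little-endian field offsets assumed, the big-endian case mirrors the shift amounts): each element is loaded, zero-extended to the width of the whole alloca, shifted to its bit offset, and OR'd into the result.

  #include <cstdint>

  // Reassemble an i64-sized { i32, i32 } value from its two element loads,
  // mirroring the zext/shl/or sequence the rewrite emits.
  uint64_t ReassembleWholeAlloca(uint32_t Elt0, uint32_t Elt1) {
    uint64_t Result = 0;
    Result |= static_cast<uint64_t>(Elt0);       // field at bit offset 0
    Result |= static_cast<uint64_t>(Elt1) << 32; // field at bit offset 32
    return Result;
  }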
@@ -988,10 +1025,11 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, Layout = TD->getStructLayout(EltSTy); } else { const Type *ArrayEltTy = cast(AllocaEltTy)->getElementType(); - ArrayEltBitOffset = TD->getTypePaddedSizeInBits(ArrayEltTy); + ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); } - - Value *ResultVal = Constant::getNullValue(LI->getType()); + + Value *ResultVal = + Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // Load the value from the alloca. If the NewElt is an aggregate, cast @@ -1004,10 +1042,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, // Ignore zero sized fields like {}, they obviously contain no data. if (FieldSizeBits == 0) continue; - const IntegerType *FieldIntTy = IntegerType::get(FieldSizeBits); - if (!isa(FieldTy) && !FieldTy->isFloatingPoint() && - !isa(FieldTy)) - SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), + const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), + FieldSizeBits); + if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && + !FieldTy->isVectorTy()) + SrcField = new BitCastInst(SrcField, + PointerType::getUnqual(FieldIntTy), "", LI); SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); @@ -1038,12 +1078,15 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI, ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); } - + + // Handle tail padding by truncating the result + if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) + ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); + LI->replaceAllUsesWith(ResultVal); - LI->eraseFromParent(); + DeadInsts.push_back(LI); } - /// HasPadding - Return true if the specified type has any structure or /// alignment padding, false otherwise. static bool HasPadding(const Type *Ty, const TargetData &TD) { @@ -1082,25 +1125,21 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) { } else if (const VectorType *VTy = dyn_cast(Ty)) { return HasPadding(VTy->getElementType(), TD); } - return TD.getTypeSizeInBits(Ty) != TD.getTypePaddedSizeInBits(Ty); + return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); } /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of /// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, /// or 1 if safe after canonicalization has been performed. -/// -int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { +bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { // Loop over the use list of the alloca. We can only transform it if all of // the users are safe to transform. AllocaInfo Info; - for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); - I != E; ++I) { - isSafeUseOfAllocation(cast(*I), AI, Info); - if (Info.isUnsafe) { - DOUT << "Cannot transform: " << *AI << " due to user: " << **I; - return 0; - } + isSafeForScalarRepl(AI, AI, 0, Info); + if (Info.isUnsafe) { + DEBUG(dbgs() << "Cannot transform: " << *AI << '\n'); + return false; } // Okay, we know all the users are promotable. If the aggregate is a memcpy @@ -1109,66 +1148,10 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) { // types, but may actually be used. In these cases, we refuse to promote the // struct. if (Info.isMemCpySrc && Info.isMemCpyDst && - HasPadding(AI->getType()->getElementType(), *TD)) - return 0; - - // If we require cleanup, return 1, otherwise return 3. 
- return Info.needsCanon ? 1 : 3; -} + HasPadding(AI->getAllocatedType(), *TD)) + return false; -/// CanonicalizeAllocaUsers - If SROA reported that it can promote the specified -/// allocation, but only if cleaned up, perform the cleanups required. -void SROA::CanonicalizeAllocaUsers(AllocationInst *AI) { - // At this point, we know that the end result will be SROA'd and promoted, so - // we can insert ugly code if required so long as sroa+mem2reg will clean it - // up. - for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); - UI != E; ) { - GetElementPtrInst *GEPI = dyn_cast(*UI++); - if (!GEPI) continue; - gep_type_iterator I = gep_type_begin(GEPI); - ++I; - - if (const ArrayType *AT = dyn_cast(*I)) { - uint64_t NumElements = AT->getNumElements(); - - if (!isa(I.getOperand())) { - if (NumElements == 1) { - GEPI->setOperand(2, Constant::getNullValue(Type::Int32Ty)); - } else { - assert(NumElements == 2 && "Unhandled case!"); - // All users of the GEP must be loads. At each use of the GEP, insert - // two loads of the appropriate indexed GEP and select between them. - Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(), - Constant::getNullValue(I.getOperand()->getType()), - "isone", GEPI); - // Insert the new GEP instructions, which are properly indexed. - SmallVector Indices(GEPI->op_begin()+1, GEPI->op_end()); - Indices[1] = Constant::getNullValue(Type::Int32Ty); - Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".0", GEPI); - Indices[1] = ConstantInt::get(Type::Int32Ty, 1); - Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), - Indices.begin(), - Indices.end(), - GEPI->getName()+".1", GEPI); - // Replace all loads of the variable index GEP with loads from both - // indexes and a select. - while (!GEPI->use_empty()) { - LoadInst *LI = cast(GEPI->use_back()); - Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); - Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); - Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI); - LI->replaceAllUsesWith(R); - LI->eraseFromParent(); - } - GEPI->eraseFromParent(); - } - } - } - } + return true; } /// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at @@ -1183,9 +1166,10 @@ void SROA::CanonicalizeAllocaUsers(AllocationInst *AI) { /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, - unsigned AllocaSize, const TargetData &TD) { + unsigned AllocaSize, const TargetData &TD, + LLVMContext &Context) { // If this could be contributing to a vector, analyze it. - if (VecTy != Type::VoidTy) { // either null or a vector type. + if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. 
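As a small illustrative sketch of the element-compatibility test MergeInType applies to scalar accesses in the following hunk (plain C++, invented names, not part of the patch): a float/double/integer access can only be folded into a vector view of the alloca when the alloca size and the access offset are both multiples of the element size (and the element type agrees with any vector type already chosen).

  #include <cstdint>

  // Could an access of EltSizeBytes at byte Offset be an element of a vector
  // covering AllocaSizeBytes? Mirrors the divisibility checks MergeInType
  // performs before building or reusing a VectorType.
  bool FitsAsVectorElement(unsigned AllocaSizeBytes, unsigned EltSizeBytes,
                           uint64_t Offset) {
    return EltSizeBytes != 0 &&
           AllocaSizeBytes % EltSizeBytes == 0 &&
           Offset % EltSizeBytes == 0;
  }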
@@ -1198,8 +1182,8 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, VecTy = VInTy; return; } - } else if (In == Type::FloatTy || In == Type::DoubleTy || - (isa(In) && In->getPrimitiveSizeInBits() >= 8 && + } else if (In->isFloatTy() || In->isDoubleTy() || + (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is @@ -1219,18 +1203,20 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, // Otherwise, we have a case that we can't handle with an optimized vector // form. We can still turn this into a large integer. - VecTy = Type::VoidTy; + VecTy = Type::getVoidTy(Context); } /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all -/// its accesses to use a to single vector type, return true, and set VecTy to +/// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable /// integer, return true but set VecTy to VoidTy. Further, if the use is not a /// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset /// is the current offset from the base of the alloca being analyzed. /// -bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, - const Type *&VecTy, uint64_t Offset, +/// If we see at least one access to the value that is as a vector type, set the +/// SawVec flag. +bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, + bool &SawVec, uint64_t Offset, unsigned AllocaSize) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { Instruction *User = cast(*UI); @@ -1239,19 +1225,24 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, // Don't break volatile loads. if (LI->isVolatile()) return false; - MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD); + MergeInType(LI->getType(), Offset, VecTy, + AllocaSize, *TD, V->getContext()); + SawVec |= LI->getType()->isVectorTy(); continue; } if (StoreInst *SI = dyn_cast(User)) { // Storing the pointer, not into the value? if (SI->getOperand(0) == V || SI->isVolatile()) return 0; - MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD); + MergeInType(SI->getOperand(0)->getType(), Offset, + VecTy, AllocaSize, *TD, V->getContext()); + SawVec |= SI->getOperand(0)->getType()->isVectorTy(); continue; } if (BitCastInst *BCI = dyn_cast(User)) { - if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, Offset, AllocaSize)) + if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset, + AllocaSize)) return false; IsNotTrivial = true; continue; @@ -1264,26 +1255,35 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); // See if all uses can be converted. - if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, Offset+GEPOffset, + if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, AllocaSize)) return false; IsNotTrivial = true; continue; } - + // If this is a constant sized memset of a constant value (e.g. 0) we can // handle it. - if (isa(User) && - // Store of constant value. 
- isa(User->getOperand(2)) && - // Store with constant size. - isa(User->getOperand(3))) { - VecTy = Type::VoidTy; - IsNotTrivial = true; - continue; + if (MemSetInst *MSI = dyn_cast(User)) { + // Store of constant value and constant size. + if (isa(MSI->getValue()) && + isa(MSI->getLength())) { + IsNotTrivial = true; + continue; + } + } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast(User)) { + if (ConstantInt *Len = dyn_cast(MTI->getLength())) + if (Len->getZExtValue() == AllocaSize && Offset == 0) { + IsNotTrivial = true; + continue; + } } // Otherwise, we cannot handle this! @@ -1293,7 +1293,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, return true; } - /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca /// directly. This happens when we are converting an "integer union" to a /// single integer scalar, or when we are converting a "vector union" to a @@ -1305,20 +1304,6 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { while (!Ptr->use_empty()) { Instruction *User = cast(Ptr->use_back()); - if (LoadInst *LI = dyn_cast(User)) { - LI->replaceAllUsesWith(ConvertUsesOfLoadToScalar(LI, NewAI, Offset)); - LI->eraseFromParent(); - continue; - } - - if (StoreInst *SI = dyn_cast(User)) { - assert(SI->getOperand(0) != Ptr && "Consistency error!"); - new StoreInst(ConvertUsesOfStoreToScalar(SI->getOperand(0), NewAI, - Offset, SI), NewAI, SI); - SI->eraseFromParent(); - continue; - } - if (BitCastInst *CI = dyn_cast(User)) { ConvertUsesToScalar(CI, NewAI, Offset); CI->eraseFromParent(); @@ -1328,80 +1313,176 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) { if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), + uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(), &Indices[0], Indices.size()); ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8); GEP->eraseFromParent(); continue; } + IRBuilder<> Builder(User->getParent(), User); + + if (LoadInst *LI = dyn_cast(User)) { + // The load is a bit extract from NewAI shifted right by Offset bits. + Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp"); + Value *NewLoadVal + = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder); + LI->replaceAllUsesWith(NewLoadVal); + LI->eraseFromParent(); + continue; + } + + if (StoreInst *SI = dyn_cast(User)) { + assert(SI->getOperand(0) != Ptr && "Consistency error!"); + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset, + Builder); + Builder.CreateStore(New, NewAI); + SI->eraseFromParent(); + + // If the load we just inserted is now dead, then the inserted store + // overwrote the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + continue; + } + // If this is a constant sized memset of a constant value (e.g. 0) we can // transform it into a store of the expanded constant value. if (MemSetInst *MSI = dyn_cast(User)) { assert(MSI->getRawDest() == Ptr && "Consistency error!"); unsigned NumBytes = cast(MSI->getLength())->getZExtValue(); - unsigned Val = cast(MSI->getValue())->getZExtValue(); - - // Compute the value replicated the right number of times. 
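In the new ConvertUsesToScalar above, a store through the old pointer becomes a read-modify-write on the single converted scalar: load the old value, splice the stored bits in at the right offset via ConvertScalar_InsertValue, and store the result back (deleting the reload if the store overwrote everything). A standalone sketch of that pattern (plain C++, not the LLVM API; the 64-bit little-endian model is an assumption of the sketch):

    #include <cstdint>

    // Read-modify-write pattern used when rewriting a store through the old
    // pointer: load the whole scalar, clear the destination bits, insert the
    // new bits, and write the scalar back. Offsets must stay within 64 bits
    // in this model.
    void storeIntoScalar(uint64_t &allocaScalar, uint64_t value,
                         unsigned offsetInBits, unsigned widthInBits) {
      uint64_t mask = (widthInBits >= 64) ? ~uint64_t(0)
                                          : (uint64_t(1) << widthInBits) - 1;
      uint64_t oldVal = allocaScalar;               // Builder.CreateLoad(NewAI)
      oldVal &= ~(mask << offsetInBits);            // clear the target slot
      oldVal |= (value & mask) << offsetInBits;     // insert the stored bits
      allocaScalar = oldVal;                        // Builder.CreateStore(...)
    }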
- APInt APVal(NumBytes*8, Val); + if (NumBytes != 0) { + unsigned Val = cast(MSI->getValue())->getZExtValue(); + + // Compute the value replicated the right number of times. + APInt APVal(NumBytes*8, Val); - // Splat the value if non-zero. - if (Val) - for (unsigned i = 1; i != NumBytes; ++i) - APVal |= APVal << 8; - - new StoreInst(ConvertUsesOfStoreToScalar(ConstantInt::get(APVal), NewAI, - Offset, MSI), NewAI, MSI); + // Splat the value if non-zero. + if (Val) + for (unsigned i = 1; i != NumBytes; ++i) + APVal |= APVal << 8; + + Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); + Value *New = ConvertScalar_InsertValue( + ConstantInt::get(User->getContext(), APVal), + Old, Offset, Builder); + Builder.CreateStore(New, NewAI); + + // If the load we just inserted is now dead, then the memset overwrote + // the entire thing. + if (Old->use_empty()) + Old->eraseFromParent(); + } MSI->eraseFromParent(); continue; } + + // If this is a memcpy or memmove into or out of the whole allocation, we + // can handle it like a load or store of the scalar type. + if (MemTransferInst *MTI = dyn_cast(User)) { + assert(Offset == 0 && "must be store to start of alloca"); + + // If the source and destination are both to the same alloca, then this is + // a noop copy-to-self, just delete it. Otherwise, emit a load and store + // as appropriate. + AllocaInst *OrigAI = cast(Ptr->getUnderlyingObject(0)); + + if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) { + // Dest must be OrigAI, change this to be a load from the original + // pointer (bitcasted), then a store to our new alloca. + assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?"); + Value *SrcPtr = MTI->getSource(); + SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType()); + LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval"); + SrcVal->setAlignment(MTI->getAlignment()); + Builder.CreateStore(SrcVal, NewAI); + } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) { + // Src must be OrigAI, change this to be a load from NewAI then a store + // through the original dest pointer (bitcasted). + assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?"); + LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval"); + + Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType()); + StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr); + NewStore->setAlignment(MTI->getAlignment()); + } else { + // Noop transfer. Src == Dst + } + + MTI->eraseFromParent(); + continue; + } - assert(0 && "Unsupported operation!"); - abort(); + llvm_unreachable("Unsupported operation!"); } } -/// ConvertUsesOfLoadToScalar - Convert all of the users of the specified load -/// to use the new alloca directly, returning the value that should replace the -/// load. This happens when we are converting an "integer union" to a single +/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer +/// or vector value FromVal, extracting the bits from the offset specified by +/// Offset. This returns the value, which is of type ToType. +/// +/// This happens when we are converting an "integer union" to a single /// integer scalar, or when we are converting a "vector union" to a vector with /// insert/extractelement instructions. /// /// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. By the end of this, there should be no uses of Ptr. 
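The memset rewrite above widens the constant byte before handing it to ConvertScalar_InsertValue by replicating it with APVal |= APVal << 8. The sketch below reproduces that splat in plain C++ (uint64_t instead of APInt, so it models at most 8 bytes; that cap is an assumption of the sketch, not a limit of the pass):

    #include <cassert>
    #include <cstdint>

    // Replicate a memset byte across numBytes, the way the APVal |= APVal << 8
    // loop above builds the constant that is then inserted into the scalar.
    uint64_t splatMemsetByte(uint8_t val, unsigned numBytes) {
      assert(numBytes >= 1 && numBytes <= 8 && "uint64_t model caps at 8 bytes");
      uint64_t splat = val;                  // APInt APVal(NumBytes*8, Val)
      if (val)                               // splat only if the value is non-zero
        for (unsigned i = 1; i != numBytes; ++i)
          splat |= splat << 8;               // APVal |= APVal << 8
      return splat;
    }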
-Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, - uint64_t Offset) { - // The load is a bit extract from NewAI shifted right by Offset bits. - Value *NV = new LoadInst(NewAI, LI->getName(), LI); - +/// shifted to the right. +Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, + uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. - if (NV->getType() == LI->getType() && Offset == 0) - return NV; + if (FromVal->getType() == ToType && Offset == 0) + return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast(NV->getType())) { - if (isa(LI->getType())) - return new BitCastInst(NV, LI->getType(), LI->getName(), LI); + if (const VectorType *VTy = dyn_cast(FromVal->getType())) { + if (ToType->isVectorTy()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); // Otherwise it must be an element access. unsigned Elt = 0; if (Offset) { - unsigned EltSize = TD->getTypePaddedSizeInBits(VTy->getElementType()); + unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); Elt = Offset/EltSize; assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); } // Return the element extracted out of it. - Value *V = new ExtractElementInst(NV, ConstantInt::get(Type::Int32Ty, Elt), - "tmp", LI); - if (V->getType() != LI->getType()) - V = new BitCastInst(V, LI->getType(), "tmp", LI); + Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( + Type::getInt32Ty(FromVal->getContext()), Elt), "tmp"); + if (V->getType() != ToType) + V = Builder.CreateBitCast(V, ToType, "tmp"); return V; } + + // If ToType is a first class aggregate, extract out each of the pieces and + // use insertvalue's to form the FCA. + if (const StructType *ST = dyn_cast(ToType)) { + const StructLayout &Layout = *TD->getStructLayout(ST); + Value *Res = UndefValue::get(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), + Offset+Layout.getElementOffsetInBits(i), + Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } + + if (const ArrayType *AT = dyn_cast(ToType)) { + uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); + Value *Res = UndefValue::get(AT); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), + Offset+i*EltSize, Builder); + Res = Builder.CreateInsertValue(Res, Elt, i, "tmp"); + } + return Res; + } // Otherwise, this must be a union that was converted to an integer value. - const IntegerType *NTy = cast(NV->getType()); + const IntegerType *NTy = cast(FromVal->getType()); // If this is a big-endian system and the load is narrower than the // full alloca type, we need to do a shift to get the right bits. @@ -1411,7 +1492,7 @@ Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, // from the pointer given by getTypeStoreSizeInBits. This matters for // integers with a bitwidth that is not a multiple of 8. ShAmt = TD->getTypeStoreSizeInBits(NTy) - - TD->getTypeStoreSizeInBits(LI->getType()) - Offset; + TD->getTypeStoreSizeInBits(ToType) - Offset; } else { ShAmt = Offset; } @@ -1420,98 +1501,121 @@ Value *SROA::ConvertUsesOfLoadToScalar(LoadInst *LI, AllocaInst *NewAI, // We do this to support (f.e.) 
loads off the end of a structure where // only some bits are used. if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth()) - NV = BinaryOperator::CreateLShr(NV, - ConstantInt::get(NV->getType(), ShAmt), - LI->getName(), LI); + FromVal = Builder.CreateLShr(FromVal, + ConstantInt::get(FromVal->getType(), + ShAmt), "tmp"); else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth()) - NV = BinaryOperator::CreateShl(NV, - ConstantInt::get(NV->getType(), -ShAmt), - LI->getName(), LI); + FromVal = Builder.CreateShl(FromVal, + ConstantInt::get(FromVal->getType(), + -ShAmt), "tmp"); // Finally, unconditionally truncate the integer to the right width. - unsigned LIBitWidth = TD->getTypeSizeInBits(LI->getType()); + unsigned LIBitWidth = TD->getTypeSizeInBits(ToType); if (LIBitWidth < NTy->getBitWidth()) - NV = new TruncInst(NV, IntegerType::get(LIBitWidth), - LI->getName(), LI); + FromVal = + Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); + else if (LIBitWidth > NTy->getBitWidth()) + FromVal = + Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(), + LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. - if (isa(LI->getType())) { + if (ToType->isIntegerTy()) { // Should be done. - } else if (LI->getType()->isFloatingPoint() || - isa(LI->getType())) { + } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. - NV = new BitCastInst(NV, LI->getType(), LI->getName(), LI); + FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); } else { // Otherwise must be a pointer. - NV = new IntToPtrInst(NV, LI->getType(), LI->getName(), LI); + FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp"); } - assert(NV->getType() == LI->getType() && "Didn't convert right?"); - return NV; + assert(FromVal->getType() == ToType && "Didn't convert right?"); + return FromVal; } - -/// ConvertUsesOfStoreToScalar - Convert the specified store to a load+store -/// pair of the new alloca directly, returning the value that should be stored -/// to the alloca. This happens when we are converting an "integer union" to a +/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer +/// or vector value "Old" at the offset specified by Offset. +/// +/// This happens when we are converting an "integer union" to a /// single integer scalar, or when we are converting a "vector union" to a /// vector with insert/extractelement instructions. /// /// Offset is an offset from the original alloca, in bits that need to be -/// shifted to the right. By the end of this, there should be no uses of Ptr. -Value *SROA::ConvertUsesOfStoreToScalar(Value *SV, AllocaInst *NewAI, - uint64_t Offset, Instruction *IP) { +/// shifted to the right. +Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, + uint64_t Offset, IRBuilder<> &Builder) { // Convert the stored type to the actual type, shift it left to insert // then 'or' into place. - const Type *AllocaType = NewAI->getType()->getElementType(); - if (SV->getType() == AllocaType && Offset == 0) - return SV; + const Type *AllocaType = Old->getType(); + LLVMContext &Context = Old->getContext(); if (const VectorType *VTy = dyn_cast(AllocaType)) { - Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", IP); + uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); + uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType()); + + // Changing the whole vector with memset or with an access of a different + // vector type? 
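The integer path of ConvertScalar_ExtractValue above shifts the wide alloca value so the requested bits land at bit zero and then truncates (or zero-extends) to the requested width; on big-endian targets the shift amount is measured from the other end of the value. A simplified standalone sketch (plain C++, not the LLVM API; it uses plain bit widths where the real code uses getTypeStoreSizeInBits, and models everything within 64 bits):

    #include <cstdint>

    // Extract wantBits bits at offsetBits from a wide integer, mirroring the
    // lshr/shl + trunc sequence above. Big-endian targets shift from the other
    // end; allocaBits is assumed to be at most 64 in this model.
    uint64_t extractScalarBits(uint64_t fromVal, unsigned allocaBits,
                               unsigned wantBits, unsigned offsetBits,
                               bool bigEndian) {
      int shAmt = bigEndian ? int(allocaBits) - int(wantBits) - int(offsetBits)
                            : int(offsetBits);
      if (shAmt > 0 && unsigned(shAmt) < allocaBits)
        fromVal >>= shAmt;                        // Builder.CreateLShr
      else if (shAmt < 0 && unsigned(-shAmt) < allocaBits)
        fromVal <<= -shAmt;                       // Builder.CreateShl
      if (wantBits < 64)                          // the final trunc
        fromVal &= (uint64_t(1) << wantBits) - 1;
      return fromVal;
    }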
+ if (ValSize == VecSize) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); - // If the result alloca is a vector type, this is either an element - // access or a bitcast to another vector type. - if (isa(SV->getType())) { - SV = new BitCastInst(SV, AllocaType, SV->getName(), IP); - } else { - // Must be an element insertion. - unsigned Elt = Offset/TD->getTypePaddedSizeInBits(VTy->getElementType()); - - if (SV->getType() != VTy->getElementType()) - SV = new BitCastInst(SV, VTy->getElementType(), "tmp", IP); - - SV = InsertElementInst::Create(Old, SV, - ConstantInt::get(Type::Int32Ty, Elt), - "tmp", IP); - } + uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType()); + + // Must be an element insertion. + unsigned Elt = Offset/EltSize; + + if (SV->getType() != VTy->getElementType()) + SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); + + SV = Builder.CreateInsertElement(Old, SV, + ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), + "tmp"); return SV; } - - - Value *Old = new LoadInst(NewAI, NewAI->getName()+".in", IP); + + // If SV is a first-class aggregate value, insert each value recursively. + if (const StructType *ST = dyn_cast(SV->getType())) { + const StructLayout &Layout = *TD->getStructLayout(ST); + for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { + Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Old = ConvertScalar_InsertValue(Elt, Old, + Offset+Layout.getElementOffsetInBits(i), + Builder); + } + return Old; + } + + if (const ArrayType *AT = dyn_cast(SV->getType())) { + uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); + for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { + Value *Elt = Builder.CreateExtractValue(SV, i, "tmp"); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder); + } + return Old; + } // If SV is a float, convert it to the appropriate integer type. - // If it is a pointer, do the same, and also handle ptr->ptr casts - // here. + // If it is a pointer, do the same. unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType()); unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPoint() || isa(SV->getType())) - SV = new BitCastInst(SV, IntegerType::get(SrcWidth), SV->getName(), IP); - else if (isa(SV->getType())) - SV = new PtrToIntInst(SV, TD->getIntPtrType(), SV->getName(), IP); + if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) + SV = Builder.CreateBitCast(SV, + IntegerType::get(SV->getContext(),SrcWidth), "tmp"); + else if (SV->getType()->isPointerTy()) + SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); // Zero extend or truncate the value if needed. if (SV->getType() != AllocaType) { if (SV->getType()->getPrimitiveSizeInBits() < AllocaType->getPrimitiveSizeInBits()) - SV = new ZExtInst(SV, AllocaType, SV->getName(), IP); + SV = Builder.CreateZExt(SV, AllocaType, "tmp"); else { // Truncation may be needed if storing more than the alloca can hold // (undefined behavior). - SV = new TruncInst(SV, AllocaType, SV->getName(), IP); + SV = Builder.CreateTrunc(SV, AllocaType, "tmp"); SrcWidth = DestWidth; SrcStoreWidth = DestStoreWidth; } @@ -1534,14 +1638,12 @@ Value *SROA::ConvertUsesOfStoreToScalar(Value *SV, AllocaInst *NewAI, // only some bits in the structure are set. 
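ConvertScalar_InsertValue above handles a first-class aggregate by peeling off each element with extractvalue and inserting it at its own offset, threading the accumulated scalar ("Old") through the recursion. A flattened standalone sketch of the array case (plain C++, not the LLVM API; it assumes the whole aggregate fits in a 64-bit model):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Insert every element of an array value into a wide integer, one slot at
    // a time, the way the ArrayType loop above threads "Old" through
    // ConvertScalar_InsertValue.
    uint64_t insertArrayValue(uint64_t old, const std::vector<uint64_t> &elts,
                              unsigned eltSizeInBits, uint64_t offsetInBits) {
      uint64_t mask = (eltSizeInBits >= 64) ? ~uint64_t(0)
                                            : (uint64_t(1) << eltSizeInBits) - 1;
      for (size_t i = 0; i != elts.size(); ++i) {
        uint64_t shift = offsetInBits + i * eltSizeInBits;  // Offset + i*EltSize
        assert(shift + eltSizeInBits <= 64 && "aggregate must fit the model");
        old &= ~(mask << shift);                            // clear the slot
        old |= (elts[i] & mask) << shift;                   // insert the element
      }
      return old;                                           // new value of "Old"
    }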
   APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
   if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
-    SV = BinaryOperator::CreateShl(SV,
-                                   ConstantInt::get(SV->getType(), ShAmt),
-                                   SV->getName(), IP);
+    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
+                           ShAmt), "tmp");
     Mask <<= ShAmt;
   } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
-    SV = BinaryOperator::CreateLShr(SV,
-                                    ConstantInt::get(SV->getType(), -ShAmt),
-                                    SV->getName(), IP);
+    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
+                            -ShAmt), "tmp");
     Mask = Mask.lshr(-ShAmt);
   }
@@ -1549,9 +1651,8 @@ Value *SROA::ConvertUsesOfStoreToScalar(Value *SV, AllocaInst *NewAI,
   // in the new bits.
   if (SrcWidth != DestWidth) {
     assert(DestWidth > SrcWidth);
-    Old = BinaryOperator::CreateAnd(Old, ConstantInt::get(~Mask),
-                                    Old->getName()+".mask", IP);
-    SV = BinaryOperator::CreateOr(Old, SV, SV->getName()+".ins", IP);
+    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
+    SV = Builder.CreateOr(Old, SV, "ins");
   }
   return SV;
 }
@@ -1603,7 +1704,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,

     // If this is isn't our memcpy/memmove, reject it as something we can't
     // handle.
-    if (!isa(*UI) && !isa(*UI))
+    if (!isa(*UI))
       return false;

     // If we already have seen a copy, reject the second one.
@@ -1631,7 +1732,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global. If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
-Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocationInst *AI) {
+Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
   Instruction *TheCopy = 0;
   if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
     return TheCopy;
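isOnlyCopiedFromConstantGlobal above recognizes an alloca whose only writer is a memcpy or memmove from a constant global, which lets the caller forward every read to the global and delete both the alloca and the copy. A source-level before/after illustration (hand-written, not compiler output; kTable and the function names are hypothetical):

    #include <cstring>

    static const int kTable[4] = {1, 2, 3, 5};     // hypothetical constant global

    int lookupBefore(int i) {
      int local[4];                                // the alloca
      std::memcpy(local, kTable, sizeof(local));   // its only writer: a memcpy
      return local[i];                             // every other use only reads
    }

    int lookupAfter(int i) {
      return kTable[i];                            // reads forwarded to the global;
                                                   // alloca and memcpy removed
    }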