isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
const Value *&MemLocBase,
int64_t &MemLocOffs,
- const LoadInst *LI, TargetData *TD) {
+ const LoadInst *LI,
+ const TargetData *TD) {
// If we have no target data, we can't do this.
if (TD == 0) return false;
if (MemLocBase == 0)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD);
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
+ LI, *TD);
+ return Size != 0;
+}
+
+/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that
+/// looks at a memory location for a load (specified by MemLocBase, Offs,
+/// and Size) and compares it against a load. If the specified load could
+/// be safely widened to a larger integer load that is 1) still efficient,
+/// 2) safe for the target, and 3) would provide the specified memory
+/// location value, then this function returns the size in bytes of the
+/// load width to use. If not, this returns zero.
+unsigned MemoryDependenceAnalysis::
+getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
+ unsigned MemLocSize, const LoadInst *LI,
+ const TargetData &TD) {
+ // We can only extend non-volatile integer loads.
+ if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0;
+
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, *TD);
+ GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
- if (LIBase != MemLocBase) return false;
+ if (LIBase != MemLocBase) return 0;
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// the bits required by MemLoc.
// If MemLoc is before LI, then no widening of LI will help us out.
- if (MemLocOffs < LIOffs) return false;
+ if (MemLocOffs < LIOffs) return 0;
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// to i16.
unsigned LoadAlign = LI->getAlignment();
- int64_t MemLocEnd = MemLocOffs+MemLoc.Size;
+ int64_t MemLocEnd = MemLocOffs+MemLocSize;
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
- if (LIOffs+LoadAlign < MemLocEnd) return false;
+ if (LIOffs+LoadAlign < MemLocEnd) return 0;
// This is the size of the load to try. Start with the next larger power of
// two.
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
- !TD->fitsInLegalInteger(NewLoadByteSize*8))
- return false;
+ !TD.fitsInLegalInteger(NewLoadByteSize*8))
+ return 0;
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
- return true;
+ return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
- return false;
+ return 0;
}
/// getPointerDependencyFrom - Return the instruction on which a memory
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
return 0;
+ // If this is already the right type, just return it.
const Type *StoredValTy = StoredVal->getType();
uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
+ uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType());
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD);
+ if (R != -1) return R;
+
+ // If we have a load/load clobber and DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD);
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceAnalysis::
+ getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD);
+ if (Size == 0) return -1;
+
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD);
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias. Check this case to see if there is
-/// anything more we can do before we give up.
+/// that the store provides bits used by the load but the pointers don't
+/// mustalias. Check this case to see if there is anything more we can do
+/// before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
const Type *LoadTy,
Instruction *InsertPt, const TargetData &TD){
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
+/// GetLoadValueForLoad - This function is called when we have a memdep query
+/// of a load that ends up being a clobbering load (SrcVal). The clobbering
+/// load *may* provide bits used by the queried load, but the pointers don't
+/// mustalias. If SrcVal is too narrow to cover Offset plus the size of LoadTy,
+/// its uses are rewritten in terms of a new, wider integer load first.
+static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
+ const Type *LoadTy,
+ Instruction *InsertPt, const TargetData &TD,
+ MemoryDependenceAnalysis &MD) {
+ // If Offset plus the size of LoadTy exceeds the size of SrcVal, then we must
+ // be wanting to widen SrcVal out to a larger load.
+ unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = TD.getTypeStoreSize(LoadTy);
+ if (Offset+LoadSize > SrcValSize) {
+ assert(!SrcVal->isVolatile() && "Cannot widen volatile load!");
+ assert(isa<IntegerType>(SrcVal->getType())&&"Can't widen non-integer load");
+ // If we have a load/load clobber and SrcVal can be widened to cover this
+ // load, then we should widen it to the next power of 2 size big enough!
+ unsigned NewLoadSize = Offset+LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+
+ IRBuilder<> Builder(SrcVal->getParent(), SrcVal);
+ const Type *DestPTy =
+ IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
+ DestPTy = PointerType::get(DestPTy,
+ cast<PointerType>(PtrVal->getType())->getAddressSpace());
+
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
+ Value *RV = NewLoad;
+ if (TD.isBigEndian())
+ RV = Builder.CreateLShr(RV,
+ NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits());
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+ MD.removeInstruction(SrcVal);
+ SrcVal = NewLoad;
+ }
+
+ return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD);
+}
+
+
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
BasicBlock *BB;
enum ValType {
SimpleVal, // A simple offsetted value that is accessed.
+ LoadVal, // A value produced by a load.
MemIntrin // A memory intrinsic which is loaded from.
};
/// V - The value that is live out of the block.
- PointerIntPair<Value *, 1, ValType> Val;
+ PointerIntPair<Value *, 2, ValType> Val;
/// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
return Res;
}
+ static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(LI);
+ Res.Val.setInt(LoadVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+ bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+
Value *getSimpleValue() const {
assert(isSimpleValue() && "Wrong accessor");
return Val.getPointer();
}
+ LoadInst *getCoercedLoadValue() const {
+ assert(isCoercedLoadValue() && "Wrong accessor");
+ return cast<LoadInst>(Val.getPointer());
+ }
+
MemIntrinsic *getMemIntrinValue() const {
- assert(!isSimpleValue() && "Wrong accessor");
+ assert(isMemIntrinValue() && "Wrong accessor");
return cast<MemIntrinsic>(Val.getPointer());
}
/// MaterializeAdjustedValue - Emit code into this block to adjust the value
/// defined here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(const Type *LoadTy,
- const TargetData *TD) const {
+ const TargetData *TD,
+ MemoryDependenceAnalysis &MD) const {
Value *Res;
if (isSimpleValue()) {
Res = getSimpleValue();
Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
*TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
+ } else if (isCoercedLoadValue()) {
+ LoadInst *Load = getCoercedLoadValue();
+ if (Load->getType() == LoadTy && Offset == 0) {
+ Res = Load;
+ } else {
+ assert(TD && "Need target data to handle type mismatch case");
+ Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+ *TD, MD);
+
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
+ << *getCoercedLoadValue() << '\n'
+ << *Res << '\n' << "\n\n\n");
+ }
} else {
Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
LoadTy, BB->getTerminator(), *TD);
- DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
<< *Res << '\n' << "\n\n\n");
}
}
};
-}
+} // end anonymous namespace
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
const TargetData *TD,
const DominatorTree &DT,
- AliasAnalysis *AA) {
+ AliasAnalysis *AA,
+ MemoryDependenceAnalysis &MD) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
- return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);
+ return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD, MD);
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
if (SSAUpdate.HasValueForBlock(BB))
continue;
- SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
+ SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD,MD));
}
// Perform PHI construction.
DepLI, *TD);
if (Offset != -1) {
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, DepLI,
- Offset));
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI,
+ Offset));
continue;
}
}
continue;
}
}
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
+ ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD));
continue;
}
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
- VN.getAliasAnalysis());
+ VN.getAliasAnalysis(), *MD);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
- VN.getAliasAnalysis());
+ VN.getAliasAnalysis(), *MD);
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
L->getPointerOperand(),
DepLI, *TD);
if (Offset != -1)
- AvailVal = GetStoreValueForLoad(DepLI, Offset, L->getType(), L, *TD);
+ AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *TD, *MD);
}
// If the clobbering value is a memset/memcpy/memmove, see if we can forward