return true;
}
-
+/// isAddress - Returns true if the specified instruction is using the
+/// specified value as an address.
+static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
+ bool isAddress = isa<LoadInst>(Inst);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (SI->getOperand(1) == OperandVal)
+ isAddress = true;
+ } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Addressing modes can also be folded into prefetches and a variety
+ // of intrinsics.
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::prefetch:
+ case Intrinsic::x86_sse2_loadu_dq:
+ case Intrinsic::x86_sse2_loadu_pd:
+ case Intrinsic::x86_sse_loadu_ps:
+ case Intrinsic::x86_sse_storeu_ps:
+ case Intrinsic::x86_sse2_storeu_pd:
+ case Intrinsic::x86_sse2_storeu_dq:
+ case Intrinsic::x86_sse2_storel_dq:
+ if (II->getOperand(1) == OperandVal)
+ isAddress = true;
+ break;
+ }
+ }
+ return isAddress;
+}
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
}
-/// isTargetConstant - Return true if the following can be referenced by the
-/// immediate field of a target instruction.
-static bool isTargetConstant(const SCEVHandle &V, const Type *UseTy,
- const TargetLowering *TLI) {
+/// fitsInAddressMode - Return true if V can be subsumed within an addressing
+/// mode, and does not need to be put in a register first.
+static bool fitsInAddressMode(const SCEVHandle &V, const Type *UseTy,
+ const TargetLowering *TLI, bool HasBaseReg) {
if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {
int64_t VC = SC->getValue()->getSExtValue();
if (TLI) {
TargetLowering::AddrMode AM;
AM.BaseOffs = VC;
+ AM.HasBaseReg = HasBaseReg;
return TLI->isLegalAddressingMode(AM, UseTy);
} else {
// Defaults to PPC. PPC allows a sign-extended 16-bit immediate field.
if (GlobalValue *GV = dyn_cast<GlobalValue>(Op0)) {
TargetLowering::AddrMode AM;
AM.BaseGV = GV;
+ AM.HasBaseReg = HasBaseReg;
return TLI->isLegalAddressingMode(AM, UseTy);
}
}
return;
} else if (SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) {
// Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field.
- if (isAddress && isTargetConstant(SME->getOperand(0), UseTy, TLI) &&
+ if (isAddress && fitsInAddressMode(SME->getOperand(0), UseTy, TLI, false) &&
SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) {
SCEVHandle SubImm = SE->getIntegerSCEV(0, Val->getType());
// Scale SubImm up by "8". If the result is a target constant, we are
// good.
SubImm = SE->getMulExpr(SubImm, SME->getOperand(0));
- if (isTargetConstant(SubImm, UseTy, TLI)) {
+ if (fitsInAddressMode(SubImm, UseTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
// Loop-variant expressions must stay in the immediate field of the
// expression.
- if ((isAddress && isTargetConstant(Val, UseTy, TLI)) ||
+ if ((isAddress && fitsInAddressMode(Val, UseTy, TLI, false)) ||
!Val->isLoopInvariant(L)) {
Imm = SE->getAddExpr(Imm, Val);
Val = SE->getIntegerSCEV(0, Val->getType());
}
}
+// This is logically local to the following function, but C++ says we have
+// to make it file scope.
+struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
-/// RemoveCommonExpressionsFromUseBases - Look through all of the uses in Bases,
-/// removing any common subexpressions from it. Anything truly common is
-/// removed, accumulated, and returned. This looks for things like (a+b+c) and
+/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all
+/// the Uses, removing any common subexpressions, except that if all such
+/// subexpressions can be folded into an addressing mode for all uses inside
+/// the loop (this case is referred to as "free" in comments herein) we do
+/// not remove anything. This looks for things like (a+b+c) and
/// (a+c+d) and computes the common (a+c) subexpression. The common expression
/// is *removed* from the Bases and returned.
static SCEVHandle
RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
- ScalarEvolution *SE, Loop *L) {
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI) {
unsigned NumUses = Uses.size();
// Only one use? This is a very common case, so we handle it specially and
// cheaply.
SCEVHandle Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType());
SCEVHandle Result = Zero;
+ SCEVHandle FreeResult = Zero;
if (NumUses == 1) {
// If the use is inside the loop, use its base, regardless of what it is:
// it is clearly shared across all the IV's. If the use is outside the loop
// To find common subexpressions, count how many of Uses use each expression.
// If any subexpressions are used Uses.size() times, they are common.
- std::map<SCEVHandle, unsigned> SubExpressionUseCounts;
+ // Also track whether all uses of each expression can be moved into an
+ // an addressing mode "for free"; such expressions are left within the loop.
+ // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
+ std::map<SCEVHandle, SubExprUseData> SubExpressionUseData;
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
+ // If this use is as an address we may be able to put CSEs in the addressing
+ // mode rather than hoisting them.
+ bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace);
+ // We may need the UseTy below, but only when isAddrUse, so compute it
+ // only in that case.
+ const Type *UseTy = 0;
+ if (isAddrUse) {
+ UseTy = Uses[i].Inst->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
+ UseTy = SI->getOperand(0)->getType();
+ }
+
// Split the expression into subexprs.
SeparateSubExprs(SubExprs, Uses[i].Base, SE);
- // Add one to SubExpressionUseCounts for each subexpr present.
- for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
- if (++SubExpressionUseCounts[SubExprs[j]] == 1)
+ // Add one to SubExpressionUseData.Count for each subexpr present, and
+ // if the subexpr is not a valid immediate within an addressing mode use,
+ // set SubExpressionUseData.notAllUsesAreFree. We definitely want to
+ // hoist these out of the loop (if they are common to all uses).
+ for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
+ if (++SubExpressionUseData[SubExprs[j]].Count == 1)
UniqueSubExprs.push_back(SubExprs[j]);
+ if (!isAddrUse || !fitsInAddressMode(SubExprs[j], UseTy, TLI, false))
+ SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true;
+ }
SubExprs.clear();
}
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<SCEVHandle, unsigned>::iterator I =
- SubExpressionUseCounts.find(UniqueSubExprs[i]);
- assert(I != SubExpressionUseCounts.end() && "Entry not found?");
- if (I->second == NumUsesInsideLoop) // Found CSE!
- Result = SE->getAddExpr(Result, I->first);
- else
- // Remove non-cse's from SubExpressionUseCounts.
- SubExpressionUseCounts.erase(I);
+ std::map<SCEVHandle, SubExprUseData>::iterator I =
+ SubExpressionUseData.find(UniqueSubExprs[i]);
+ assert(I != SubExpressionUseData.end() && "Entry not found?");
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.notAllUsesAreFree)
+ Result = SE->getAddExpr(Result, I->first);
+ else
+ FreeResult = SE->getAddExpr(FreeResult, I->first);
+ } else
+ // Remove non-cse's from SubExpressionUseData.
+ SubExpressionUseData.erase(I);
}
-
+
+ if (FreeResult != Zero) {
+ // We have some subexpressions that can be subsumed into addressing
+ // modes in every use inside the loop. However, it's possible that
+ // there are so many of them that the combined FreeResult cannot
+ // be subsumed, or that the target cannot handle both a FreeResult
+ // and a Result in the same instruction (for example because it would
+ // require too many registers). Check this.
+ for (unsigned i=0; i<NumUses; ++i) {
+ if (!L->contains(Uses[i].Inst->getParent()))
+ continue;
+ // We know this is an addressing mode use; if there are any uses that
+ // are not, FreeResult would be Zero.
+ const Type *UseTy = Uses[i].Inst->getType();
+ if (StoreInst *SI = dyn_cast<StoreInst>(Uses[i].Inst))
+ UseTy = SI->getOperand(0)->getType();
+ if (!fitsInAddressMode(FreeResult, UseTy, TLI, Result!=Zero)) {
+ // FIXME: could split up FreeResult into pieces here, some hoisted
+ // and some not. Doesn't seem worth it for now.
+ Result = SE->getAddExpr(Result, FreeResult);
+ FreeResult = Zero;
+ break;
+ }
+ }
+ }
+
// If we found no CSE's, return now.
if (Result == Zero) return Result;
+ // If we still have a FreeResult, remove its subexpressions from
+ // SubExpressionUseData. This means they will remain in the use Bases.
+ if (FreeResult != Zero) {
+ SeparateSubExprs(SubExprs, FreeResult, SE);
+ for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
+ std::map<SCEVHandle, SubExprUseData>::iterator I =
+ SubExpressionUseData.find(SubExprs[j]);
+ SubExpressionUseData.erase(I);
+ }
+ SubExprs.clear();
+ }
+
// Otherwise, remove all of the CSE's we found from each of the base values.
for (unsigned i = 0; i != NumUses; ++i) {
// Uses outside the loop don't necessarily include the common base, but
// Remove any common subexpressions.
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j)
- if (SubExpressionUseCounts.count(SubExprs[j])) {
+ if (SubExpressionUseData.count(SubExprs[j])) {
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
return SC->getValue()->getValue().isNegative();
}
-/// isAddress - Returns true if the specified instruction is using the
-/// specified value as an address.
-static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
- bool isAddress = isa<LoadInst>(Inst);
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(1) == OperandVal)
- isAddress = true;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Addressing modes can also be folded into prefetches and a variety
- // of intrinsics.
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::prefetch:
- case Intrinsic::x86_sse2_loadu_dq:
- case Intrinsic::x86_sse2_loadu_pd:
- case Intrinsic::x86_sse_loadu_ps:
- case Intrinsic::x86_sse_storeu_ps:
- case Intrinsic::x86_sse2_storeu_pd:
- case Intrinsic::x86_sse2_storeu_dq:
- case Intrinsic::x86_sse2_storel_dq:
- if (II->getOperand(1) == OperandVal)
- isAddress = true;
- break;
- }
- }
- return isAddress;
-}
-
// CollectIVUsers - Transform our list of users and offsets to a bit more
// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
// of the strided accesses, as well as the old information from Uses. We
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
SCEVHandle CommonExprs =
- RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L);
+ RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);
// Next, figure out what we can represent in the immediate fields of
// instructions. If we can represent anything there, move it to the imm
Constant *C = dyn_cast<Constant>(CommonBaseV);
if (!C ||
(!C->isNullValue() &&
- !isTargetConstant(SE->getUnknown(CommonBaseV), ReplacedTy, TLI)))
+ !fitsInAddressMode(SE->getUnknown(CommonBaseV), ReplacedTy,
+ TLI, false)))
// We want the common base emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(),
// this by forcing a BitCast (noop cast) to be inserted into the preheader
// in this case.
if (Constant *C = dyn_cast<Constant>(BaseV)) {
- if (!C->isNullValue() && !isTargetConstant(Base, ReplacedTy, TLI)) {
+ if (!C->isNullValue() && !fitsInAddressMode(Base, ReplacedTy,
+ TLI, false)) {
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",