propagating one of the values it simplified to a constant across
a myriad of instructions. Notably, ptrtoint instructions when we had
a constant pointer (say, 0) didn't propagate that, blocking a massive
number of down-stream optimizations.
This was uncovered when investigating why we fail to inline and delete
the boilerplate in:
void f() {
std::vector<int> v;
v.push_back(1);
}
It turns out most of the efforts I've made thus far to improve the
analysis weren't making it far purely because of this. After this is
fixed, the store-to-load forwarding patch enables LLVM to optimize the
above to an empty function. We still can't nuke a second push_back, but
for different reasons.
There is a very real chance this will cause somewhat noticable changes
in inlining behavior, so please let me know if you see regressions (or
improvements!) because of this patch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171196
91177308-0d34-0410-b5e6-
96231b3b80d8
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Propagate constants through ptrtoint.
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Propagate constants through ptrtoint.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// Propagate constants through ptrtoint.
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// Propagate constants through ptrtoint.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
bool CallAnalyzer::visitCastInst(CastInst &I) {
// Propagate constants through ptrtoint.
bool CallAnalyzer::visitCastInst(CastInst &I) {
// Propagate constants through ptrtoint.
- if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ Constant *COp = dyn_cast<Constant>(I.getOperand(0));
+ if (!COp)
+ COp = SimplifiedValues.lookup(I.getOperand(0));
+ if (COp)
if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
SimplifiedValues[&I] = C;
return true;
+define i64 @caller5(i64 %y) {
+; Check that we can round trip constants through various kinds of casts etc w/o
+; losing track of the constant prop in the inline cost analysis.
+;
+; CHECK: @caller5
+; CHECK-NOT: call
+; CHECK: ret i64 -1
+
+entry:
+ %x = call i64 @callee5(i64 42, i64 %y)
+ ret i64 %x
+}
+
+define i64 @callee5(i64 %x, i64 %y) {
+ %inttoptr = inttoptr i64 %x to i8*
+ %bitcast = bitcast i8* %inttoptr to i32*
+ %ptrtoint = ptrtoint i32* %bitcast to i64
+ %trunc = trunc i64 %ptrtoint to i32
+ %zext = zext i32 %trunc to i64
+ %cmp = icmp eq i64 %zext, 42
+ br i1 %cmp, label %bb.true, label %bb.false
+
+bb.true:
+ ret i64 -1
+
+bb.false:
+ ; This block musn't be counted in the inline cost.
+ %y1 = add i64 %y, 1
+ %y2 = add i64 %y1, 1
+ %y3 = add i64 %y2, 1
+ %y4 = add i64 %y3, 1
+ %y5 = add i64 %y4, 1
+ %y6 = add i64 %y5, 1
+ %y7 = add i64 %y6, 1
+ %y8 = add i64 %y7, 1
+ ret i64 %y8
+}
+
define i32 @PR13412.main() {
; This is a somewhat complicated three layer subprogram that was reported to
define i32 @PR13412.main() {
; This is a somewhat complicated three layer subprogram that was reported to