return Bits.size() == 1 && Bits[0] == 1;
}
+ bool isAllOnes() const {
+ for (unsigned I = 0; I != Bits.size() - 1; ++I)
+ if (Bits[I] != 0xFF)
+ return false;
+
+ if (BitSize % 8 == 0)
+ return Bits[Bits.size() - 1] == 0xFF;
+
+ return Bits[Bits.size() - 1] == (1 << (BitSize % 8)) - 1;
+ }
+
bool containsGlobalOffset(uint64_t Offset) const;
bool containsValue(const DataLayout *DL,
const DataLayout *DL;
IntegerType *Int1Ty;
+ IntegerType *Int8Ty;
IntegerType *Int32Ty;
Type *Int32PtrTy;
IntegerType *Int64Ty;
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
GlobalVariable *BitSetGlobal, Value *BitOffset);
- void
+ Value *
lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
report_fatal_error("Data layout required");
Int1Ty = Type::getInt1Ty(M.getContext());
+ Int8Ty = Type::getInt8Ty(M.getContext());
Int32Ty = Type::getInt32Ty(M.getContext());
Int32PtrTy = PointerType::getUnqual(Int32Ty);
Int64Ty = Type::getInt64Ty(M.getContext());
}
}
-/// Lower a llvm.bitset.test call to its implementation.
-void LowerBitSets::lowerBitSetCall(
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal,
GlobalVariable *CombinedGlobal,
const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
- if (BSI.containsValue(DL, GlobalLayout, Ptr)) {
- CI->replaceAllUsesWith(
- ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()));
- CI->eraseFromParent();
- return;
- }
+ if (BSI.containsValue(DL, GlobalLayout, Ptr))
+ return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext());
Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
- if (BSI.isSingleOffset()) {
- Value *Eq = B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
- CI->replaceAllUsesWith(Eq);
- CI->eraseFromParent();
- return;
- }
+ if (BSI.isSingleOffset())
+ return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
+ // If the bit set is all ones, testing against it is unnecessary.
+ if (BSI.isAllOnes())
+ return OffsetInRange;
+
TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
IRBuilder<> ThenB(Term);
PHINode *P = B.CreatePHI(Int1Ty, 2);
P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
P->addIncoming(Bit, ThenB.GetInsertBlock());
-
- CI->replaceAllUsesWith(P);
- CI->eraseFromParent();
+ return P;
}
/// Given a disjoint set of bitsets and globals, layout the globals, build the
const std::vector<GlobalVariable *> &Globals) {
// Build a new global with the combined contents of the referenced globals.
std::vector<Constant *> GlobalInits;
- for (GlobalVariable *G : Globals)
+ for (GlobalVariable *G : Globals) {
GlobalInits.push_back(G->getInitializer());
+ uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());
+
+ // Compute the amount of padding required to align the next element to the
+ // next power of 2.
+ uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+ // Cap at 128 was found experimentally to have a good data/instruction
+ // overhead tradeoff.
+ if (Padding > 128)
+ Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+ GlobalInits.push_back(
+ ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+ }
+ if (!GlobalInits.empty())
+ GlobalInits.pop_back();
Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
auto CombinedGlobal =
new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
// Compute the offsets of the original globals within the new global.
DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)
- GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I);
+ // Multiply by 2 to account for padding elements.
+ GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
// For each bitset in this disjoint set...
for (MDString *BS : BitSets) {
// Lower each call to llvm.bitset.test for this bitset.
for (CallInst *CI : BitSetTestCallSites[BS]) {
++NumBitSetCallsLowered;
- lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+ Value *Lowered =
+ lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
}
}
// global from which we built the combined global, and replace references
// to the original globals with references to the aliases.
for (unsigned I = 0; I != Globals.size(); ++I) {
+ // Multiply by 2 to account for padding elements.
Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, I)};
+ ConstantInt::get(Int32Ty, I * 2)};
Constant *CombinedGlobalElemPtr =
ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
GlobalAlias *GAlias = GlobalAlias::create(
; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h).
; The chosen layout in this case is a, e, b, d, c.
-; CHECK: private constant { i32, i32, i32, i32, i32 } { i32 1, i32 5, i32 2, i32 4, i32 3 }
+; CHECK: private constant { i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 5, [0 x i8] zeroinitializer, i32 2, [0 x i8] zeroinitializer, i32 4, [0 x i8] zeroinitializer, i32 3 }
@a = constant i32 1
@b = constant i32 2
@c = constant i32 3
target datalayout = "e-p:32:32"
-; CHECK: [[G:@[^ ]*]] = private constant { i32, [63 x i32], i32, [2 x i32] } { i32 1, [63 x i32] zeroinitializer, i32 3, [2 x i32] [i32 4, i32 5] }
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
@a = constant i32 1
@b = constant [63 x i32] zeroinitializer
@c = constant i32 3
@d = constant [2 x i32] [i32 4, i32 5]
; Offset 0, 4 byte alignment
-; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\04"
+; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\08"
!0 = !{!"bitset1", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}
!1 = !{!"bitset1", [63 x i32]* @b, i32 0}
!2 = !{!"bitset1", [2 x i32]* @d, i32 4}
; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}
-; Offset 4, 4 byte alignment
-; CHECK: @bitset2.bits = private constant [8 x i8] c"\01\00\00\00\00\00\00\80"
+; Offset 4, 256 byte alignment
+; CHECK: @bitset2.bits = private constant [1 x i8] c"\03"
!3 = !{!"bitset2", [63 x i32]* @b, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0}
!4 = !{!"bitset2", i32* @c, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
-; Offset 0, 256 byte alignment
-; CHECK: @bitset3.bits = private constant [1 x i8] c"\03"
+; Offset 0, 4 byte alignment
+; CHECK: @bitset3.bits = private constant [9 x i8] c"\01\00\00\00\00\00\00\00\02"
!5 = !{!"bitset3", i32* @a, i32 0}
; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
!6 = !{!"bitset3", i32* @c, i32 0}
!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
-; CHECK: @a = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 0)
-; CHECK: @b = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 1)
-; CHECK: @c = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 2)
-; CHECK: @d = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 3)
+; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
+; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
%pi8 = bitcast i32* %p to i8*
; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
- ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32)
+ ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
- ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 67
+ ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 68
; CHECK: br i1 [[R6]]
; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R5]], 5
; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8*
%pi8 = bitcast i32* %p to i8*
; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32
- ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32), i32 4)
- ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2
- ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30
+ ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32), i32 4)
+ ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 8
+ ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 24
; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
- ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 64
- ; CHECK: br i1 [[S6]]
-
- ; CHECK: [[S8:%[^ ]*]] = zext i32 [[S5]] to i64
- ; CHECK: [[S9:%[^ ]*]] = and i64 [[S8]], 63
- ; CHECK: [[S10:%[^ ]*]] = shl i64 1, [[S9]]
- ; CHECK: [[S11:%[^ ]*]] = and i64 -9223372036854775807, [[S10]]
- ; CHECK: [[S12:%[^ ]*]] = icmp ne i64 [[S11]], 0
-
- ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S12]], {{%[^ ]*}} ]
+ ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
- ; CHECK: ret i1 [[S16]]
+
+ ; CHECK: ret i1 [[S6]]
ret i1 %x
}
; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8*
%pi8 = bitcast i32* %p to i8*
; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
- ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32)
- ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 8
- ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 24
+ ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
+ ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2
+ ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30
; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
- ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2
+ ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66
; CHECK: br i1 [[T6]]
- ; CHECK: [[T8:%[^ ]*]] = and i32 [[T5]], 31
- ; CHECK: [[T9:%[^ ]*]] = shl i32 1, [[T8]]
- ; CHECK: [[T10:%[^ ]*]] = and i32 3, [[T9]]
- ; CHECK: [[T11:%[^ ]*]] = icmp ne i32 [[T10]], 0
+ ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T5]], 5
+ ; CHECK: [[T9:%[^ ]*]] = getelementptr i32* bitcast ([9 x i8]* @bitset3.bits to i32*), i32 [[T8]]
+ ; CHECK: [[T10:%[^ ]*]] = load i32* [[T9]]
+ ; CHECK: [[T11:%[^ ]*]] = and i32 [[T5]], 31
+ ; CHECK: [[T12:%[^ ]*]] = shl i32 1, [[T11]]
+ ; CHECK: [[T13:%[^ ]*]] = and i32 [[T10]], [[T12]]
+ ; CHECK: [[T14:%[^ ]*]] = icmp ne i32 [[T13]], 0
- ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
+ ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ]
%x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
; CHECK: ret i1 [[T16]]
ret i1 %x
target datalayout = "e-p:32:32"
-; CHECK: [[G:@[^ ]*]] = private constant { i32, i32 }
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], i32 }
@a = constant i32 1
@b = constant i32 2
; CHECK: @foo(i8* [[A0:%[^ ]*]])
define i1 @foo(i8* %p) {
; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i32
- ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, i32 }* [[G]] to i32)
+ ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset2")
; CHECK: ret i1 [[R1]]
ret i1 %x
; CHECK: @bar(i8* [[B0:%[^ ]*]])
define i1 @bar(i8* %p) {
; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i32
- ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, i32 }* [[G]] to i32), i32 4)
+ ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32), i32 4)
%x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset3")
; CHECK: ret i1 [[S1]]
ret i1 %x
uint64_t BitSize;
unsigned AlignLog2;
bool IsSingleOffset;
+ bool IsAllOnes;
} BSBTests[] = {
- {{}, {0}, 0, 1, 0, false},
- {{0}, {1}, 0, 1, 0, true},
- {{4}, {1}, 4, 1, 0, true},
- {{37}, {1}, 37, 1, 0, true},
- {{0, 1}, {3}, 0, 2, 0, false},
- {{0, 4}, {3}, 0, 2, 2, false},
- {{0, uint64_t(1) << 33}, {3}, 0, 2, 33, false},
- {{3, 7}, {3}, 3, 2, 2, false},
- {{0, 1, 7}, {131}, 0, 8, 0, false},
- {{0, 2, 14}, {131}, 0, 8, 1, false},
- {{0, 1, 8}, {3, 1}, 0, 9, 0, false},
- {{0, 2, 16}, {3, 1}, 0, 9, 1, false},
+ {{}, {0}, 0, 1, 0, false, false},
+ {{0}, {1}, 0, 1, 0, true, true},
+ {{4}, {1}, 4, 1, 0, true, true},
+ {{37}, {1}, 37, 1, 0, true, true},
+ {{0, 1}, {3}, 0, 2, 0, false, true},
+ {{0, 4}, {3}, 0, 2, 2, false, true},
+ {{0, uint64_t(1) << 33}, {3}, 0, 2, 33, false, true},
+ {{3, 7}, {3}, 3, 2, 2, false, true},
+ {{0, 1, 7}, {131}, 0, 8, 0, false, false},
+ {{0, 2, 14}, {131}, 0, 8, 1, false, false},
+ {{0, 1, 8}, {3, 1}, 0, 9, 0, false, false},
+ {{0, 2, 16}, {3, 1}, 0, 9, 1, false, false},
+ {{0, 1, 2, 3, 4, 5, 6, 7}, {255}, 0, 8, 0, false, true},
+ {{0, 1, 2, 3, 4, 5, 6, 7, 8}, {255, 1}, 0, 9, 0, false, true},
};
for (auto &&T : BSBTests) {
EXPECT_EQ(T.BitSize, BSI.BitSize);
EXPECT_EQ(T.AlignLog2, BSI.AlignLog2);
EXPECT_EQ(T.IsSingleOffset, BSI.isSingleOffset());
+ EXPECT_EQ(T.IsAllOnes, BSI.isAllOnes());
for (auto Offset : T.Offsets)
EXPECT_TRUE(BSI.containsGlobalOffset(Offset));