From: Peter Collingbourne
Date: Wed, 25 Feb 2015 20:42:41 +0000 (+0000)
Subject: LowerBitSets: Align referenced globals.
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d63e5ad9c5a14cc8dea5c82c91bea9f9ccf631f2;p=oota-llvm.git

LowerBitSets: Align referenced globals.

This change aligns globals to the next highest power of 2 bytes, up to a
maximum of 128. This makes it more likely that we will be able to compress
bit sets with a greater alignment. In many more cases, we can now take
advantage of a new optimization also introduced in this patch that removes
bit set checks if the bit set is all ones.

The 128 byte maximum was found to provide the best tradeoff between
instruction overhead and data overhead in a recent build of Chromium. It
allows us to remove ~2.4MB of instructions at the cost of ~250KB of data.

Differential Revision: http://reviews.llvm.org/D7873

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230540 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/include/llvm/Transforms/IPO/LowerBitSets.h b/include/llvm/Transforms/IPO/LowerBitSets.h
index ae75b090b49..0f606170545 100644
--- a/include/llvm/Transforms/IPO/LowerBitSets.h
+++ b/include/llvm/Transforms/IPO/LowerBitSets.h
@@ -48,6 +48,17 @@ struct BitSetInfo {
     return Bits.size() == 1 && Bits[0] == 1;
   }
 
+  bool isAllOnes() const {
+    for (unsigned I = 0; I != Bits.size() - 1; ++I)
+      if (Bits[I] != 0xFF)
+        return false;
+
+    if (BitSize % 8 == 0)
+      return Bits[Bits.size() - 1] == 0xFF;
+
+    return Bits[Bits.size() - 1] == (1 << (BitSize % 8)) - 1;
+  }
+
   bool containsGlobalOffset(uint64_t Offset) const;
 
   bool containsValue(const DataLayout *DL,
diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp
index 032d6484ea2..0a22a809c53 100644
--- a/lib/Transforms/IPO/LowerBitSets.cpp
+++ b/lib/Transforms/IPO/LowerBitSets.cpp
@@ -157,6 +157,7 @@ struct LowerBitSets : public ModulePass {
   const DataLayout *DL;
 
   IntegerType *Int1Ty;
+  IntegerType *Int8Ty;
   IntegerType *Int32Ty;
   Type *Int32PtrTy;
   IntegerType *Int64Ty;
@@ -173,7 +174,7 @@ struct LowerBitSets : public ModulePass {
               const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
   Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
                           GlobalVariable *BitSetGlobal, Value *BitOffset);
-  void
+  Value *
   lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI,
                   GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal,
                   const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
@@ -203,6 +204,7 @@ bool LowerBitSets::doInitialization(Module &M) {
     report_fatal_error("Data layout required");
 
   Int1Ty = Type::getInt1Ty(M.getContext());
+  Int8Ty = Type::getInt8Ty(M.getContext());
   Int32Ty = Type::getInt32Ty(M.getContext());
   Int32PtrTy = PointerType::getUnqual(Int32Ty);
   Int64Ty = Type::getInt64Ty(M.getContext());
@@ -293,19 +295,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI,
   }
 }
 
-/// Lower a llvm.bitset.test call to its implementation.
-void LowerBitSets::lowerBitSetCall(
+/// Lower a llvm.bitset.test call to its implementation. Returns the value to
+/// replace the call with.
+Value *LowerBitSets::lowerBitSetCall(
     CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal,
     GlobalVariable *CombinedGlobal,
     const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
   Value *Ptr = CI->getArgOperand(0);
 
-  if (BSI.containsValue(DL, GlobalLayout, Ptr)) {
-    CI->replaceAllUsesWith(
-        ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()));
-    CI->eraseFromParent();
-    return;
-  }
+  if (BSI.containsValue(DL, GlobalLayout, Ptr))
+    return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext());
 
   Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
   Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
@@ -317,12 +316,8 @@ void LowerBitSets::lowerBitSetCall(
 
   Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
 
-  if (BSI.isSingleOffset()) {
-    Value *Eq = B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
-    CI->replaceAllUsesWith(Eq);
-    CI->eraseFromParent();
-    return;
-  }
+  if (BSI.isSingleOffset())
+    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
 
   Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
 
@@ -349,6 +344,10 @@ void LowerBitSets::lowerBitSetCall(
   Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
   Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);
 
+  // If the bit set is all ones, testing against it is unnecessary.
+  if (BSI.isAllOnes())
+    return OffsetInRange;
+
   TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
   IRBuilder<> ThenB(Term);
 
@@ -363,9 +362,7 @@ void LowerBitSets::lowerBitSetCall(
   PHINode *P = B.CreatePHI(Int1Ty, 2);
   P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
   P->addIncoming(Bit, ThenB.GetInsertBlock());
-
-  CI->replaceAllUsesWith(P);
-  CI->eraseFromParent();
+  return P;
 }
 
 /// Given a disjoint set of bitsets and globals, layout the globals, build the
@@ -376,8 +373,24 @@ void LowerBitSets::buildBitSetsFromGlobals(
     const std::vector<GlobalVariable *> &Globals) {
   // Build a new global with the combined contents of the referenced globals.
   std::vector<Constant *> GlobalInits;
-  for (GlobalVariable *G : Globals)
+  for (GlobalVariable *G : Globals) {
     GlobalInits.push_back(G->getInitializer());
+    uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType());
+
+    // Compute the amount of padding required to align the next element to the
+    // next power of 2.
+    uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
+
+    // Cap at 128 was found experimentally to have a good data/instruction
+    // overhead tradeoff.
+    if (Padding > 128)
+      Padding = RoundUpToAlignment(InitSize, 128) - InitSize;
+
+    GlobalInits.push_back(
+        ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
+  }
+  if (!GlobalInits.empty())
+    GlobalInits.pop_back();
   Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
   auto CombinedGlobal =
       new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
@@ -389,7 +402,8 @@ void LowerBitSets::buildBitSetsFromGlobals(
   // Compute the offsets of the original globals within the new global.
   DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
   for (unsigned I = 0; I != Globals.size(); ++I)
-    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I);
+    // Multiply by 2 to account for padding elements.
+    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
 
   // For each bitset in this disjoint set...
   for (MDString *BS : BitSets) {
@@ -406,7 +420,10 @@ void LowerBitSets::buildBitSetsFromGlobals(
 
     // Lower each call to llvm.bitset.test for this bitset.
     for (CallInst *CI : BitSetTestCallSites[BS]) {
       ++NumBitSetCallsLowered;
-      lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+      Value *Lowered =
+          lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout);
+      CI->replaceAllUsesWith(Lowered);
+      CI->eraseFromParent();
     }
   }
 
@@ -414,8 +431,9 @@ void LowerBitSets::buildBitSetsFromGlobals(
   // global from which we built the combined global, and replace references
   // to the original globals with references to the aliases.
   for (unsigned I = 0; I != Globals.size(); ++I) {
+    // Multiply by 2 to account for padding elements.
     Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
-                                      ConstantInt::get(Int32Ty, I)};
+                                      ConstantInt::get(Int32Ty, I * 2)};
     Constant *CombinedGlobalElemPtr =
         ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs);
     GlobalAlias *GAlias = GlobalAlias::create(
diff --git a/test/Transforms/LowerBitSets/layout.ll b/test/Transforms/LowerBitSets/layout.ll
index 2966284279d..a0c6e77a57f 100644
--- a/test/Transforms/LowerBitSets/layout.ll
+++ b/test/Transforms/LowerBitSets/layout.ll
@@ -6,7 +6,7 @@ target datalayout = "e-p:32:32"
 ; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h).
 ; The chosen layout in this case is a, e, b, d, c.
 
-; CHECK: private constant { i32, i32, i32, i32, i32 } { i32 1, i32 5, i32 2, i32 4, i32 3 }
+; CHECK: private constant { i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 5, [0 x i8] zeroinitializer, i32 2, [0 x i8] zeroinitializer, i32 4, [0 x i8] zeroinitializer, i32 3 }
 @a = constant i32 1
 @b = constant i32 2
 @c = constant i32 3
diff --git a/test/Transforms/LowerBitSets/simple.ll b/test/Transforms/LowerBitSets/simple.ll
index aaf89c0f878..0928524ae9d 100644
--- a/test/Transforms/LowerBitSets/simple.ll
+++ b/test/Transforms/LowerBitSets/simple.ll
@@ -3,14 +3,14 @@
 
 target datalayout = "e-p:32:32"
 
-; CHECK: [[G:@[^ ]*]] = private constant { i32, [63 x i32], i32, [2 x i32] } { i32 1, [63 x i32] zeroinitializer, i32 3, [2 x i32] [i32 4, i32 5] }
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
 @a = constant i32 1
 @b = constant [63 x i32] zeroinitializer
 @c = constant i32 3
 @d = constant [2 x i32] [i32 4, i32 5]
 
 ; Offset 0, 4 byte alignment
-; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\04"
+; CHECK: @bitset1.bits = private constant [9 x i8] c"\03\00\00\00\00\00\00\00\08"
 !0 = !{!"bitset1", i32* @a, i32 0}
 ; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}
 !1 = !{!"bitset1", [63 x i32]* @b, i32 0}
@@ -18,15 +18,15 @@ target datalayout = "e-p:32:32"
 !2 = !{!"bitset1", [2 x i32]* @d, i32 4}
 ; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}
 
-; Offset 4, 4 byte alignment
-; CHECK: @bitset2.bits = private constant [8 x i8] c"\01\00\00\00\00\00\00\80"
+; Offset 4, 256 byte alignment
+; CHECK: @bitset2.bits = private constant [1 x i8] c"\03"
 !3 = !{!"bitset2", [63 x i32]* @b, i32 0}
 ; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0}
 !4 = !{!"bitset2", i32* @c, i32 0}
 ; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
 
-; Offset 0, 256 byte alignment
-; CHECK: @bitset3.bits = private constant [1 x i8] c"\03"
+; Offset 0, 4 byte alignment
+; CHECK: @bitset3.bits = private constant [9 x i8] c"\01\00\00\00\00\00\00\00\02"
 !5 = !{!"bitset3", i32* @a, i32 0}
 ; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
 !6 = !{!"bitset3", i32* @c, i32 0}
@@ -38,10 +38,10 @@ target datalayout = "e-p:32:32"
 
 !llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
 
-; CHECK: @a = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 0)
-; CHECK: @b = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 1)
-; CHECK: @c = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 2)
-; CHECK: @d = alias getelementptr inbounds ({ i32, [63 x i32], i32, [2 x i32] }* [[G]], i32 0, i32 3)
+; CHECK: @a = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 0)
+; CHECK: @b = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 2)
+; CHECK: @c = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 4)
+; CHECK: @d = alias getelementptr inbounds ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]], i32 0, i32 6)
 
 declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
 
@@ -52,11 +52,11 @@ define i1 @foo(i32* %p) {
   ; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
   %pi8 = bitcast i32* %p to i8*
   ; CHECK: [[R1:%[^ ]*]] = ptrtoint i8* [[R0]] to i32
-  ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[R2:%[^ ]*]] = sub i32 [[R1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
   ; CHECK: [[R3:%[^ ]*]] = lshr i32 [[R2]], 2
   ; CHECK: [[R4:%[^ ]*]] = shl i32 [[R2]], 30
   ; CHECK: [[R5:%[^ ]*]] = or i32 [[R3]], [[R4]]
-  ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 67
+  ; CHECK: [[R6:%[^ ]*]] = icmp ult i32 [[R5]], 68
   ; CHECK: br i1 [[R6]]
 
   ; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R5]], 5
@@ -82,22 +82,14 @@ define i1 @bar(i32* %p) {
   ; CHECK: [[S0:%[^ ]*]] = bitcast i32* [[B0]] to i8*
   %pi8 = bitcast i32* %p to i8*
   ; CHECK: [[S1:%[^ ]*]] = ptrtoint i8* [[S0]] to i32
-  ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32), i32 4)
-  ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 2
-  ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 30
+  ; CHECK: [[S2:%[^ ]*]] = sub i32 [[S1]], add (i32 ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32), i32 4)
+  ; CHECK: [[S3:%[^ ]*]] = lshr i32 [[S2]], 8
+  ; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 24
   ; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
-  ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 64
-  ; CHECK: br i1 [[S6]]
-
-  ; CHECK: [[S8:%[^ ]*]] = zext i32 [[S5]] to i64
-  ; CHECK: [[S9:%[^ ]*]] = and i64 [[S8]], 63
-  ; CHECK: [[S10:%[^ ]*]] = shl i64 1, [[S9]]
-  ; CHECK: [[S11:%[^ ]*]] = and i64 -9223372036854775807, [[S10]]
-  ; CHECK: [[S12:%[^ ]*]] = icmp ne i64 [[S11]], 0
-
-  ; CHECK: [[S16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[S12]], {{%[^ ]*}} ]
+  ; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2
   %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
-  ; CHECK: ret i1 [[S16]]
+
+  ; CHECK: ret i1 [[S6]]
   ret i1 %x
 }
 
@@ -106,19 +98,22 @@ define i1 @baz(i32* %p) {
   ; CHECK: [[T0:%[^ ]*]] = bitcast i32* [[C0]] to i8*
   %pi8 = bitcast i32* %p to i8*
   ; CHECK: [[T1:%[^ ]*]] = ptrtoint i8* [[T0]] to i32
-  ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [63 x i32], i32, [2 x i32] }* [[G]] to i32)
-  ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 8
-  ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 24
+  ; CHECK: [[T2:%[^ ]*]] = sub i32 [[T1]], ptrtoint ({ i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] }* [[G]] to i32)
+  ; CHECK: [[T3:%[^ ]*]] = lshr i32 [[T2]], 2
+  ; CHECK: [[T4:%[^ ]*]] = shl i32 [[T2]], 30
   ; CHECK: [[T5:%[^ ]*]] = or i32 [[T3]], [[T4]]
-  ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 2
+  ; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66
   ; CHECK: br i1 [[T6]]
 
-  ; CHECK: [[T8:%[^ ]*]] = and i32 [[T5]], 31
-  ; CHECK: [[T9:%[^ ]*]] = shl i32 1, [[T8]]
-  ; CHECK: [[T10:%[^ ]*]] = and i32 3, [[T9]]
-  ; CHECK: [[T11:%[^ ]*]] = icmp ne i32 [[T10]], 0
+  ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T5]], 5
+  ; CHECK: [[T9:%[^ ]*]] = getelementptr i32* bitcast ([9 x i8]* @bitset3.bits to i32*), i32 [[T8]]
+  ; CHECK: [[T10:%[^ ]*]] = load i32* [[T9]]
+  ; CHECK: [[T11:%[^ ]*]] = and i32 [[T5]], 31
+  ; CHECK: [[T12:%[^ ]*]] = shl i32 1, [[T11]]
+  ; CHECK: [[T13:%[^ ]*]] = and i32 [[T10]], [[T12]]
+  ; CHECK: [[T14:%[^ ]*]] = icmp ne i32 [[T13]], 0
 
-  ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
+  ; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T14]], {{%[^ ]*}} ]
   %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
   ; CHECK: ret i1 [[T16]]
   ret i1 %x
diff --git a/test/Transforms/LowerBitSets/single-offset.ll b/test/Transforms/LowerBitSets/single-offset.ll
index ebe57bf4d14..57194f42e09 100644
--- a/test/Transforms/LowerBitSets/single-offset.ll
+++ b/test/Transforms/LowerBitSets/single-offset.ll
@@ -2,7 +2,7 @@
 
 target datalayout = "e-p:32:32"
 
-; CHECK: [[G:@[^ ]*]] = private constant { i32, i32 }
+; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], i32 }
 @a = constant i32 1
 @b = constant i32 2
 
@@ -18,7 +18,7 @@ declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
 ; CHECK: @foo(i8* [[A0:%[^ ]*]])
 define i1 @foo(i8* %p) {
   ; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i32
-  ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, i32 }* [[G]] to i32)
+  ; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32)
   %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset2")
   ; CHECK: ret i1 [[R1]]
   ret i1 %x
@@ -27,7 +27,7 @@ define i1 @foo(i8* %p) {
 ; CHECK: @bar(i8* [[B0:%[^ ]*]])
 define i1 @bar(i8* %p) {
   ; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i32
-  ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, i32 }* [[G]] to i32), i32 4)
+  ; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32), i32 4)
   %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset3")
   ; CHECK: ret i1 [[S1]]
   ret i1 %x
diff --git a/unittests/Transforms/IPO/LowerBitSets.cpp b/unittests/Transforms/IPO/LowerBitSets.cpp
index 2f27c0762ff..26a42528378 100644
--- a/unittests/Transforms/IPO/LowerBitSets.cpp
+++ b/unittests/Transforms/IPO/LowerBitSets.cpp
@@ -20,19 +20,22 @@ TEST(LowerBitSets, BitSetBuilder) {
     uint64_t BitSize;
     unsigned AlignLog2;
     bool IsSingleOffset;
+    bool IsAllOnes;
   } BSBTests[] = {
-      {{}, {0}, 0, 1, 0, false},
-      {{0}, {1}, 0, 1, 0, true},
-      {{4}, {1}, 4, 1, 0, true},
-      {{37}, {1}, 37, 1, 0, true},
-      {{0, 1}, {3}, 0, 2, 0, false},
-      {{0, 4}, {3}, 0, 2, 2, false},
-      {{0, uint64_t(1) << 33}, {3}, 0, 2, 33, false},
-      {{3, 7}, {3}, 3, 2, 2, false},
-      {{0, 1, 7}, {131}, 0, 8, 0, false},
-      {{0, 2, 14}, {131}, 0, 8, 1, false},
-      {{0, 1, 8}, {3, 1}, 0, 9, 0, false},
-      {{0, 2, 16}, {3, 1}, 0, 9, 1, false},
+      {{}, {0}, 0, 1, 0, false, false},
+      {{0}, {1}, 0, 1, 0, true, true},
+      {{4}, {1}, 4, 1, 0, true, true},
+      {{37}, {1}, 37, 1, 0, true, true},
+      {{0, 1}, {3}, 0, 2, 0, false, true},
+      {{0, 4}, {3}, 0, 2, 2, false, true},
+      {{0, uint64_t(1) << 33}, {3}, 0, 2, 33, false, true},
+      {{3, 7}, {3}, 3, 2, 2, false, true},
+      {{0, 1, 7}, {131}, 0, 8, 0, false, false},
+      {{0, 2, 14}, {131}, 0, 8, 1, false, false},
+      {{0, 1, 8}, {3, 1}, 0, 9, 0, false, false},
+      {{0, 2, 16}, {3, 1}, 0, 9, 1, false, false},
+      {{0, 1, 2, 3, 4, 5, 6, 7}, {255}, 0, 8, 0, false, true},
+      {{0, 1, 2, 3, 4, 5, 6, 7, 8}, {255, 1}, 0, 9, 0, false, true},
   };
 
   for (auto &&T : BSBTests) {
@@ -47,6 +50,7 @@ TEST(LowerBitSets, BitSetBuilder) {
     EXPECT_EQ(T.BitSize, BSI.BitSize);
     EXPECT_EQ(T.AlignLog2, BSI.AlignLog2);
     EXPECT_EQ(T.IsSingleOffset, BSI.isSingleOffset());
+    EXPECT_EQ(T.IsAllOnes, BSI.isAllOnes());
 
     for (auto Offset : T.Offsets)
       EXPECT_TRUE(BSI.containsGlobalOffset(Offset));
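
As a standalone illustration of the two rules this change introduces (the power-of-two
padding with a 128-byte cap computed in buildBitSetsFromGlobals, and the all-ones test
from BitSetInfo::isAllOnes), here is a minimal sketch in plain C++ with no LLVM
dependencies. The helpers nextPowerOf2, computePadding and isAllOnes are illustrative
stand-ins rather than LLVM APIs: nextPowerOf2 and the rounding expression approximate
llvm::NextPowerOf2 and llvm::RoundUpToAlignment, and the sketch assumes InitSize >= 1
and a non-empty bit vector.

#include <cassert>
#include <cstdint>
#include <vector>

// Smallest power of two strictly greater than X (llvm::NextPowerOf2-like).
static uint64_t nextPowerOf2(uint64_t X) {
  uint64_t P = 1;
  while (P <= X)
    P <<= 1;
  return P;
}

// Padding inserted after a global of InitSize bytes: align the next element
// to the next power of two, but never pad past the next 128-byte boundary.
static uint64_t computePadding(uint64_t InitSize) {
  uint64_t Padding = nextPowerOf2(InitSize - 1) - InitSize;
  if (Padding > 128)
    Padding = ((InitSize + 127) / 128) * 128 - InitSize;
  return Padding;
}

// True if the first BitSize bits of the little-endian byte vector are all set,
// mirroring BitSetInfo::isAllOnes.
static bool isAllOnes(const std::vector<uint8_t> &Bits, uint64_t BitSize) {
  for (size_t I = 0; I + 1 != Bits.size(); ++I)
    if (Bits[I] != 0xFF)
      return false;
  if (BitSize % 8 == 0)
    return Bits.back() == 0xFF;
  return Bits.back() == (1 << (BitSize % 8)) - 1;
}

int main() {
  assert(computePadding(4) == 0);     // i32: already a power of two, no padding
  assert(computePadding(252) == 4);   // [63 x i32]: padded up to 256 bytes
  assert(computePadding(260) == 124); // capped: pad to 384, not all the way to 512
  assert(isAllOnes({0x03}, 2));       // matches the {{0, 1}, {3}, ...} unit test case
  assert(!isAllOnes({0x83, 0x01}, 9));
  return 0;
}

Small globals therefore keep roughly their natural size while the 128-byte cap bounds
the data-segment growth for large globals; that is the instruction-versus-data tradeoff
the commit message quantifies for Chromium.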