2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <type_traits>
23 #include <folly/Bits.h>
24 #include <folly/Likely.h>
25 #include <folly/Portability.h>
26 #include <folly/Range.h>
27 #include <folly/experimental/Bits.h>
28 #include <folly/experimental/CodingDetail.h>
29 #include <folly/experimental/Instructions.h>
30 #include <folly/experimental/Select64.h>
31 #include <glog/logging.h>
// NOTE(review): this #error is the body of an architecture guard whose
// #if line is elided from this chunk; the implementation below relies on
// x86_64 instruction helpers (folly/experimental/Instructions.h).
34 #error BitVectorCoding.h requires x86_64
37 namespace folly { namespace compression {
// The unaligned uint64_t loads/stores used throughout assume
// little-endian byte order; enforce it at compile time.
39 static_assert(kIsLittleEndian, "BitVectorCoding.h requires little endianness");
41 template <class Pointer>
// Plain-data view over an encoded bit-vector list.  Parameterized on the
// pointer type so the same layout serves both const (reader) and mutable
// (encoder) access; see the two typedefs that follow this struct.
// NOTE(review): the struct's closing brace is elided from this chunk.
42 struct BitVectorCompressedListBase {
43 BitVectorCompressedListBase() = default;
45 template <class OtherPointer>
// Converting constructor (e.g. mutable view -> const view); rebinds each
// section pointer to the target pointer type.
46 BitVectorCompressedListBase(
47 const BitVectorCompressedListBase<OtherPointer>& other)
49 upperBound(other.upperBound),
51 bits(reinterpret_cast<Pointer>(other.bits)),
52 skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
53 forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}
// SFINAE-gated: free() only exists when ::free accepts the Pointer type,
// i.e. only on the mutable (uint8_t*) instantiation.  Releases the
// malloc'd buffer that `data` points into.
55 template <class T = Pointer>
56 auto free() -> decltype(::free(T(nullptr))) {
57 return ::free(data.data());
// Largest encodable value; sections below are carved out of `data`.
61 size_t upperBound = 0;
63 folly::Range<Pointer> data;
65 Pointer bits = nullptr;
66 Pointer skipPointers = nullptr;
67 Pointer forwardPointers = nullptr;
// Read-only view (decoder side).
70 typedef BitVectorCompressedListBase<const uint8_t*> BitVectorCompressedList;
// Writable view (encoder side).
71 typedef BitVectorCompressedListBase<uint8_t*> MutableBitVectorCompressedList;
// Encoder for a strictly monotone list of unsigned integers, stored as a
// plain bit vector (bit i set <=> value i is present) plus optional skip
// and forward pointer tables.  NOTE(review): the leading `template <class
// Value, class SkipValue, ...>` lines are elided from this chunk; the two
// quantum parameters below default to 0, which disables the corresponding
// pointer table.  The struct's closing brace is also elided.
76 size_t kSkipQuantum = 0,
77 size_t kForwardQuantum = 0>
78 struct BitVectorEncoder {
79 static_assert(std::is_integral<Value>::value &&
80 std::is_unsigned<Value>::value,
81 "Value should be unsigned integral");
// Const / mutable views over the encoded byte layout.
83 typedef BitVectorCompressedList CompressedList;
84 typedef MutableBitVectorCompressedList MutableCompressedList;
86 typedef Value ValueType;
87 typedef SkipValue SkipValueType;
// Exposed so BitVectorReader can pick the matching pointer strategies.
90 static constexpr size_t skipQuantum = kSkipQuantum;
91 static constexpr size_t forwardQuantum = kForwardQuantum;
// Convenience: encode [begin, end) in one shot; the last element sizes
// the layout as the upper bound.  NOTE(review): the empty-range guard and
// the per-element add() call inside the loop are elided from this chunk.
93 template <class RandomAccessIterator>
94 static MutableCompressedList encode(RandomAccessIterator begin,
95 RandomAccessIterator end) {
97 return MutableCompressedList();
99 BitVectorEncoder(size_t(end - begin), *(end - 1));
100 for (; begin != end; ++begin) {
103 return encoder.finish();
// Encode into caller-provided storage; zero-fills the buffer up front so
// add() only ever needs to set bits.
106 explicit BitVectorEncoder(const MutableCompressedList& result)
107 : bits_(result.bits),
108 skipPointers_(result.skipPointers),
109 forwardPointers_(result.forwardPointers),
111 memset(result.data.data(), 0, result.data.size());
// Allocates storage sized by Layout for the given size/upper bound.
114 BitVectorEncoder(size_t size, ValueType upperBound)
116 Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
// Append the next value; must be strictly greater than the previous one.
118 void add(ValueType value) {
// value + 1 is formed below, so the maximal ValueType is not encodable.
119 CHECK_LT(value, std::numeric_limits<ValueType>::max());
120 // Also works when lastValue_ == -1.
121 CHECK_GT(value + 1, lastValue_ + 1)
122 << "BitVectorCoding only supports stricly monotone lists";
// Set bit `value` of the vector: 64-bit word value/64, bit value%64.
124 auto block = bits_ + (value / 64) * sizeof(uint64_t);
125 size_t inner = value % 64;
126 folly::Bits<folly::Unaligned<uint64_t>>::set(
127 reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);
// Skip pointers: for each quantum of *value* space crossed, record the
// current element count (size_) so skipTo() can start near the target.
129 if (skipQuantum != 0) {
130 size_t nextSkipPointerSize = value / skipQuantum;
131 while (skipPointersSize_ < nextSkipPointerSize) {
132 auto pos = skipPointersSize_++;
133 folly::storeUnaligned<SkipValueType>(
134 skipPointers_ + pos * sizeof(SkipValueType), size_);
// Forward pointers: every forwardQuantum-th *element*, record its value
// so skip() can jump near the target position.
138 if (forwardQuantum != 0) {
139 if (size_ != 0 && (size_ % forwardQuantum == 0)) {
140 const auto pos = size_ / forwardQuantum - 1;
141 folly::storeUnaligned<SkipValueType>(
142 forwardPointers_ + pos * sizeof(SkipValueType), value);
// Validate invariants and hand back the encoded storage.  NOTE(review):
// the return statement itself is elided from this chunk.
150 const MutableCompressedList& finish() const {
151 CHECK_EQ(size_, result_.size);
152 // TODO(ott): Relax this assumption.
153 CHECK_EQ(result_.upperBound, lastValue_);
// Raw section pointers into result_.data.
158 uint8_t* const bits_ = nullptr;
159 uint8_t* const skipPointers_ = nullptr;
160 uint8_t* const forwardPointers_ = nullptr;
// -1 wraps to max(); add() then compares value + 1 > lastValue_ + 1,
// which holds for any first value because lastValue_ + 1 wraps to 0.
162 ValueType lastValue_ = -1;
164 size_t skipPointersSize_ = 0;
166 MutableCompressedList result_;
// Out-of-line definition of the encoder's nested Layout: computes the
// byte layout of an encoded list for a given upper bound and element
// count.  NOTE(review): the leading `template <...>` lines and the
// `Layout {` header/closing brace are partially elided from this chunk.
173 size_t kForwardQuantum>
174 struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
// Compute per-section sizes from the largest value and element count.
176 static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
179 layout.upperBound = upperBound;
// One bit per possible value in [0, upperBound]; the +1 byte covers the
// partial trailing byte (including the bit for upperBound itself).
181 size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
182 layout.bits = bitVectorSizeInBytes;
184 if (skipQuantum != 0) {
// One skip pointer per quantum of value space.
185 size_t numSkipPointers = upperBound / skipQuantum;
186 layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
188 if (forwardQuantum != 0) {
// One forward pointer per quantum of elements.
189 size_t numForwardPointers = size / forwardQuantum;
190 layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
// Pointer tables store element counts/values as SkipValueType, so the
// list size must fit in it.
193 CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
// Total payload bytes (excluding any allocation slack).
198 size_t bytes() const { return bits + skipPointers + forwardPointers; }
// Carve a caller-provided buffer into the per-section pointers of a
// compressed-list view (const or mutable, per the Range's iterator).
200 template <class Range>
201 BitVectorCompressedListBase<typename Range::iterator> openList(
203 BitVectorCompressedListBase<typename Range::iterator> result;
205 result.upperBound = upperBound;
206 result.data = buf.subpiece(0, bytes());
207 auto advance = [&](size_t n) {
208 auto begin = buf.data();
// NOTE(review): the rest of the advance() lambda body (consuming n bytes
// from `buf` and returning the old position) is elided from this chunk.
213 result.bits = advance(bits);
214 result.skipPointers = advance(skipPointers);
215 result.forwardPointers = advance(forwardPointers);
// After carving all sections, exactly bytes() must have been consumed.
216 CHECK_EQ(buf.data() - result.data.data(), bytes());
// malloc a buffer and open it.  The extra 7 bytes presumably keep the
// reader's unaligned 64-bit loads near the tail inside the allocation —
// TODO confirm against the reader's load pattern.
221 MutableCompressedList allocList() const {
222 uint8_t* buf = nullptr;
224 buf = static_cast<uint8_t*>(malloc(bytes() + 7));
226 folly::MutableByteRange bufRange(buf, bytes());
227 return openList(bufRange);
// Largest encodable value plus per-section byte sizes, all filled in by
// fromUpperBoundAndSize().  NOTE(review): the `bits` member declaration
// is elided from this chunk (it is assigned above at original line 182).
231 size_t upperBound = 0;
235 size_t skipPointers = 0;
236 size_t forwardPointers = 0;
// Reader/iterator over an encoded bit-vector list.  Inherits empty-base
// helpers that hold the forward/skip pointer arrays only when the
// corresponding Encoder quantum is non-zero.  NOTE(review): the leading
// `template <class Encoder, ...>` line is elided from this chunk, and the
// class runs past the last visible line (trailing members are not shown).
241 class Instructions = instructions::Default,
242 bool kUnchecked = false>
243 class BitVectorReader : detail::ForwardPointers<Encoder::forwardQuantum>,
244 detail::SkipPointers<Encoder::skipQuantum> {
246 typedef Encoder EncoderType;
247 typedef typename Encoder::ValueType ValueType;
248 // A bitvector can only be as large as its largest value.
249 typedef typename Encoder::ValueType SizeType;
250 typedef typename Encoder::SkipValueType SkipValueType;
252 explicit BitVectorReader(const typename Encoder::CompressedList& list)
253 : detail::ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers),
254 detail::SkipPointers<Encoder::skipQuantum>(list.skipPointers),
// kUnchecked callers promise never to run off the end, so the bounds
// bookkeeping (elided here) is skipped; likewise for an empty list.
259 if (kUnchecked || UNLIKELY(list.size == 0)) {
264 upperBound_ = list.upperBound;
// Prime the 64-bit window with the first word of the bit vector.
268 block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
// Start positioned "before the first element".
271 value_ = kInvalidValue;
// Advance to the next set bit.  NOTE(review): the next() signature line
// is elided from this chunk.
275 if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
// Scan forward one 64-bit word at a time until a non-empty word appears.
279 while (block_ == 0) {
280 outer_ += sizeof(uint64_t);
281 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
// The lowest set bit is the next value; blsr clears it from the window.
285 auto inner = Instructions::ctz(block_);
286 block_ = Instructions::blsr(block_);
288 return setValue(inner);
// Advance by n elements.
291 bool skip(SizeType n) {
294 if (!kUnchecked && position() + n >= size_) {
297 // Small skip optimization.
298 if (LIKELY(n < kLinearScanThreshold)) {
299 for (size_t i = 0; i < n; ++i) {
// NOTE(review): the linear-scan loop body (repeated next() calls) and
// its return are elided from this chunk.
307 // Use forward pointer.
308 if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
// Jump to the value recorded for the last full quantum of elements,
// then fall through to skip the remainder.
309 const size_t steps = position_ / Encoder::forwardQuantum;
310 const size_t dest = folly::loadUnaligned<SkipValueType>(
311 this->forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
314 n = position_ + 1 - steps * Encoder::forwardQuantum;
318 // Find necessary block.
319 while ((cnt = Instructions::popcount(block_)) < n) {
321 outer_ += sizeof(uint64_t);
322 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
325 // Skip to the n-th one in the block.
327 auto inner = select64<Instructions>(block_, n - 1);
// Clear the selected bit and everything below it from the window.
328 block_ &= (uint64_t(-1) << inner) << 1;
330 return setValue(inner);
// Advance to the first element >= v.
333 bool skipTo(ValueType v) {
334 // Also works when value_ == kInvalidValue.
335 if (v != kInvalidValue) { DCHECK_GE(v + 1, value_ + 1); }
337 if (!kUnchecked && v > upperBound_) {
339 } else if (v == value_) {
343 // Small skip optimization.
344 if (v - value_ < kLinearScanThreshold) {
// NOTE(review): the do/while header and next() call of the linear scan
// are elided from this chunk.
347 } while (value() < v);
// Skip pointer: jump straight to the quantum bucket containing v; the
// pointer stores the element count at that quantum boundary.
352 if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
353 size_t q = v / Encoder::skipQuantum;
354 auto skipPointer = folly::loadUnaligned<SkipValueType>(
355 this->skipPointers_ + (q - 1) * sizeof(SkipValueType));
356 position_ = static_cast<SizeType>(skipPointer) - 1;
358 reposition(q * Encoder::skipQuantum);
// Walk whole words up to the one containing v, counting set bits to
// keep position_ current.  `outer` is a byte offset: (v / 64) words * 8.
362 size_t outer = v / 64 * 8;
364 while (outer_ < outer) {
365 position_ += Instructions::popcount(block_);
366 outer_ += sizeof(uint64_t);
367 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
370 DCHECK_EQ(outer_, outer);
// Count set bits below bit v%64, then scan for the first set bit >= v.
371 uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
372 position_ += Instructions::popcount(block_ & ~mask) + 1;
// NOTE(review): a line masking block_ down to bits >= v appears to be
// elided between here and the loop below — TODO confirm upstream.
375 while (block_ == 0) {
376 outer_ += sizeof(uint64_t);
377 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
380 auto inner = Instructions::ctz(block_);
381 block_ = Instructions::blsr(block_);
// Accessors; bodies are partially elided from this chunk.
387 SizeType size() const {
392 return position() < size(); // Also checks that position() != -1.
395 SizeType position() const {
398 ValueType value() const {
// Absolute variants: jump to the n-th element / first element >= v.
403 bool jump(SizeType n) {
408 bool jumpTo(ValueType v) {
414 value_ = kInvalidValue;
// All-ones sentinel, chosen so kInvalidValue + 1 wraps to 0.
420 constexpr static ValueType kInvalidValue =
421 std::numeric_limits<ValueType>::max(); // Must hold kInvalidValue + 1 == 0.
// Record the current value from the word byte-offset plus bit index.
423 bool setValue(size_t inner) {
424 value_ = static_cast<ValueType>(8 * outer_ + inner);
// Reload the window at absolute bit index `dest`, clearing bits below it.
428 void reposition(size_t dest) {
429 outer_ = dest / 64 * 8;
430 // We maintain the invariant that outer_ is divisible by 8.
431 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
432 block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
// Below this threshold a linear next() scan beats pointer-based skips.
435 constexpr static size_t kLinearScanThreshold = 4;
437 const uint8_t* const bits_;
// NOTE(review): remaining members (block_, outer_, position_, value_,
// size_) are elided from this chunk.
444 ValueType upperBound_;