 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
21 #include <type_traits>
23 #include <folly/Bits.h>
24 #include <folly/Likely.h>
25 #include <folly/Portability.h>
26 #include <folly/Range.h>
27 #include <folly/experimental/Bits.h>
28 #include <folly/experimental/Instructions.h>
29 #include <folly/experimental/Select64.h>
30 #include <glog/logging.h>
33 #error BitVectorCoding.h requires x86_64
36 namespace folly { namespace compression {
38 static_assert(kIsLittleEndian, "BitVectorCoding.h requires little endianness");
40 template <class Pointer>
41 struct BitVectorCompressedListBase {
42 BitVectorCompressedListBase() = default;
44 template <class OtherPointer>
45 BitVectorCompressedListBase(
46 const BitVectorCompressedListBase<OtherPointer>& other)
48 upperBound(other.upperBound),
50 bits(reinterpret_cast<Pointer>(other.bits)),
51 skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
52 forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}
54 template <class T = Pointer>
55 auto free() -> decltype(::free(T(nullptr))) {
56 return ::free(data.data());
60 size_t upperBound = 0;
62 folly::Range<Pointer> data;
64 Pointer bits = nullptr;
65 Pointer skipPointers = nullptr;
66 Pointer forwardPointers = nullptr;
69 typedef BitVectorCompressedListBase<const uint8_t*> BitVectorCompressedList;
70 typedef BitVectorCompressedListBase<uint8_t*> MutableBitVectorCompressedList;
72 template <class Value,
74 size_t kSkipQuantum = 0,
75 size_t kForwardQuantum = 0>
76 struct BitVectorEncoder {
77 static_assert(std::is_integral<Value>::value &&
78 std::is_unsigned<Value>::value,
79 "Value should be unsigned integral");
81 typedef BitVectorCompressedList CompressedList;
82 typedef MutableBitVectorCompressedList MutableCompressedList;
84 typedef Value ValueType;
85 typedef SkipValue SkipValueType;
88 static constexpr size_t skipQuantum = kSkipQuantum;
89 static constexpr size_t forwardQuantum = kForwardQuantum;
91 template <class RandomAccessIterator>
92 static MutableCompressedList encode(RandomAccessIterator begin,
93 RandomAccessIterator end) {
95 return MutableCompressedList();
97 BitVectorEncoder encoder(size_t(end - begin), *(end - 1));
98 for (; begin != end; ++begin) {
101 return encoder.finish();
104 explicit BitVectorEncoder(const MutableCompressedList& result)
105 : bits_(result.bits),
106 skipPointers_(result.skipPointers),
107 forwardPointers_(result.forwardPointers),
109 memset(result.data.data(), 0, result.data.size());
112 BitVectorEncoder(size_t size, ValueType upperBound)
114 Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
116 void add(ValueType value) {
117 CHECK_LT(value, std::numeric_limits<ValueType>::max());
118 // Also works when lastValue_ == -1.
119 CHECK_GT(value + 1, lastValue_ + 1)
120 << "BitVectorCoding only supports stricly monotone lists";
122 auto block = bits_ + (value / 64) * sizeof(uint64_t);
123 size_t inner = value % 64;
124 folly::Bits<folly::Unaligned<uint64_t>>::set(
125 reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);
127 if (skipQuantum != 0) {
128 size_t nextSkipPointerSize = value / skipQuantum;
129 while (skipPointersSize_ < nextSkipPointerSize) {
130 auto pos = skipPointersSize_++;
131 folly::storeUnaligned<SkipValueType>(
132 skipPointers_ + pos * sizeof(SkipValueType), size_);
136 if (forwardQuantum != 0) {
137 if (size_ != 0 && (size_ % forwardQuantum == 0)) {
138 const auto pos = size_ / forwardQuantum - 1;
139 folly::storeUnaligned<SkipValueType>(
140 forwardPointers_ + pos * sizeof(SkipValueType), value);
148 const MutableCompressedList& finish() const {
149 CHECK_EQ(size_, result_.size);
150 // TODO(ott): Relax this assumption.
151 CHECK_EQ(result_.upperBound, lastValue_);
156 uint8_t* const bits_ = nullptr;
157 uint8_t* const skipPointers_ = nullptr;
158 uint8_t* const forwardPointers_ = nullptr;
160 ValueType lastValue_ = -1;
162 size_t skipPointersSize_ = 0;
164 MutableCompressedList result_;
167 template <class Value,
170 size_t kForwardQuantum>
171 struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
173 static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
176 layout.upperBound = upperBound;
178 size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
179 layout.bits = bitVectorSizeInBytes;
181 if (skipQuantum != 0) {
182 size_t numSkipPointers = upperBound / skipQuantum;
183 layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
185 if (forwardQuantum != 0) {
186 size_t numForwardPointers = size / forwardQuantum;
187 layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
190 CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
195 size_t bytes() const { return bits + skipPointers + forwardPointers; }
197 template <class Range>
198 BitVectorCompressedListBase<typename Range::iterator> openList(
200 BitVectorCompressedListBase<typename Range::iterator> result;
202 result.upperBound = upperBound;
203 result.data = buf.subpiece(0, bytes());
204 auto advance = [&](size_t n) {
205 auto begin = buf.data();
210 result.bits = advance(bits);
211 result.skipPointers = advance(skipPointers);
212 result.forwardPointers = advance(forwardPointers);
213 CHECK_EQ(buf.data() - result.data.data(), bytes());
218 MutableCompressedList allocList() const {
219 uint8_t* buf = nullptr;
221 buf = static_cast<uint8_t*>(malloc(bytes() + 7));
223 folly::MutableByteRange bufRange(buf, bytes());
224 return openList(bufRange);
228 size_t upperBound = 0;
232 size_t skipPointers = 0;
233 size_t forwardPointers = 0;
236 template <class Encoder,
237 class Instructions = instructions::Default,
238 bool kUnchecked = false>
239 class BitVectorReader {
241 typedef Encoder EncoderType;
242 typedef typename Encoder::ValueType ValueType;
243 // A bitvector can only be as large as its largest value.
244 typedef typename Encoder::ValueType SizeType;
245 typedef typename Encoder::SkipValueType SkipValueType;
247 explicit BitVectorReader(const typename Encoder::CompressedList& list)
250 skipPointers_(list.skipPointers),
251 forwardPointers_(list.forwardPointers) {
254 if (kUnchecked || UNLIKELY(list.size == 0)) {
259 upperBound_ = list.upperBound;
263 block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
266 value_ = kInvalidValue;
270 if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
274 while (block_ == 0) {
275 outer_ += sizeof(uint64_t);
276 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
280 auto inner = Instructions::ctz(block_);
281 block_ = Instructions::blsr(block_);
283 return setValue(inner);
286 bool skip(SizeType n) {
289 if (!kUnchecked && position() + n >= size_) {
292 // Small skip optimization.
293 if (LIKELY(n < kLinearScanThreshold)) {
294 for (size_t i = 0; i < n; ++i) {
302 // Use forward pointer.
303 if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
304 const size_t steps = position_ / Encoder::forwardQuantum;
305 const size_t dest = folly::loadUnaligned<SkipValueType>(
306 forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
309 n = position_ + 1 - steps * Encoder::forwardQuantum;
313 // Find necessary block.
314 while ((cnt = Instructions::popcount(block_)) < n) {
316 outer_ += sizeof(uint64_t);
317 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
320 // Skip to the n-th one in the block.
322 auto inner = select64<Instructions>(block_, n - 1);
323 block_ &= (uint64_t(-1) << inner) << 1;
325 return setValue(inner);
328 bool skipTo(ValueType v) {
329 // Also works when value_ == kInvalidValue.
330 if (v != kInvalidValue) { DCHECK_GE(v + 1, value_ + 1); }
332 if (!kUnchecked && v > upperBound_) {
334 } else if (v == value_) {
338 // Small skip optimization.
339 if (v - value_ < kLinearScanThreshold) {
342 } while (value() < v);
347 if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
348 size_t q = v / Encoder::skipQuantum;
349 auto skipPointer = folly::loadUnaligned<SkipValueType>(
350 skipPointers_ + (q - 1) * sizeof(SkipValueType));
351 position_ = static_cast<SizeType>(skipPointer) - 1;
353 reposition(q * Encoder::skipQuantum);
357 size_t outer = v / 64 * 8;
359 while (outer_ < outer) {
360 position_ += Instructions::popcount(block_);
361 outer_ += sizeof(uint64_t);
362 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
365 DCHECK_EQ(outer_, outer);
366 uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
367 position_ += Instructions::popcount(block_ & ~mask) + 1;
370 while (block_ == 0) {
371 outer_ += sizeof(uint64_t);
372 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
375 auto inner = Instructions::ctz(block_);
376 block_ = Instructions::blsr(block_);
382 SizeType size() const {
387 return position() < size(); // Also checks that position() != -1.
390 SizeType position() const {
393 ValueType value() const {
398 bool jump(SizeType n) {
403 bool jumpTo(ValueType v) {
409 value_ = kInvalidValue;
415 constexpr static ValueType kInvalidValue =
416 std::numeric_limits<ValueType>::max(); // Must hold kInvalidValue + 1 == 0.
418 bool setValue(size_t inner) {
419 value_ = static_cast<ValueType>(8 * outer_ + inner);
423 void reposition(size_t dest) {
424 outer_ = dest / 64 * 8;
425 // We maintain the invariant that outer_ is divisible by 8.
426 block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
427 block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
430 constexpr static size_t kLinearScanThreshold = 4;
438 ValueType upperBound_;
439 const uint8_t* const bits_;
440 const uint8_t* const skipPointers_;
441 const uint8_t* const forwardPointers_;