2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * @author Philip Pronin (philipp@fb.com)
20 * Based on the paper by Sebastiano Vigna,
21 * "Quasi-succinct indices" (arxiv:1206.4300).
24 #ifndef FOLLY_EXPERIMENTAL_ELIAS_FANO_CODING_H
25 #define FOLLY_EXPERIMENTAL_ELIAS_FANO_CODING_H
28 #error EliasFanoCoding.h requires GCC
32 #error EliasFanoCoding.h requires x86_64
37 #include <type_traits>
38 #include <boost/noncopyable.hpp>
39 #include <glog/logging.h>
41 #include <folly/Bits.h>
42 #include <folly/CpuId.h>
43 #include <folly/Likely.h>
44 #include <folly/Range.h>
45 #include <folly/experimental/Select64.h>
47 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
48 #error EliasFanoCoding.h requires little endianness
51 namespace folly { namespace compression {
53 struct EliasFanoCompressedList {
54 EliasFanoCompressedList() { }
57 ::free(const_cast<unsigned char*>(lower.data()));
58 ::free(const_cast<unsigned char*>(upper.data()));
59 ::free(const_cast<unsigned char*>(skipPointers.data()));
60 ::free(const_cast<unsigned char*>(forwardPointers.data()));
64 uint8_t numLowerBits = 0;
66 // WARNING: EliasFanoCompressedList has no ownership of
67 // lower, upper, skipPointers and forwardPointers.
68 // The 7 bytes following the last byte of lower and upper
69 // sequences should be readable.
70 folly::ByteRange lower;
71 folly::ByteRange upper;
73 folly::ByteRange skipPointers;
74 folly::ByteRange forwardPointers;
78 // In version 1 skip / forward pointers encoding has been changed,
79 // so SkipValue = uint32_t is able to address up to ~4B elements,
80 // instead of only ~2B.
81 template <class Value,
82 class SkipValue = size_t,
83 size_t kSkipQuantum = 0, // 0 = disabled
84 size_t kForwardQuantum = 0, // 0 = disabled
86 struct EliasFanoEncoder {
87 static_assert(std::is_integral<Value>::value &&
88 std::is_unsigned<Value>::value,
89 "Value should be unsigned integral");
91 typedef EliasFanoCompressedList CompressedList;
93 typedef Value ValueType;
94 typedef SkipValue SkipValueType;
96 static constexpr size_t skipQuantum = kSkipQuantum;
97 static constexpr size_t forwardQuantum = kForwardQuantum;
98 static constexpr size_t version = kVersion;
100 static uint8_t defaultNumLowerBits(size_t upperBound, size_t size) {
101 if (size == 0 || upperBound < size) {
104 // floor(log(upperBound / size));
105 return folly::findLastSet(upperBound / size) - 1;
108 // Requires: input range (begin, end) is sorted (encoding
109 // crashes if it's not).
110 // WARNING: encode() mallocates lower, upper, skipPointers
111 // and forwardPointers. As EliasFanoCompressedList has
112 // no ownership of them, you need to call free() explicitly.
113 template <class RandomAccessIterator>
114 static EliasFanoCompressedList encode(RandomAccessIterator begin,
115 RandomAccessIterator end) {
117 return EliasFanoCompressedList();
119 EliasFanoEncoder encoder(end - begin, *(end - 1));
120 for (; begin != end; ++begin) {
123 return encoder.finish();
126 EliasFanoEncoder(size_t size, ValueType upperBound) {
131 uint8_t numLowerBits = defaultNumLowerBits(upperBound, size);
133 // This is detail::writeBits56 limitation.
134 numLowerBits = std::min<uint8_t>(numLowerBits, 56);
135 CHECK_LT(numLowerBits, 8 * sizeof(Value)); // As we shift by numLowerBits.
137 // WARNING: Current read/write logic assumes that the 7 bytes
138 // following the last byte of lower and upper sequences are
139 // readable (stored value doesn't matter and won't be changed),
140 // so we allocate additional 7B, but do not include them in size
141 // of returned value.
144 const size_t lowerSize = (numLowerBits * size + 7) / 8;
145 if (lowerSize > 0) { // numLowerBits != 0
146 lower_ = static_cast<unsigned char*>(calloc(lowerSize + 7, 1));
150 // Upper bits are stored using unary delta encoding.
151 // For example, (3 5 5 9) will be encoded as 1000011001000_2.
152 const size_t upperSizeBits =
153 (upperBound >> numLowerBits) + // Number of 0-bits to be stored.
155 const size_t upperSize = (upperSizeBits + 7) / 8;
156 upper_ = static_cast<unsigned char*>(calloc(upperSize + 7, 1));
158 // *** Skip pointers.
159 // Store (1-indexed) position of every skipQuantum-th
160 // 0-bit in upper bits sequence.
161 size_t numSkipPointers = 0;
162 /* static */ if (skipQuantum != 0) {
163 /* static */ if (kVersion > 0) {
164 CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
166 CHECK_LT(upperSizeBits, std::numeric_limits<SkipValueType>::max());
168 // 8 * upperSize is used here instead of upperSizeBits, as that is
169 // more serialization-friendly way (upperSizeBits isn't known outside of
170 // this function, unlike upperSize; thus numSkipPointers could easily be
171 // deduced from upperSize).
172 numSkipPointers = (8 * upperSize - size) / (skipQuantum ?: 1);
173 skipPointers_ = static_cast<SkipValueType*>(
176 : calloc(numSkipPointers, sizeof(SkipValueType)));
179 // *** Forward pointers.
180 // Store (1-indexed) position of every forwardQuantum-th
181 // 1-bit in upper bits sequence.
182 size_t numForwardPointers = 0;
183 /* static */ if (forwardQuantum != 0) {
184 /* static */ if (kVersion > 0) {
185 CHECK_LT(upperBound >> numLowerBits,
186 std::numeric_limits<SkipValueType>::max());
188 CHECK_LT(upperSizeBits, std::numeric_limits<SkipValueType>::max());
191 // '?: 1' is a workaround for false 'division by zero' compile-time error.
192 numForwardPointers = size / (forwardQuantum ?: 1);
193 forwardPointers_ = static_cast<SkipValueType*>(
194 numForwardPointers == 0
196 : malloc(numForwardPointers * sizeof(SkipValueType)));
201 result_.numLowerBits = numLowerBits;
202 result_.lower.reset(lower_, lowerSize);
203 result_.upper.reset(upper_, upperSize);
204 result_.skipPointers.reset(
205 reinterpret_cast<unsigned char*>(skipPointers_),
206 numSkipPointers * sizeof(SkipValueType));
207 result_.forwardPointers.reset(
208 reinterpret_cast<unsigned char*>(forwardPointers_),
209 numForwardPointers * sizeof(SkipValueType));
212 void add(ValueType value) {
213 CHECK_GE(value, lastValue_);
215 const auto numLowerBits = result_.numLowerBits;
216 const ValueType upperBits = value >> numLowerBits;
218 // Upper sequence consists of upperBits 0-bits and (size_ + 1) 1-bits.
219 const size_t pos = upperBits + size_;
220 upper_[pos / 8] |= 1U << (pos % 8);
221 // Append numLowerBits bits to lower sequence.
222 if (numLowerBits != 0) {
223 const ValueType lowerBits = value & ((ValueType(1) << numLowerBits) - 1);
224 writeBits56(lower_, size_ * numLowerBits, numLowerBits, lowerBits);
227 /* static */ if (skipQuantum != 0) {
228 while ((skipPointersSize_ + 1) * skipQuantum <= upperBits) {
229 /* static */ if (kVersion > 0) {
230 // Since version 1, just the number of preceding 1-bits is stored.
231 skipPointers_[skipPointersSize_] = size_;
233 skipPointers_[skipPointersSize_] =
234 size_ + (skipPointersSize_ + 1) * skipQuantum;
240 /* static */ if (forwardQuantum != 0) {
241 if ((size_ + 1) % forwardQuantum == 0) {
242 const auto pos = size_ / forwardQuantum;
243 /* static */ if (kVersion > 0) {
244 // Since version 1, just the number of preceding 0-bits is stored.
245 forwardPointers_[pos] = upperBits;
247 forwardPointers_[pos] = upperBits + size_ + 1;
256 const EliasFanoCompressedList& finish() const {
257 CHECK_EQ(size_, result_.size);
262 // Writes value (with len up to 56 bits) to data starting at pos-th bit.
263 static void writeBits56(unsigned char* data, size_t pos,
264 uint8_t len, uint64_t value) {
265 DCHECK_LE(uint32_t(len), 56);
266 DCHECK_EQ(0, value & ~((uint64_t(1) << len) - 1));
267 unsigned char* const ptr = data + (pos / 8);
268 uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
269 ptrv |= value << (pos % 8);
270 folly::storeUnaligned<uint64_t>(ptr, ptrv);
273 unsigned char* lower_ = nullptr;
274 unsigned char* upper_ = nullptr;
275 SkipValueType* skipPointers_ = nullptr;
276 SkipValueType* forwardPointers_ = nullptr;
278 ValueType lastValue_ = 0;
280 size_t skipPointersSize_ = 0;
282 EliasFanoCompressedList result_;
285 // NOTE: It's recommended to compile EF coding with -msse4.2, starting
286 // with Nehalem, Intel CPUs support POPCNT instruction and gcc will emit
287 // it for __builtin_popcountll intrinsic.
288 // But we provide an alternative way for the client code: it can switch to
289 // the appropriate version of EliasFanoReader<> in realtime (client should
290 // implement this switching logic itself) by specifying instruction set to
292 namespace instructions {
295 static bool supported(const folly::CpuId& cpuId = {}) {
298 static inline uint64_t popcount(uint64_t value) {
299 return __builtin_popcountll(value);
301 static inline int ctz(uint64_t value) {
303 return __builtin_ctzll(value);
305 static inline uint64_t blsr(uint64_t value) {
306 return value & (value - 1);
310 struct Nehalem : public Default {
311 static bool supported(const folly::CpuId& cpuId = {}) {
312 return cpuId.popcnt();
314 static inline uint64_t popcount(uint64_t value) {
315 // POPCNT is supported starting with Intel Nehalem, AMD K10.
317 asm ("popcntq %1, %0" : "=r" (result) : "r" (value));
322 struct Haswell : public Nehalem {
323 static bool supported(const folly::CpuId& cpuId = {}) {
324 return Nehalem::supported(cpuId) && cpuId.bmi1();
326 static inline uint64_t blsr(uint64_t value) {
327 // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
328 // BLSR combines two instuctions into one and reduces register pressure.
330 asm ("blsrq %1, %0" : "=r" (result) : "r" (value));
335 } // namespace instructions
339 template <class Encoder, class Instructions>
340 class UpperBitsReader {
341 typedef typename Encoder::SkipValueType SkipValueType;
343 typedef typename Encoder::ValueType ValueType;
345 explicit UpperBitsReader(const EliasFanoCompressedList& list)
346 : forwardPointers_(list.forwardPointers.data()),
347 skipPointers_(list.skipPointers.data()),
348 start_(list.upper.data()) {
353 block_ = start_ != nullptr ? folly::loadUnaligned<block_t>(start_) : 0;
360 size_t position() const { return position_; }
361 ValueType value() const { return value_; }
364 // Skip to the first non-zero block.
365 while (block_ == 0) {
366 outer_ += sizeof(block_t);
367 block_ = folly::loadUnaligned<block_t>(start_ + outer_);
371 inner_ = Instructions::ctz(block_);
372 block_ = Instructions::blsr(block_);
377 ValueType skip(size_t n) {
380 position_ += n; // n 1-bits will be read.
382 // Use forward pointer.
383 if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
384 // Workaround to avoid 'division by zero' compile-time error.
385 constexpr size_t q = Encoder::forwardQuantum ?: 1;
387 const size_t steps = position_ / q;
389 folly::loadUnaligned<SkipValueType>(
390 forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
392 /* static */ if (Encoder::version > 0) {
393 reposition(dest + steps * q);
397 n = position_ + 1 - steps * q; // n is > 0.
398 // correct inner_ will be set at the end.
402 // Find necessary block.
403 while ((cnt = Instructions::popcount(block_)) < n) {
405 outer_ += sizeof(block_t);
406 block_ = folly::loadUnaligned<block_t>(start_ + outer_);
409 // Skip to the n-th one in the block.
411 inner_ = select64<Instructions>(block_, n - 1);
412 block_ &= (block_t(-1) << inner_) << 1;
417 // Skip to the first element that is >= v and located *after* the current
418 // one (so even if current value equals v, position will be increased by 1).
419 ValueType skipToNext(ValueType v) {
420 DCHECK_GE(v, value_);
423 if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) {
424 // Workaround to avoid 'division by zero' compile-time error.
425 constexpr size_t q = Encoder::skipQuantum ?: 1;
427 const size_t steps = v / q;
429 folly::loadUnaligned<SkipValueType>(
430 skipPointers_ + (steps - 1) * sizeof(SkipValueType));
432 /* static */ if (Encoder::version > 0) {
433 reposition(dest + q * steps);
434 position_ = dest - 1;
437 position_ = dest - q * steps - 1;
439 // Correct inner_ and value_ will be set during the next()
442 // NOTE: Corresponding block of lower bits sequence may be
443 // prefetched here (via __builtin_prefetch), but experiments
444 // didn't show any significant improvements.
449 size_t skip = v - (8 * outer_ - position_ - 1);
451 constexpr size_t kBitsPerBlock = 8 * sizeof(block_t);
452 while ((cnt = Instructions::popcount(~block_)) < skip) {
454 position_ += kBitsPerBlock - cnt;
455 outer_ += sizeof(block_t);
456 block_ = folly::loadUnaligned<block_t>(start_ + outer_);
460 auto inner = select64<Instructions>(~block_, skip - 1);
461 position_ += inner - skip + 1;
462 block_ &= block_t(-1) << inner;
469 ValueType jump(size_t n) {
470 if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) {
473 position_ = -1; // Avoid reading the head, skip() will reposition.
478 ValueType jumpToNext(ValueType v) {
479 if (Encoder::skipQuantum == 0 || v < Encoder::skipQuantum) {
482 value_ = 0; // Avoid reading the head, skipToNext() will reposition.
484 return skipToNext(v);
488 ValueType setValue() {
489 value_ = static_cast<ValueType>(8 * outer_ + inner_ - position_);
493 void reposition(size_t dest) {
495 block_ = folly::loadUnaligned<block_t>(start_ + outer_);
496 block_ &= ~((block_t(1) << (dest % 8)) - 1);
499 typedef unsigned long long block_t;
500 const unsigned char* const forwardPointers_;
501 const unsigned char* const skipPointers_;
502 const unsigned char* const start_;
504 size_t outer_; // Outer offset: number of consumed bytes in upper.
505 size_t inner_; // Inner offset: (bit) position in current block.
506 size_t position_; // Index of current value (= #reads - 1).
510 } // namespace detail
512 template <class Encoder,
513 class Instructions = instructions::Default>
514 class EliasFanoReader : private boost::noncopyable {
516 typedef Encoder EncoderType;
517 typedef typename Encoder::ValueType ValueType;
519 explicit EliasFanoReader(const EliasFanoCompressedList& list)
521 lowerMask_((ValueType(1) << list_.numLowerBits) - 1),
523 DCHECK(Instructions::supported());
524 // To avoid extra branching during skipTo() while reading
525 // upper sequence we need to know the last element.
526 if (UNLIKELY(list_.size == 0)) {
530 ValueType lastUpperValue = 8 * list_.upper.size() - list_.size;
531 auto it = list_.upper.end() - 1;
533 lastUpperValue -= 8 - folly::findLastSet(*it);
534 lastValue_ = readLowerPart(list_.size - 1) |
535 (lastUpperValue << list_.numLowerBits);
545 if (UNLIKELY(progress_ >= list_.size)) {
548 value_ = readLowerPart(progress_) |
549 (upper_.next() << list_.numLowerBits);
554 bool skip(size_t n) {
558 if (LIKELY(progress_ <= list_.size)) {
559 if (LIKELY(n < kLinearScanThreshold)) {
560 for (size_t i = 0; i < n; ++i) upper_.next();
564 value_ = readLowerPart(progress_ - 1) |
565 (upper_.value() << list_.numLowerBits);
572 bool skipTo(ValueType value) {
573 DCHECK_GE(value, value_);
574 if (value <= value_) {
576 } else if (value > lastValue_) {
580 size_t upperValue = (value >> list_.numLowerBits);
581 size_t upperSkip = upperValue - upper_.value();
582 // The average density of ones in upper bits is 1/2.
583 // LIKELY here seems to make things worse, even for small skips.
584 if (upperSkip < 2 * kLinearScanThreshold) {
587 } while (UNLIKELY(upper_.value() < upperValue));
589 upper_.skipToNext(upperValue);
596 bool jump(size_t n) {
597 if (LIKELY(n - 1 < list_.size)) { // n > 0 && n <= list_.size
599 value_ = readLowerPart(n - 1) | (upper_.jump(n) << list_.numLowerBits);
608 bool jumpTo(ValueType value) {
612 } else if (value > lastValue_) {
616 upper_.jumpToNext(value >> list_.numLowerBits);
621 size_t size() const { return list_.size; }
623 size_t position() const { return progress_ - 1; }
624 ValueType value() const { return value_; }
628 value_ = std::numeric_limits<ValueType>::max();
629 progress_ = list_.size + 1;
633 ValueType readLowerPart(size_t i) const {
634 DCHECK_LT(i, list_.size);
635 const size_t pos = i * list_.numLowerBits;
636 const unsigned char* ptr = list_.lower.data() + (pos / 8);
637 const uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
638 return lowerMask_ & (ptrv >> (pos % 8));
641 void iterateTo(ValueType value) {
643 value_ = readLowerPart(upper_.position()) |
644 (upper_.value() << list_.numLowerBits);
645 if (LIKELY(value_ >= value)) break;
648 progress_ = upper_.position() + 1;
651 constexpr static size_t kLinearScanThreshold = 8;
653 const EliasFanoCompressedList list_;
654 const ValueType lowerMask_;
655 detail::UpperBitsReader<Encoder, Instructions> upper_;
656 size_t progress_ = 0;
657 ValueType value_ = 0;
658 ValueType lastValue_;
663 #endif // FOLLY_EXPERIMENTAL_ELIAS_FANO_CODING_H