From 121ecb47e326245d256a2a4ac32e4670429d34ba Mon Sep 17 00:00:00 2001 From: Giuseppe Ottaviano Date: Wed, 3 May 2017 14:58:42 -0700 Subject: [PATCH] Template EliasFanoReader on size type Summary: Shrink the reader type if less than 64 bits are sufficient for sizes. Do the same for `BitVectorCoding`, where we don't need an extra template parameter because the size is limited by the value domain. Reviewed By: philippv, luciang Differential Revision: D4976756 fbshipit-source-id: 685047da81a556d049fb924c612f99cea3056a82 --- folly/experimental/BitVectorCoding.h | 25 ++++--- folly/experimental/EliasFanoCoding.h | 73 ++++++++++++------- .../experimental/test/EliasFanoCodingTest.cpp | 17 +++-- 3 files changed, 72 insertions(+), 43 deletions(-) diff --git a/folly/experimental/BitVectorCoding.h b/folly/experimental/BitVectorCoding.h index a2985d9d..d3592937 100644 --- a/folly/experimental/BitVectorCoding.h +++ b/folly/experimental/BitVectorCoding.h @@ -240,6 +240,8 @@ class BitVectorReader { public: typedef Encoder EncoderType; typedef typename Encoder::ValueType ValueType; + // A bitvector can only be as large as its largest value. + typedef typename Encoder::ValueType SizeType; typedef typename Encoder::SkipValueType SkipValueType; explicit BitVectorReader(const typename Encoder::CompressedList& list) @@ -281,7 +283,7 @@ class BitVectorReader { return setValue(inner); } - bool skip(size_t n) { + bool skip(SizeType n) { CHECK_GT(n, 0); if (!kUnchecked && position() + n >= size_) { @@ -344,8 +346,9 @@ class BitVectorReader { if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) { size_t q = v / Encoder::skipQuantum; - position_ = size_t(folly::loadUnaligned( - skipPointers_ + (q - 1) * sizeof(SkipValueType))) - 1; + auto skipPointer = folly::loadUnaligned( + skipPointers_ + (q - 1) * sizeof(SkipValueType)); + position_ = static_cast(skipPointer) - 1; reposition(q * Encoder::skipQuantum); } @@ -376,19 +379,23 @@ class BitVectorReader { return true; } - size_t size() const { return size_; } + SizeType size() const { + return size_; + } bool valid() const { return position() < size(); // Also checks that position() != -1. } - size_t position() const { return position_; } + SizeType position() const { + return position_; + } ValueType value() const { DCHECK(valid()); return value_; } - bool jump(size_t n) { + bool jump(SizeType n) { reset(); return skip(n + 1); } @@ -422,12 +429,12 @@ class BitVectorReader { constexpr static size_t kLinearScanThreshold = 4; - size_t outer_; - size_t position_; uint64_t block_; + SizeType outer_; + SizeType position_; ValueType value_; - size_t size_; + SizeType size_; ValueType upperBound_; const uint8_t* const bits_; const uint8_t* const skipPointers_; diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index 66679ca1..9276f960 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -331,7 +331,7 @@ struct EliasFanoEncoderV2 +template class UpperBitsReader { typedef typename Encoder::SkipValueType SkipValueType; public: @@ -346,13 +346,17 @@ class UpperBitsReader { void reset() { block_ = start_ != nullptr ? folly::loadUnaligned(start_) : 0; + position_ = std::numeric_limits::max(); outer_ = 0; - position_ = std::numeric_limits::max(); value_ = 0; } - size_t position() const { return position_; } - ValueType value() const { return value_; } + SizeType position() const { + return position_; + } + ValueType value() const { + return value_; + } ValueType next() { // Skip to the first non-zero block. @@ -368,7 +372,7 @@ class UpperBitsReader { return setValue(inner); } - ValueType skip(size_t n) { + ValueType skip(SizeType n) { DCHECK_GT(n, 0); position_ += n; // n 1-bits will be read. @@ -448,7 +452,8 @@ class UpperBitsReader { if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) { reset(); } else { - position_ = size_t(-1); // Avoid reading the head, skip() will reposition. + // Avoid reading the head, skip() will reposition. + position_ = std::numeric_limits::max(); } return skip(n); } @@ -463,17 +468,17 @@ class UpperBitsReader { } ValueType previousValue() const { - DCHECK_NE(position(), -1); + DCHECK_NE(position(), std::numeric_limits::max()); DCHECK_GT(position(), 0); - size_t outer = outer_; + auto outer = outer_; auto inner = size_t(value_) - 8 * outer_ + position_; block_t block = folly::loadUnaligned(start_ + outer); block &= (block_t(1) << inner) - 1; while (UNLIKELY(block == 0)) { DCHECK_GT(outer, 0); - outer -= std::min(sizeof(block_t), outer); + outer -= std::min(sizeof(block_t), outer); block = folly::loadUnaligned(start_ + outer); } @@ -481,7 +486,7 @@ class UpperBitsReader { return static_cast(8 * outer + inner - (position_ - 1)); } - void setDone(size_t endPos) { + void setDone(SizeType endPos) { position_ = endPos; } @@ -491,19 +496,23 @@ class UpperBitsReader { return value_; } - void reposition(size_t dest) { + void reposition(SizeType dest) { outer_ = dest / 8; block_ = folly::loadUnaligned(start_ + outer_); block_ &= ~((block_t(1) << (dest % 8)) - 1); } - typedef uint64_t block_t; + using block_t = uint64_t; + // The size in bytes of the upper bits is limited by n + universe / 8, + // so a type that can hold either sizes or values is sufficient. + using OuterType = typename std::common_type::type; + const unsigned char* const forwardPointers_; const unsigned char* const skipPointers_; const unsigned char* const start_; block_t block_; - size_t outer_; // Outer offset: number of consumed bytes in upper. - size_t position_; // Index of current value (= #reads - 1). + SizeType position_; // Index of current value (= #reads - 1). + OuterType outer_; // Outer offset: number of consumed bytes in upper. ValueType value_; }; @@ -512,18 +521,20 @@ class UpperBitsReader { // If kUnchecked = true the caller must guarantee that all the // operations return valid elements, i.e., they would never return // false if checked. -template +template < + class Encoder, + class Instructions = instructions::Default, + bool kUnchecked = false, + class SizeType = size_t> class EliasFanoReader { public: typedef Encoder EncoderType; typedef typename Encoder::ValueType ValueType; explicit EliasFanoReader(const typename Encoder::CompressedList& list) - : size_(list.size), + : upper_(list), lower_(list.lower), - upper_(list), + size_(list.size), numLowerBits_(list.numLowerBits) { DCHECK(Instructions::supported()); // To avoid extra branching during skipTo() while reading @@ -556,12 +567,13 @@ class EliasFanoReader { return true; } - bool skip(size_t n) { + bool skip(SizeType n) { CHECK_GT(n, 0); if (kUnchecked || LIKELY(position() + n < size_)) { if (LIKELY(n < kLinearScanThreshold)) { - for (size_t i = 0; i < n; ++i) upper_.next(); + for (SizeType i = 0; i < n; ++i) + upper_.next(); } else { upper_.skip(n); } @@ -599,7 +611,7 @@ class EliasFanoReader { return true; } - bool jump(size_t n) { + bool jump(SizeType n) { if (LIKELY(n < size_)) { // Also checks that n != -1. value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_); return true; @@ -624,21 +636,26 @@ class EliasFanoReader { (upper_.previousValue() << numLowerBits_); } - size_t size() const { return size_; } + SizeType size() const { + return size_; + } bool valid() const { return position() < size(); // Also checks that position() != -1. } - size_t position() const { return upper_.position(); } + SizeType position() const { + return upper_.position(); + } ValueType value() const { DCHECK(valid()); return value_; } private: + // Must hold kInvalidValue + 1 == 0. constexpr static ValueType kInvalidValue = - std::numeric_limits::max(); // Must hold kInvalidValue + 1 == 0. + std::numeric_limits::max(); bool setDone() { value_ = kInvalidValue; @@ -646,7 +663,7 @@ class EliasFanoReader { return false; } - ValueType readLowerPart(size_t i) const { + ValueType readLowerPart(SizeType i) const { DCHECK_LT(i, size_); const size_t pos = i * numLowerBits_; const unsigned char* ptr = lower_ + (pos / 8); @@ -668,9 +685,9 @@ class EliasFanoReader { constexpr static size_t kLinearScanThreshold = 8; - size_t size_; + detail::UpperBitsReader upper_; const uint8_t* lower_; - detail::UpperBitsReader upper_; + SizeType size_; ValueType value_ = kInvalidValue; ValueType lastValue_; uint8_t numLowerBits_; diff --git a/folly/experimental/test/EliasFanoCodingTest.cpp b/folly/experimental/test/EliasFanoCodingTest.cpp index 12df2224..55af37d8 100644 --- a/folly/experimental/test/EliasFanoCodingTest.cpp +++ b/folly/experimental/test/EliasFanoCodingTest.cpp @@ -94,11 +94,12 @@ class EliasFanoCodingTest : public ::testing::Test { testEmpty(); } - template + template void doTestAll() { typedef EliasFanoEncoderV2< uint32_t, uint32_t, kSkipQuantum, kForwardQuantum> Encoder; - typedef EliasFanoReader Reader; + using Reader = + EliasFanoReader; testAll({0}); testAll(generateRandomList(100 * 1000, 10 * 1000 * 1000)); testAll(generateSeqList(1, 100000, 100)); @@ -110,19 +111,23 @@ TEST_F(EliasFanoCodingTest, Empty) { } TEST_F(EliasFanoCodingTest, Simple) { - doTestAll<0, 0>(); + doTestAll<0, 0, uint32_t>(); + doTestAll<0, 0, size_t>(); } TEST_F(EliasFanoCodingTest, SkipPointers) { - doTestAll<128, 0>(); + doTestAll<128, 0, uint32_t>(); + doTestAll<128, 0, size_t>(); } TEST_F(EliasFanoCodingTest, ForwardPointers) { - doTestAll<0, 128>(); + doTestAll<0, 128, uint32_t>(); + doTestAll<0, 128, size_t>(); } TEST_F(EliasFanoCodingTest, SkipForwardPointers) { - doTestAll<128, 128>(); + doTestAll<128, 128, uint32_t>(); + doTestAll<128, 128, size_t>(); } TEST_F(EliasFanoCodingTest, Select64) { -- 2.34.1