From: Vignesh Gowda Date: Wed, 1 Jul 2015 21:07:37 +0000 (-0700) Subject: Implement a BitVector-based list X-Git-Tag: v0.49.0~16 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=308f7da1922d0168c46d748a4f707ac86ae0766d;p=folly.git Implement a BitVector-based list Summary: Implemented a BitVector Encoder and BitVector Reader with the same interface as EliasFanoCoding.h Reviewed By: @ot, @philippv Differential Revision: D2198935 --- diff --git a/folly/Makefile.am b/folly/Makefile.am index d85c052d..f0eaa9c9 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -74,6 +74,7 @@ nobase_follyinclude_HEADERS = \ EvictingCacheMap.h \ experimental/AutoTimer.h \ experimental/Bits.h \ + experimental/BitVectorCoding.h \ experimental/ExecutionObserver.h \ experimental/EliasFanoCoding.h \ experimental/EventCount.h \ diff --git a/folly/experimental/BitVectorCoding.h b/folly/experimental/BitVectorCoding.h new file mode 100644 index 00000000..edaea7a5 --- /dev/null +++ b/folly/experimental/BitVectorCoding.h @@ -0,0 +1,439 @@ +/* + * Copyright 2015 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H +#define FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __GNUC__ +#error BitVectorCoding.h requires GCC +#endif + +#if !FOLLY_X64 +#error BitVectorCoding.h requires x86_64 +#endif + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +#error BitVectorCoding.h requires little endianness +#endif + +namespace folly { namespace compression { + +template +struct BitVectorCompressedListBase { + BitVectorCompressedListBase() = default; + + template + BitVectorCompressedListBase( + const BitVectorCompressedListBase& other) + : size(other.size), + upperBound(other.upperBound), + data(other.data), + bits(reinterpret_cast(other.bits)), + skipPointers(reinterpret_cast(other.skipPointers)), + forwardPointers(reinterpret_cast(other.forwardPointers)) {} + + void free() { ::free(const_cast(data.data())); } + + size_t getUpperBound() const { return upperBound; } + + size_t size = 0; + size_t upperBound = 0; + + folly::Range data; + + Pointer bits = nullptr; + Pointer skipPointers = nullptr; + Pointer forwardPointers = nullptr; +}; + +typedef BitVectorCompressedListBase BitVectorCompressedList; +typedef BitVectorCompressedListBase MutableBitVectorCompressedList; + +template +struct BitVectorEncoder { + static_assert(std::is_integral::value && + std::is_unsigned::value, + "Value should be unsigned integral"); + + typedef BitVectorCompressedList CompressedList; + + typedef Value ValueType; + typedef SkipValue SkipValueType; + struct Layout; + + static constexpr size_t skipQuantum = kSkipQuantum; + static constexpr size_t forwardQuantum = kForwardQuantum; + + template + static BitVectorCompressedList encode(RandomAccessIterator begin, + RandomAccessIterator end) { + if (begin == end) { + return BitVectorCompressedList(); + } + BitVectorEncoder encoder(end - begin, *(end - 1)); + for (; begin != end; ++begin) { + encoder.add(*begin); + } + return encoder.finish(); + } + + explicit BitVectorEncoder(const MutableBitVectorCompressedList& result) + : bits_(result.bits), + skipPointers_(result.skipPointers), + forwardPointers_(result.forwardPointers), + result_(result) { + memset(result.data.data(), 0, result.data.size()); + } + + BitVectorEncoder(size_t size, ValueType upperBound) + : BitVectorEncoder( + Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {} + + void add(ValueType value) { + CHECK_GE(value, lastValue_); + auto block = bits_ + (value / 64) * sizeof(uint64_t); + size_t inner = value % 64; + folly::Bits>::set( + reinterpret_cast*>(block), inner); + + if (skipQuantum != 0) { + size_t nextSkipPointerSize = value / (skipQuantum ?: 1); + while (skipPointersSize_ < nextSkipPointerSize) { + auto pos = skipPointersSize_++; + folly::storeUnaligned( + skipPointers_ + pos * sizeof(SkipValueType), size_); + } + } + + if (forwardQuantum != 0) { + if ( size_ != 0 && (size_ % (forwardQuantum ?: 1) == 0)) { + const auto pos = size_ / (forwardQuantum ?: 1) - 1; + folly::storeUnaligned( + forwardPointers_ + pos * sizeof(SkipValueType), value); + } + } + + lastValue_ = value; + ++size_; + } + + const BitVectorCompressedList& finish() const { + CHECK_EQ(size_, result_.size); + // TODO(ott): Relax this assumption. + CHECK_EQ(result_.getUpperBound(), lastValue_); + return result_; + } + + private: + uint8_t* const bits_ = nullptr; + uint8_t* const skipPointers_ = nullptr; + uint8_t* const forwardPointers_ = nullptr; + + ValueType lastValue_ = 0; + size_t size_ = 0; + size_t skipPointersSize_ = 0; + + BitVectorCompressedList result_; +}; + +template +struct BitVectorEncoder:: + Layout { + static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) { + Layout layout; + layout.size = size; + layout.upperBound = upperBound; + + size_t bitVectorSizeInBytes = (upperBound / 8) + 1; + layout.bits = bitVectorSizeInBytes; + + if (skipQuantum != 0) { + size_t numSkipPointers = upperBound / (skipQuantum ?: 1); + layout.skipPointers = numSkipPointers * sizeof(SkipValueType); + } + if (forwardQuantum != 0) { + size_t numForwardPointers = size / (forwardQuantum ?: 1); + layout.forwardPointers = numForwardPointers * sizeof(SkipValueType); + } + + CHECK_LT(size, std::numeric_limits::max()); + + return layout; + } + + size_t bytes() const { return bits + skipPointers + forwardPointers; } + + template + BitVectorCompressedListBase openList( + Range& buf) const { + BitVectorCompressedListBase result; + result.size = size; + result.upperBound = upperBound; + result.data = buf.subpiece(0, bytes()); + auto advance = [&](size_t n) { + auto begin = buf.data(); + buf.advance(n); + return begin; + }; + + result.bits = advance(bits); + result.skipPointers = advance(skipPointers); + result.forwardPointers = advance(forwardPointers); + CHECK_EQ(buf.data() - result.data.data(), bytes()); + + return result; + } + + MutableBitVectorCompressedList allocList() const { + uint8_t* buf = nullptr; + if (size > 0) { + buf = static_cast(malloc(bytes() + 7)); + } + folly::MutableByteRange bufRange(buf, bytes()); + return openList(bufRange); + } + + size_t size = 0; + size_t upperBound = 0; + + // Sizes in bytes. + size_t bits = 0; + size_t skipPointers = 0; + size_t forwardPointers = 0; +}; + +template +class BitVectorReader { + public: + typedef Encoder EncoderType; + typedef typename Encoder::ValueType ValueType; + typedef typename Encoder::SkipValueType SkipValueType; + + explicit BitVectorReader(const BitVectorCompressedList& list) + : size_(list.size), + bits_(list.bits), + skipPointers_(list.skipPointers), + forwardPointers_(list.forwardPointers) { + reset(); + + if (kUnchecked || UNLIKELY(list.size == 0)) { + upperBound_ = 0; + return; + } + + upperBound_ = list.getUpperBound(); + } + + void reset() { + block_ = (bits_ != nullptr) ? folly::loadUnaligned(bits_) : 0; + outer_ = 0; + inner_ = -1; + position_ = -1; + value_ = 0; + } + + bool next() { + if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) { + return setDone(); + } + + while (block_ == 0) { + outer_ += sizeof(uint64_t); + block_ = folly::loadUnaligned(bits_ + outer_); + } + + ++position_; + inner_ = Instructions::ctz(block_); + block_ = Instructions::blsr(block_); + + return setValue(); + } + + bool skip(size_t n) { + CHECK_GT(n, 0); + + if (!kUnchecked && position() + n >= size_) { + return setDone(); + } + // Small skip optimization. + if (LIKELY(n < kLinearScanThreshold)) { + for (size_t i = 0; i < n; ++i) { + next(); + } + return true; + } + + position_ += n; + + // Use forward pointer. + if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) { + // Workaround to avoid 'division by zero' compile-time error. + constexpr size_t q = Encoder::forwardQuantum ?: 1; + + const size_t steps = position_ / q; + const size_t dest = folly::loadUnaligned( + forwardPointers_ + (steps - 1) * sizeof(SkipValueType)); + + reposition(dest); + n = position_ + 1 - steps * q; + // Correct inner_ will be set at the end. + } + + size_t cnt; + // Find necessary block. + while ((cnt = Instructions::popcount(block_)) < n) { + n -= cnt; + outer_ += sizeof(uint64_t); + block_ = folly::loadUnaligned(bits_ + outer_); + } + + // Skip to the n-th one in the block. + DCHECK_GT(n, 0); + inner_ = select64(block_, n - 1); + block_ &= (uint64_t(-1) << inner_) << 1; + + return setValue(); + } + + bool skipTo(ValueType v) { + DCHECK_GE(v, value_); + if (v <= value_) { + return true; + } else if (!kUnchecked && v > upperBound_) { + return setDone(); + } + + // Small skip optimization. + if (v - value_ < kLinearScanThreshold) { + do { + next(); + } while (value() < v); + + return true; + } + + if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) { + size_t q = v / Encoder::skipQuantum; + position_ = folly::loadUnaligned( + skipPointers_ + (q - 1) * sizeof(SkipValueType)) - 1; + + reposition(q * Encoder::skipQuantum); + } + + // Find the value. + size_t outer = v / 64 * 8; + + while (outer_ < outer) { + position_ += Instructions::popcount(block_); + outer_ += sizeof(uint64_t); + block_ = folly::loadUnaligned(bits_ + outer_); + } + + DCHECK_EQ(outer_, outer); + uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1); + position_ += Instructions::popcount(block_ & ~mask) + 1; + block_ &= mask; + + while (block_ == 0) { + outer_ += sizeof(uint64_t); + block_ = folly::loadUnaligned(bits_ + outer_); + } + + inner_ = Instructions::ctz(block_); + block_ = Instructions::blsr(block_); + + setValue(); + return true; + } + + size_t size() const { return size_; } + + size_t position() const { return position_; } + + ValueType value() const { return value_; } + + bool jump(size_t n) { + reset(); + if (n > 0) { + return skip(n); + } else { + return true; + } + } + + bool jumpTo(ValueType v) { + reset(); + return skipTo(v); + } + + bool setDone() { + value_ = std::numeric_limits::max(); + position_ = size_; + return false; + } + + private: + bool setValue() { + value_ = static_cast(8 * outer_ + inner_); + return true; + } + + void reposition(size_t dest) { + outer_ = dest / 64 * 8; + // We maintain the invariant that outer_ is divisible by 8. + block_ = folly::loadUnaligned(bits_ + outer_); + block_ &= ~((uint64_t(1) << (dest % 64)) - 1); + } + + constexpr static size_t kLinearScanThreshold = 4; + + size_t outer_; + size_t inner_; + size_t position_; + uint64_t block_; + ValueType value_ = 0; + + size_t size_; + ValueType upperBound_; + const uint8_t* const bits_; + const uint8_t* const skipPointers_; + const uint8_t* const forwardPointers_; +}; + +}} // namespaces + +#endif // FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index f5fcfe86..c0f121d7 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -27,15 +27,15 @@ #include #include #include -#include #include -#include #include #include #include #include #include +#include + #ifndef __GNUC__ #error EliasFanoCoding.h requires GCC #endif diff --git a/folly/experimental/Instructions.h b/folly/experimental/Instructions.h index 694e1f50..cdcd5257 100644 --- a/folly/experimental/Instructions.h +++ b/folly/experimental/Instructions.h @@ -17,6 +17,8 @@ #ifndef FOLLY_EXPERIMENTAL_INSTRUCTIONS_H #define FOLLY_EXPERIMENTAL_INSTRUCTIONS_H +#include + namespace folly { namespace compression { namespace instructions { // NOTE: It's recommended to compile EF coding with -msse4.2, starting diff --git a/folly/experimental/test/BitVectorCodingTest.cpp b/folly/experimental/test/BitVectorCodingTest.cpp new file mode 100644 index 00000000..1bbc3b7c --- /dev/null +++ b/folly/experimental/test/BitVectorCodingTest.cpp @@ -0,0 +1,198 @@ +/* + * Copyright 2015 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace folly::compression; + +#ifndef BV_TEST_ARCH +#define BV_TEST_ARCH Default +#endif // BV_TEST_ARCH + +class BitVectorCodingTest : public ::testing::Test { + public: + void doTestEmpty() { + typedef BitVectorEncoder Encoder; + typedef BitVectorReader Reader; + testEmpty(); + } + + template + void doTestAll() { + typedef BitVectorEncoder + Encoder; + typedef BitVectorReader Reader; + testAll(generateRandomList(100 * 1000, 10 * 1000 * 1000)); + testAll(generateSeqList(1, 100000, 100)); + } +}; + +TEST_F(BitVectorCodingTest, Empty) { + doTestEmpty(); +} + +TEST_F(BitVectorCodingTest, Simple) { + doTestAll<0, 0>(); +} + +TEST_F(BitVectorCodingTest, SkipPointers) { + doTestAll<128, 0>(); +} + +TEST_F(BitVectorCodingTest, ForwardPointers) { + doTestAll<0, 128>(); +} + +TEST_F(BitVectorCodingTest, SkipForwardPointers) { + doTestAll<128, 128>(); +} + +namespace bm { + +constexpr size_t k1M = 1000000; + +typedef BitVectorEncoder Encoder; +typedef BitVectorReader Reader; + +std::vector data; +std::vector order; + +std::vector encodeSmallData; +std::vector encodeLargeData; + +typename Encoder::CompressedList list; + +void init() { + std::mt19937 gen; + + data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen); + list = Encoder::encode(data.begin(), data.end()); + + order.resize(data.size()); + std::iota(order.begin(), order.end(), size_t()); + std::shuffle(order.begin(), order.end(), gen); + + encodeSmallData = generateRandomList(10, 100 * 1000, gen); + encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen); +} + +void free() { list.free(); } + +} // namespace bm + +BENCHMARK(Next, iters) { bmNext(bm::list, bm::data, iters); } + +size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) { + bmSkip(bm::list, bm::data, logAvgSkip, iters); + return iters; +} + +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0) +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1) +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2) +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4) +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6) +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8) +BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10) + +BENCHMARK(Jump_ForwardQ128, iters) { + bmJump(bm::list, bm::data, bm::order, iters); +} + +BENCHMARK_DRAW_LINE(); + +size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) { + bmSkipTo(bm::list, bm::data, logAvgSkip, iters); + return iters; +} + +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0) +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1) +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2) +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4) +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6) +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8) +BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10) + +BENCHMARK(JumpTo_SkipQ128, iters) { + bmJumpTo(bm::list, bm::data, bm::order, iters); +} + +BENCHMARK_DRAW_LINE(); + +BENCHMARK(Encode_10) { + auto list = bm::Encoder::encode(bm::encodeSmallData.begin(), + bm::encodeSmallData.end()); + list.free(); +} + +BENCHMARK(Encode) { + auto list = bm::Encoder::encode(bm::encodeLargeData.begin(), + bm::encodeLargeData.end()); + list.free(); +} + +#if 0 +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz (turbo off), +using instructions::Default and GCC 4.8 with --bm_min_usec 100000. +============================================================================ +folly/experimental/test/BitVectorCodingTest.cpp relative time/iter iters/s +============================================================================ +Next 9.59ns 104.25M +Skip_ForwardQ128(1) 11.56ns 86.53M +Skip_ForwardQ128(2) 23.30ns 42.93M +Skip_ForwardQ128(4_pm_1) 52.99ns 18.87M +Skip_ForwardQ128(16_pm_4) 200.85ns 4.98M +Skip_ForwardQ128(64_pm_16) 733.20ns 1.36M +Skip_ForwardQ128(256_pm_64) 748.35ns 1.34M +Skip_ForwardQ128(1024_pm_256) 742.77ns 1.35M +Jump_ForwardQ128 752.98ns 1.33M +---------------------------------------------------------------------------- +SkipTo_SkipQ128(1) 23.47ns 42.62M +SkipTo_SkipQ128(2) 24.48ns 40.85M +SkipTo_SkipQ128(4_pm_1) 22.16ns 45.13M +SkipTo_SkipQ128(16_pm_4) 28.43ns 35.17M +SkipTo_SkipQ128(64_pm_16) 45.51ns 21.97M +SkipTo_SkipQ128(256_pm_64) 44.03ns 22.71M +SkipTo_SkipQ128(1024_pm_256) 45.84ns 21.81M +JumpTo_SkipQ128 15.33ns 65.25M +---------------------------------------------------------------------------- +Encode_10 1.60us 624.33K +Encode 16.98ms 58.89 +============================================================================ +#endif + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, true); + + auto ret = RUN_ALL_TESTS(); + if (ret == 0 && FLAGS_benchmark) { + bm::init(); + folly::runBenchmarks(); + bm::free(); + } + + return ret; +} diff --git a/folly/experimental/test/EliasFanoCodingTest.cpp b/folly/experimental/test/EliasFanoCodingTest.cpp index a79b701b..2e53ea42 100644 --- a/folly/experimental/test/EliasFanoCodingTest.cpp +++ b/folly/experimental/test/EliasFanoCodingTest.cpp @@ -87,7 +87,6 @@ void init() { std::mt19937 gen; data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen); - //data = loadList("/home/philipp/pl_test_dump.txt"); list = Encoder::encode(data.begin(), data.end()); order.resize(data.size());