--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
+#define FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <type_traits>
+
+#include <folly/Bits.h>
+#include <folly/Likely.h>
+#include <folly/Portability.h>
+#include <folly/Range.h>
+#include <folly/experimental/Bits.h>
+#include <folly/experimental/Instructions.h>
+#include <folly/experimental/Select64.h>
+#include <glog/logging.h>
+
+#ifndef __GNUC__
+#error BitVectorCoding.h requires GCC
+#endif
+
+#if !FOLLY_X64
+#error BitVectorCoding.h requires x86_64
+#endif
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+#error BitVectorCoding.h requires little endianness
+#endif
+
+namespace folly { namespace compression {
+
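+// BitVectorCompressedListBase describes an encoded list. `data` spans the
+// whole allocation, while `bits`, `skipPointers` and `forwardPointers` point
+// into it at the bitmap, skip-pointer table and forward-pointer table, laid
+// out in that order. The structure does not own its memory; call free() once
+// the list is no longer needed.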
+template <class Pointer>
+struct BitVectorCompressedListBase {
+ BitVectorCompressedListBase() = default;
+
+ template <class OtherPointer>
+ BitVectorCompressedListBase(
+ const BitVectorCompressedListBase<OtherPointer>& other)
+ : size(other.size),
+ upperBound(other.upperBound),
+ data(other.data),
+ bits(reinterpret_cast<Pointer>(other.bits)),
+ skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
+ forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}
+
+ void free() { ::free(const_cast<unsigned char*>(data.data())); }
+
+ size_t getUpperBound() const { return upperBound; }
+
+ size_t size = 0;
+ size_t upperBound = 0;
+
+ folly::Range<Pointer> data;
+
+ Pointer bits = nullptr;
+ Pointer skipPointers = nullptr;
+ Pointer forwardPointers = nullptr;
+};
+
+typedef BitVectorCompressedListBase<const uint8_t*> BitVectorCompressedList;
+typedef BitVectorCompressedListBase<uint8_t*> MutableBitVectorCompressedList;
+
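+// BitVectorEncoder encodes a sorted list of unsigned values as a plain
+// bitmap: adding value v sets bit v. With kSkipQuantum > 0 a skip-pointer
+// table additionally records, for every skipQuantum of value space, how many
+// values lie below that point (used by BitVectorReader::skipTo); with
+// kForwardQuantum > 0 a forward-pointer table records every forwardQuantum-th
+// value (used by BitVectorReader::skip). A minimal usage sketch:
+//
+//   std::vector<uint32_t> values = {1, 5, 8, 100};  // sorted
+//   auto list = BitVectorEncoder<uint32_t, uint32_t>::encode(values.begin(),
+//                                                            values.end());
+//   // ... read it back with BitVectorReader, then release the memory:
+//   list.free();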
+template <class Value,
+ class SkipValue,
+ size_t kSkipQuantum = 0,
+ size_t kForwardQuantum = 0>
+struct BitVectorEncoder {
+ static_assert(std::is_integral<Value>::value &&
+ std::is_unsigned<Value>::value,
+ "Value should be unsigned integral");
+
+ typedef BitVectorCompressedList CompressedList;
+
+ typedef Value ValueType;
+ typedef SkipValue SkipValueType;
+ struct Layout;
+
+ static constexpr size_t skipQuantum = kSkipQuantum;
+ static constexpr size_t forwardQuantum = kForwardQuantum;
+
+ template <class RandomAccessIterator>
+ static BitVectorCompressedList encode(RandomAccessIterator begin,
+ RandomAccessIterator end) {
+ if (begin == end) {
+ return BitVectorCompressedList();
+ }
+ BitVectorEncoder encoder(end - begin, *(end - 1));
+ for (; begin != end; ++begin) {
+ encoder.add(*begin);
+ }
+ return encoder.finish();
+ }
+
+ explicit BitVectorEncoder(const MutableBitVectorCompressedList& result)
+ : bits_(result.bits),
+ skipPointers_(result.skipPointers),
+ forwardPointers_(result.forwardPointers),
+ result_(result) {
+ memset(result.data.data(), 0, result.data.size());
+ }
+
+ BitVectorEncoder(size_t size, ValueType upperBound)
+ : BitVectorEncoder(
+ Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
+
+ void add(ValueType value) {
+ CHECK_GE(value, lastValue_);
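+    // Set bit `value` of the bitmap.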
+ auto block = bits_ + (value / 64) * sizeof(uint64_t);
+ size_t inner = value % 64;
+ folly::Bits<folly::Unaligned<uint64_t>>::set(
+ reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);
+
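+    // Record a skip pointer for every skipQuantum of value space crossed by
+    // this value; each entry holds the number of values below that boundary.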
+ if (skipQuantum != 0) {
+ size_t nextSkipPointerSize = value / (skipQuantum ?: 1);
+ while (skipPointersSize_ < nextSkipPointerSize) {
+ auto pos = skipPointersSize_++;
+ folly::storeUnaligned<SkipValueType>(
+ skipPointers_ + pos * sizeof(SkipValueType), size_);
+ }
+ }
+
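+    // Record a forward pointer for every forwardQuantum values added; each
+    // entry holds the value stored at that position.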
+ if (forwardQuantum != 0) {
+      if (size_ != 0 && (size_ % (forwardQuantum ?: 1) == 0)) {
+ const auto pos = size_ / (forwardQuantum ?: 1) - 1;
+ folly::storeUnaligned<SkipValueType>(
+ forwardPointers_ + pos * sizeof(SkipValueType), value);
+ }
+ }
+
+ lastValue_ = value;
+ ++size_;
+ }
+
+ const BitVectorCompressedList& finish() const {
+ CHECK_EQ(size_, result_.size);
+ // TODO(ott): Relax this assumption.
+ CHECK_EQ(result_.getUpperBound(), lastValue_);
+ return result_;
+ }
+
+ private:
+ uint8_t* const bits_ = nullptr;
+ uint8_t* const skipPointers_ = nullptr;
+ uint8_t* const forwardPointers_ = nullptr;
+
+ ValueType lastValue_ = 0;
+ size_t size_ = 0;
+ size_t skipPointersSize_ = 0;
+
+ BitVectorCompressedList result_;
+};
+
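+// Layout computes the size in bytes of each region (bitmap, skip pointers,
+// forward pointers) for a given upper bound and list size, and can allocate
+// (allocList) or open (openList) a contiguous buffer with the regions in
+// that order.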
+template <class Value,
+ class SkipValue,
+ size_t kSkipQuantum,
+ size_t kForwardQuantum>
+struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
+ Layout {
+ static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
+ Layout layout;
+ layout.size = size;
+ layout.upperBound = upperBound;
+
+ size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
+ layout.bits = bitVectorSizeInBytes;
+
+ if (skipQuantum != 0) {
+ size_t numSkipPointers = upperBound / (skipQuantum ?: 1);
+ layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
+ }
+ if (forwardQuantum != 0) {
+ size_t numForwardPointers = size / (forwardQuantum ?: 1);
+ layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
+ }
+
+ CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
+
+ return layout;
+ }
+
+ size_t bytes() const { return bits + skipPointers + forwardPointers; }
+
+ template <typename Range>
+ BitVectorCompressedListBase<typename Range::iterator> openList(
+ Range& buf) const {
+ BitVectorCompressedListBase<typename Range::iterator> result;
+ result.size = size;
+ result.upperBound = upperBound;
+ result.data = buf.subpiece(0, bytes());
+ auto advance = [&](size_t n) {
+ auto begin = buf.data();
+ buf.advance(n);
+ return begin;
+ };
+
+ result.bits = advance(bits);
+ result.skipPointers = advance(skipPointers);
+ result.forwardPointers = advance(forwardPointers);
+ CHECK_EQ(buf.data() - result.data.data(), bytes());
+
+ return result;
+ }
+
+ MutableBitVectorCompressedList allocList() const {
+ uint8_t* buf = nullptr;
+ if (size > 0) {
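+      // A few spare bytes keep unaligned 64-bit loads near the end of the
+      // bitmap inside the allocation.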
+ buf = static_cast<uint8_t*>(malloc(bytes() + 7));
+ }
+ folly::MutableByteRange bufRange(buf, bytes());
+ return openList(bufRange);
+ }
+
+ size_t size = 0;
+ size_t upperBound = 0;
+
+ // Sizes in bytes.
+ size_t bits = 0;
+ size_t skipPointers = 0;
+ size_t forwardPointers = 0;
+};
+
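+// BitVectorReader iterates over an encoded list in increasing order of value.
+// next() advances to the following value, skip(n) advances by n positions,
+// skipTo(v) advances to the first value >= v; jump() and jumpTo() are the
+// absolute counterparts that reset the reader first. Each returns false once
+// the reader moves past the last value (unless kUnchecked is set, in which
+// case bounds are not checked). A minimal read loop:
+//
+//   BitVectorReader<Encoder> reader(list);
+//   while (reader.next()) {
+//     // ... use reader.value() ...
+//   }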
+template <class Encoder,
+ class Instructions = instructions::Default,
+ bool kUnchecked = false>
+class BitVectorReader {
+ public:
+ typedef Encoder EncoderType;
+ typedef typename Encoder::ValueType ValueType;
+ typedef typename Encoder::SkipValueType SkipValueType;
+
+ explicit BitVectorReader(const BitVectorCompressedList& list)
+ : size_(list.size),
+ bits_(list.bits),
+ skipPointers_(list.skipPointers),
+ forwardPointers_(list.forwardPointers) {
+ reset();
+
+ if (kUnchecked || UNLIKELY(list.size == 0)) {
+ upperBound_ = 0;
+ return;
+ }
+
+ upperBound_ = list.getUpperBound();
+ }
+
+ void reset() {
+ block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
+ outer_ = 0;
+ inner_ = -1;
+ position_ = -1;
+ value_ = 0;
+ }
+
+ bool next() {
+ if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
+ return setDone();
+ }
+
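+    // Skip over zero blocks until one containing a set bit is found.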
+ while (block_ == 0) {
+ outer_ += sizeof(uint64_t);
+ block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
+ }
+
+ ++position_;
+ inner_ = Instructions::ctz(block_);
+ block_ = Instructions::blsr(block_);
+
+ return setValue();
+ }
+
+ bool skip(size_t n) {
+ CHECK_GT(n, 0);
+
+ if (!kUnchecked && position() + n >= size_) {
+ return setDone();
+ }
+ // Small skip optimization.
+ if (LIKELY(n < kLinearScanThreshold)) {
+ for (size_t i = 0; i < n; ++i) {
+ next();
+ }
+ return true;
+ }
+
+ position_ += n;
+
+ // Use forward pointer.
+ if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
+ // Workaround to avoid 'division by zero' compile-time error.
+ constexpr size_t q = Encoder::forwardQuantum ?: 1;
+
+ const size_t steps = position_ / q;
+ const size_t dest = folly::loadUnaligned<SkipValueType>(
+ forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
+
+ reposition(dest);
+ n = position_ + 1 - steps * q;
+ // Correct inner_ will be set at the end.
+ }
+
+ size_t cnt;
+ // Find necessary block.
+ while ((cnt = Instructions::popcount(block_)) < n) {
+ n -= cnt;
+ outer_ += sizeof(uint64_t);
+ block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
+ }
+
+ // Skip to the n-th one in the block.
+ DCHECK_GT(n, 0);
+ inner_ = select64<Instructions>(block_, n - 1);
+ block_ &= (uint64_t(-1) << inner_) << 1;
+
+ return setValue();
+ }
+
+ bool skipTo(ValueType v) {
+ DCHECK_GE(v, value_);
+ if (v <= value_) {
+ return true;
+ } else if (!kUnchecked && v > upperBound_) {
+ return setDone();
+ }
+
+ // Small skip optimization.
+ if (v - value_ < kLinearScanThreshold) {
+ do {
+ next();
+ } while (value() < v);
+
+ return true;
+ }
+
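+    // Use the skip pointer to resume scanning from the nearest recorded
+    // boundary below v.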
+ if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
+ size_t q = v / Encoder::skipQuantum;
+ position_ = folly::loadUnaligned<SkipValueType>(
+ skipPointers_ + (q - 1) * sizeof(SkipValueType)) - 1;
+
+ reposition(q * Encoder::skipQuantum);
+ }
+
+ // Find the value.
+ size_t outer = v / 64 * 8;
+
+ while (outer_ < outer) {
+ position_ += Instructions::popcount(block_);
+ outer_ += sizeof(uint64_t);
+ block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
+ }
+
+ DCHECK_EQ(outer_, outer);
+ uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
+ position_ += Instructions::popcount(block_ & ~mask) + 1;
+ block_ &= mask;
+
+ while (block_ == 0) {
+ outer_ += sizeof(uint64_t);
+ block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
+ }
+
+ inner_ = Instructions::ctz(block_);
+ block_ = Instructions::blsr(block_);
+
+ setValue();
+ return true;
+ }
+
+ size_t size() const { return size_; }
+
+ size_t position() const { return position_; }
+
+ ValueType value() const { return value_; }
+
+ bool jump(size_t n) {
+ reset();
+ if (n > 0) {
+ return skip(n);
+ } else {
+ return true;
+ }
+ }
+
+ bool jumpTo(ValueType v) {
+ reset();
+ return skipTo(v);
+ }
+
+ bool setDone() {
+ value_ = std::numeric_limits<ValueType>::max();
+ position_ = size_;
+ return false;
+ }
+
+ private:
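+  // The current value is the absolute bit index: outer_ is the byte offset of
+  // the current 64-bit block within the bitmap, inner_ the bit index in it.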
+ bool setValue() {
+ value_ = static_cast<ValueType>(8 * outer_ + inner_);
+ return true;
+ }
+
+ void reposition(size_t dest) {
+ outer_ = dest / 64 * 8;
+ // We maintain the invariant that outer_ is divisible by 8.
+ block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
+ block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
+ }
+
+ constexpr static size_t kLinearScanThreshold = 4;
+
+ size_t outer_;
+ size_t inner_;
+ size_t position_;
+ uint64_t block_;
+ ValueType value_ = 0;
+
+ size_t size_;
+ ValueType upperBound_;
+ const uint8_t* const bits_;
+ const uint8_t* const skipPointers_;
+ const uint8_t* const forwardPointers_;
+};
+
+}} // namespaces
+
+#endif // FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
--- /dev/null
+/*
+ * Copyright 2015 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <algorithm>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include <folly/Benchmark.h>
+#include <folly/experimental/BitVectorCoding.h>
+#include <folly/experimental/Select64.h>
+#include <folly/experimental/test/CodingTestUtils.h>
+#include <gtest/gtest.h>
+
+using namespace folly::compression;
+
+#ifndef BV_TEST_ARCH
+#define BV_TEST_ARCH Default
+#endif // BV_TEST_ARCH
+
+class BitVectorCodingTest : public ::testing::Test {
+ public:
+ void doTestEmpty() {
+ typedef BitVectorEncoder<uint32_t, size_t> Encoder;
+ typedef BitVectorReader<Encoder, instructions::BV_TEST_ARCH> Reader;
+ testEmpty<Reader, Encoder>();
+ }
+
+ template <size_t kSkipQuantum, size_t kForwardQuantum>
+ void doTestAll() {
+ typedef BitVectorEncoder<uint32_t, uint32_t, kSkipQuantum, kForwardQuantum>
+ Encoder;
+ typedef BitVectorReader<Encoder> Reader;
+ testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
+ testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
+ }
+};
+
+TEST_F(BitVectorCodingTest, Empty) {
+ doTestEmpty();
+}
+
+TEST_F(BitVectorCodingTest, Simple) {
+ doTestAll<0, 0>();
+}
+
+TEST_F(BitVectorCodingTest, SkipPointers) {
+ doTestAll<128, 0>();
+}
+
+TEST_F(BitVectorCodingTest, ForwardPointers) {
+ doTestAll<0, 128>();
+}
+
+TEST_F(BitVectorCodingTest, SkipForwardPointers) {
+ doTestAll<128, 128>();
+}
+
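+// Benchmark fixtures: init() encodes a random list of 100k values bounded by
+// 10M, shared by the Next/Skip/Jump benchmarks below, and fills
+// encodeSmallData/encodeLargeData for the Encode benchmarks.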
+namespace bm {
+
+constexpr size_t k1M = 1000000;
+
+typedef BitVectorEncoder<uint32_t, uint32_t, 128, 128> Encoder;
+typedef BitVectorReader<Encoder> Reader;
+
+std::vector<uint32_t> data;
+std::vector<size_t> order;
+
+std::vector<uint32_t> encodeSmallData;
+std::vector<uint32_t> encodeLargeData;
+
+typename Encoder::CompressedList list;
+
+void init() {
+ std::mt19937 gen;
+
+ data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
+ list = Encoder::encode(data.begin(), data.end());
+
+ order.resize(data.size());
+ std::iota(order.begin(), order.end(), size_t());
+ std::shuffle(order.begin(), order.end(), gen);
+
+ encodeSmallData = generateRandomList(10, 100 * 1000, gen);
+ encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen);
+}
+
+void free() { list.free(); }
+
+} // namespace bm
+
+BENCHMARK(Next, iters) { bmNext<bm::Reader>(bm::list, bm::data, iters); }
+
+size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) {
+ bmSkip<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
+ return iters;
+}
+
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0)
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1)
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2)
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4)
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6)
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8)
+BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10)
+
+BENCHMARK(Jump_ForwardQ128, iters) {
+ bmJump<bm::Reader>(bm::list, bm::data, bm::order, iters);
+}
+
+BENCHMARK_DRAW_LINE();
+
+size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) {
+ bmSkipTo<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
+ return iters;
+}
+
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0)
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1)
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2)
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4)
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6)
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8)
+BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10)
+
+BENCHMARK(JumpTo_SkipQ128, iters) {
+ bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, iters);
+}
+
+BENCHMARK_DRAW_LINE();
+
+BENCHMARK(Encode_10) {
+ auto list = bm::Encoder::encode(bm::encodeSmallData.begin(),
+ bm::encodeSmallData.end());
+ list.free();
+}
+
+BENCHMARK(Encode) {
+ auto list = bm::Encoder::encode(bm::encodeLargeData.begin(),
+ bm::encodeLargeData.end());
+ list.free();
+}
+
+#if 0
+Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz (turbo off),
+using instructions::Default and GCC 4.8 with --bm_min_usec 100000.
+============================================================================
+folly/experimental/test/BitVectorCodingTest.cpp relative  time/iter  iters/s
+============================================================================
+Next                                                         9.59ns  104.25M
+Skip_ForwardQ128(1)                                         11.56ns   86.53M
+Skip_ForwardQ128(2)                                         23.30ns   42.93M
+Skip_ForwardQ128(4_pm_1)                                    52.99ns   18.87M
+Skip_ForwardQ128(16_pm_4)                                  200.85ns    4.98M
+Skip_ForwardQ128(64_pm_16)                                 733.20ns    1.36M
+Skip_ForwardQ128(256_pm_64)                                748.35ns    1.34M
+Skip_ForwardQ128(1024_pm_256)                              742.77ns    1.35M
+Jump_ForwardQ128                                           752.98ns    1.33M
+----------------------------------------------------------------------------
+SkipTo_SkipQ128(1)                                          23.47ns   42.62M
+SkipTo_SkipQ128(2)                                          24.48ns   40.85M
+SkipTo_SkipQ128(4_pm_1)                                     22.16ns   45.13M
+SkipTo_SkipQ128(16_pm_4)                                    28.43ns   35.17M
+SkipTo_SkipQ128(64_pm_16)                                   45.51ns   21.97M
+SkipTo_SkipQ128(256_pm_64)                                  44.03ns   22.71M
+SkipTo_SkipQ128(1024_pm_256)                                45.84ns   21.81M
+JumpTo_SkipQ128                                             15.33ns   65.25M
+----------------------------------------------------------------------------
+Encode_10                                                     1.60us  624.33K
+Encode                                                       16.98ms    58.89
+============================================================================
+#endif
+
+int main(int argc, char** argv) {
+ testing::InitGoogleTest(&argc, argv);
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
+
+ auto ret = RUN_ALL_TESTS();
+ if (ret == 0 && FLAGS_benchmark) {
+ bm::init();
+ folly::runBenchmarks();
+ bm::free();
+ }
+
+ return ret;
+}