#error EliasFanoCoding.h requires x86_64
#endif
-#include <cstdlib>
#include <algorithm>
+#include <cstdlib>
#include <limits>
#include <type_traits>
#include <boost/noncopyable.hpp>
#include <glog/logging.h>
+
#include "folly/Bits.h"
#include "folly/CpuId.h"
#include "folly/Likely.h"
namespace folly { namespace compression {
-template <class Value,
- class SkipValue = size_t,
- size_t kSkipQuantum = 0, // 0 = disabled
- size_t kForwardQuantum = 0> // 0 = disabled
struct EliasFanoCompressedList {
- static_assert(std::is_integral<Value>::value &&
- std::is_unsigned<Value>::value,
- "Value should be unsigned integral");
-
- typedef Value ValueType;
- typedef SkipValue SkipValueType;
-
EliasFanoCompressedList()
: size(0), numLowerBits(0) { }
- static constexpr size_t skipQuantum = kSkipQuantum;
- static constexpr size_t forwardQuantum = kForwardQuantum;
+ void free() {
+ ::free(const_cast<unsigned char*>(lower.data()));
+ ::free(const_cast<unsigned char*>(upper.data()));
+ ::free(const_cast<unsigned char*>(skipPointers.data()));
+ ::free(const_cast<unsigned char*>(forwardPointers.data()));
+ }
size_t size;
uint8_t numLowerBits;
folly::ByteRange skipPointers;
folly::ByteRange forwardPointers;
+};
- void free() {
- ::free(const_cast<unsigned char*>(lower.data()));
- ::free(const_cast<unsigned char*>(upper.data()));
- ::free(const_cast<unsigned char*>(skipPointers.data()));
- ::free(const_cast<unsigned char*>(forwardPointers.data()));
- }
+// Version history:
+// In version 1 the encoding of skip / forward pointers was changed,
+// so that SkipValue = uint32_t is able to address up to ~4B elements,
+// instead of only ~2B.
+template <class Value,
+ class SkipValue = size_t,
+ size_t kSkipQuantum = 0, // 0 = disabled
+ size_t kForwardQuantum = 0, // 0 = disabled
+ size_t kVersion = 0>
+struct EliasFanoEncoder {
+ static_assert(std::is_integral<Value>::value &&
+ std::is_unsigned<Value>::value,
+ "Value should be unsigned integral");
+
+ typedef EliasFanoCompressedList CompressedList;
+
+ typedef Value ValueType;
+ typedef SkipValue SkipValueType;
+
+ static constexpr size_t skipQuantum = kSkipQuantum;
+ static constexpr size_t forwardQuantum = kForwardQuantum;
+ static constexpr size_t version = kVersion;
static uint8_t defaultNumLowerBits(size_t upperBound, size_t size) {
if (size == 0 || upperBound < size) {
/* static */ if (skipQuantum != 0) {
// Workaround to avoid 'division by zero' compile-time error.
constexpr size_t q = skipQuantum ?: 1;
- CHECK_LT(upperSizeBits, std::numeric_limits<SkipValueType>::max());
+ /* static */ if (kVersion > 0) {
+ CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
+ } else {
+ CHECK_LT(upperSizeBits, std::numeric_limits<SkipValueType>::max());
+ }
// 8 * upperSize is used here instead of upperSizeBits, as that is
- // more serialization-friendly way.
+ // a more serialization-friendly way (upperSizeBits isn't known outside
+ // of this function, unlike upperSize; thus numSkipPointers can easily
+ // be deduced from upperSize).
numSkipPointers = (8 * upperSize - size) / q;
skipPointers = static_cast<SkipValueType*>(
numSkipPointers == 0
for (size_t i = 0, pos = 0; i < size; ++i) {
const ValueType upperBits = list[i] >> numLowerBits;
for (; (pos + 1) * q <= upperBits; ++pos) {
- skipPointers[pos] = i + (pos + 1) * q;
+ /* static */ if (kVersion > 0) {
+ // Since version 1, just the number of preceding 1-bits is stored.
+ skipPointers[pos] = i;
+ } else {
+ skipPointers[pos] = i + (pos + 1) * q;
+ }
}
}
}
for (size_t i = q - 1, pos = 0; i < size; i += q, ++pos) {
const ValueType upperBits = list[i] >> numLowerBits;
- forwardPointers[pos] = upperBits + i + 1;
+ /* static */ if (kVersion > 0) {
+ // Since version 1, just the number of preceding 0-bits is stored.
+ forwardPointers[pos] = upperBits;
+ } else {
+ forwardPointers[pos] = upperBits + i + 1;
+ }
}
}
DCHECK_EQ(0, value & ~((uint64_t(1) << len) - 1));
unsigned char* const ptr = data + (pos / 8);
uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
- ptrv |= value << (pos % 8);
+ ptrv |= value << (pos % 8);
folly::storeUnaligned<uint64_t>(ptr, ptrv);
}
};
namespace detail {
-template <class CompressedList, class Instructions>
+template <class Encoder, class Instructions>
class UpperBitsReader {
- typedef typename CompressedList::SkipValueType SkipValueType;
+ typedef typename Encoder::SkipValueType SkipValueType;
public:
- typedef typename CompressedList::ValueType ValueType;
+ typedef typename Encoder::ValueType ValueType;
- explicit UpperBitsReader(const CompressedList& list)
+ explicit UpperBitsReader(const EliasFanoCompressedList& list)
: forwardPointers_(list.forwardPointers.data()),
skipPointers_(list.skipPointers.data()),
start_(list.upper.data()),
position_ += n; // n 1-bits will be read.
// Use forward pointer.
- if (CompressedList::forwardQuantum > 0 &&
- n > CompressedList::forwardQuantum) {
+ if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
// Workaround to avoid 'division by zero' compile-time error.
- constexpr size_t q = CompressedList::forwardQuantum ?: 1;
+ constexpr size_t q = Encoder::forwardQuantum ?: 1;
const size_t steps = position_ / q;
const size_t dest =
folly::loadUnaligned<SkipValueType>(
forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
- reposition(dest);
+ /* static */ if (Encoder::version > 0) {
+ reposition(dest + steps * q);
+ } else {
+ reposition(dest);
+ }
n = position_ + 1 - steps * q; // n is > 0.
// correct inner_ will be set at the end.
}
DCHECK_GE(v, value_);
// Use skip pointer.
- if (CompressedList::skipQuantum > 0 &&
- v >= value_ + CompressedList::skipQuantum) {
+ if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) {
// Workaround to avoid 'division by zero' compile-time error.
- constexpr size_t q = CompressedList::skipQuantum ?: 1;
+ constexpr size_t q = Encoder::skipQuantum ?: 1;
const size_t steps = v / q;
const size_t dest =
folly::loadUnaligned<SkipValueType>(
skipPointers_ + (steps - 1) * sizeof(SkipValueType));
- reposition(dest);
- position_ = dest - q * steps - 1;
+ /* static */ if (Encoder::version > 0) {
+ reposition(dest + q * steps);
+ position_ = dest - 1;
+ } else {
+ reposition(dest);
+ position_ = dest - q * steps - 1;
+ }
// Correct inner_ and value_ will be set during the next()
// call at the end.
} // namespace detail
-template <class CompressedList,
+template <class Encoder,
class Instructions = instructions::Default>
class EliasFanoReader : private boost::noncopyable {
public:
- typedef typename CompressedList::ValueType ValueType;
+ typedef typename Encoder::ValueType ValueType;
- explicit EliasFanoReader(const CompressedList& list)
+ explicit EliasFanoReader(const EliasFanoCompressedList& list)
: list_(list),
lowerMask_((ValueType(1) << list_.numLowerBits) - 1),
upper_(list),
return lowerMask_ & (ptrv >> (pos % 8));
}
- const CompressedList list_;
+ const EliasFanoCompressedList list_;
const ValueType lowerMask_;
- detail::UpperBitsReader<CompressedList, Instructions> upper_;
+ detail::UpperBitsReader<Encoder, Instructions> upper_;
size_t progress_;
ValueType value_;
ValueType lastValue_;
return result;
}
-template <class Reader, class List>
-void testEmpty() {
- List list;
- const typename List::ValueType* const data = nullptr;
- List::encode(data, 0, list);
- {
- Reader reader(list);
- EXPECT_FALSE(reader.next());
- EXPECT_EQ(reader.size(), 0);
- }
- {
- Reader reader(list);
- EXPECT_FALSE(reader.skip(1));
- EXPECT_FALSE(reader.skip(10));
- }
- {
- Reader reader(list);
- EXPECT_FALSE(reader.skipTo(1));
- }
-}
-
template <class Reader, class List>
void testNext(const std::vector<uint32_t>& data, const List& list) {
Reader reader(list);
}
}
-template <class Reader, class List>
+template <class Reader, class Encoder>
+void testEmpty() {
+ typename Encoder::CompressedList list;
+ const typename Encoder::ValueType* const data = nullptr;
+ Encoder::encode(data, 0, list);
+ {
+ Reader reader(list);
+ EXPECT_FALSE(reader.next());
+ EXPECT_EQ(reader.size(), 0);
+ }
+ {
+ Reader reader(list);
+ EXPECT_FALSE(reader.skip(1));
+ EXPECT_FALSE(reader.skip(10));
+ }
+ {
+ Reader reader(list);
+ EXPECT_FALSE(reader.skipTo(1));
+ }
+}
+
+template <class Reader, class Encoder>
void testAll(const std::vector<uint32_t>& data) {
- List list;
- List::encode(data.begin(), data.end(), list);
+ typename Encoder::CompressedList list;
+ Encoder::encode(data.begin(), data.end(), list);
testNext<Reader>(data, list);
testSkip<Reader>(data, list);
testSkipTo<Reader>(data, list);
* limitations under the License.
*/
-#include "folly/experimental/test/CodingTestUtils.h"
-#include "folly/experimental/EliasFanoCoding.h"
#include "folly/Benchmark.h"
+#include "folly/experimental/EliasFanoCoding.h"
+#include "folly/experimental/test/CodingTestUtils.h"
using namespace folly::compression;
-template <class List>
-void testAll() {
- typedef EliasFanoReader<List> Reader;
- testAll<Reader, List>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
- testAll<Reader, List>(generateSeqList(1, 100000, 100));
-}
+template <size_t kVersion>
+struct TestType {
+ static constexpr size_t Version = kVersion;
+};
+
+template <class T>
+class EliasFanoCodingTest : public ::testing::Test {
+ public:
+ void doTestEmpty() {
+ typedef EliasFanoEncoder<uint32_t, size_t, 0, 0, T::Version> Encoder;
+ typedef EliasFanoReader<Encoder> Reader;
+ testEmpty<Reader, Encoder>();
+ }
+
+ template <size_t kSkipQuantum, size_t kForwardQuantum>
+ void doTestAll() {
+ typedef EliasFanoEncoder<
+ uint32_t, uint32_t, kSkipQuantum, kForwardQuantum, T::Version> Encoder;
+ typedef EliasFanoReader<Encoder> Reader;
+ testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
+ testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
+ }
+};
-TEST(EliasFanoCompressedList, Empty) {
- typedef EliasFanoCompressedList<uint32_t> List;
- typedef EliasFanoReader<List> Reader;
- testEmpty<Reader, List>();
+typedef ::testing::Types<TestType<0>, TestType<1>> TestTypes;
+TYPED_TEST_CASE(EliasFanoCodingTest, TestTypes);
+
+TYPED_TEST(EliasFanoCodingTest, Empty) {
+ TestFixture::doTestEmpty();
}
-TEST(EliasFanoCompressedList, Simple) {
- testAll<EliasFanoCompressedList<uint32_t> >();
+TYPED_TEST(EliasFanoCodingTest, Simple) {
+ TestFixture::template doTestAll<0, 0>();
}
-TEST(EliasFanoCompressedList, SkipPointers) {
- testAll<EliasFanoCompressedList<uint32_t, uint32_t, 128, 0> >();
+TYPED_TEST(EliasFanoCodingTest, SkipPointers) {
+ TestFixture::template doTestAll<128, 0>();
}
-TEST(EliasFanoCompressedList, ForwardPointers) {
- testAll<EliasFanoCompressedList<uint32_t, uint32_t, 0, 128> >();
+TYPED_TEST(EliasFanoCodingTest, ForwardPointers) {
+ TestFixture::template doTestAll<0, 128>();
}
-TEST(EliasFanoCompressedList, SkipForwardPointers) {
- testAll<EliasFanoCompressedList<uint32_t, uint32_t, 128, 128> >();
+TYPED_TEST(EliasFanoCodingTest, SkipForwardPointers) {
+ TestFixture::template doTestAll<128, 128>();
}
namespace bm {
constexpr size_t k1M = 1000000;
-typedef EliasFanoCompressedList<uint32_t, uint32_t, 128, 128> List;
-typedef EliasFanoReader<List> Reader;
+constexpr size_t kVersion = 1;
+
+typedef EliasFanoEncoder<uint32_t, uint32_t, 128, 128, kVersion> Encoder;
+typedef EliasFanoReader<Encoder> Reader;
std::vector<uint32_t> data;
-List list;
+typename Encoder::CompressedList list;
void init() {
data = generateRandomList(100 * 1000, 10 * 1000 * 1000);
//data = loadList("/home/philipp/pl_test_dump.txt");
- List::encode(data.data(), data.size(), bm::list);
+ Encoder::encode(data.data(), data.size(), bm::list);
}
void free() {