explicit UpperBitsReader(const EliasFanoCompressedList& list)
: forwardPointers_(list.forwardPointers.data()),
skipPointers_(list.skipPointers.data()),
- start_(list.upper.data()),
- block_(start_ != nullptr ? folly::loadUnaligned<block_t>(start_) : 0),
- outer_(0), // outer offset: number of consumed bytes in upper.
- inner_(-1), // inner offset: (bit) position in current block.
- position_(-1), // index of current value (= #reads - 1).
- value_(0) { }
+ start_(list.upper.data()) {
+ reset();
+ }
+
+ void reset() {
+ block_ = start_ != nullptr ? folly::loadUnaligned<block_t>(start_) : 0;
+ outer_ = 0;
+ inner_ = -1;
+ position_ = -1;
+ value_ = 0;
+ }
size_t position() const { return position_; }
ValueType value() const { return value_; }
return value_;
}
+ ValueType goTo(size_t n) {
+ if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) {
+ reset();
+ } else {
+ position_ = -1; // Avoid reading the head, skip() will call reposition().
+ }
+ return skip(n);
+ }
+
private:
ValueType setValue() {
value_ = static_cast<ValueType>(8 * outer_ + inner_ - position_);
const unsigned char* const skipPointers_;
const unsigned char* const start_;
block_t block_;
- size_t outer_;
- size_t inner_;
- size_t position_;
+ size_t outer_; // Outer offset: number of consumed bytes in upper.
+ size_t inner_; // Inner offset: (bit) position in current block.
+ size_t position_; // Index of current value (= #reads - 1).
ValueType value_;
};
class Instructions = instructions::Default>
class EliasFanoReader : private boost::noncopyable {
public:
+ typedef Encoder EncoderType;
typedef typename Encoder::ValueType ValueType;
explicit EliasFanoReader(const EliasFanoCompressedList& list)
: list_(list),
lowerMask_((ValueType(1) << list_.numLowerBits) - 1),
- upper_(list),
- progress_(0),
- value_(0) {
+ upper_(list_) {
DCHECK(Instructions::supported());
// To avoid extra branching during skipTo() while reading
// upper sequence we need to know the last element.
bool skip(size_t n) {
CHECK_GT(n, 0);
- progress_ += n - 1;
- if (LIKELY(progress_ < list_.size)) {
- value_ = readLowerPart(progress_) |
+ progress_ += n;
+ if (LIKELY(progress_ <= list_.size)) {
+ value_ = readLowerPart(progress_ - 1) |
(upper_.skip(n) << list_.numLowerBits);
- ++progress_;
return true;
}
return true;
}
+ bool goTo(size_t n) {
+ if (LIKELY(n - 1 < list_.size)) { // n > 0 && n <= list_.size
+ progress_ = n;
+ value_ = readLowerPart(n - 1) | (upper_.goTo(n) << list_.numLowerBits);
+ return true;
+ } else if (n == 0) {
+ upper_.reset();
+ progress_ = 0;
+ value_ = 0;
+ return true;
+ }
+ progress_ = list_.size;
+ value_ = std::numeric_limits<ValueType>::max();
+ return false;
+ }
+
private:
ValueType readLowerPart(size_t i) const {
+ DCHECK_LT(i, list_.size);
const size_t pos = i * list_.numLowerBits;
const unsigned char* ptr = list_.lower.data() + (pos / 8);
const uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
const EliasFanoCompressedList list_;
const ValueType lowerMask_;
detail::UpperBitsReader<Encoder, Instructions> upper_;
- size_t progress_;
- ValueType value_;
+ size_t progress_ = 0;
+ ValueType value_ = 0;
ValueType lastValue_;
};
namespace folly { namespace compression {
-std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
+template <class URNG>
+std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) {
CHECK_LT(n, 2 * maxId);
- std::mt19937 gen;
std::uniform_int_distribution<> uid(1, maxId);
std::unordered_set<uint32_t> dataset;
while (dataset.size() < n) {
- uint32_t value = uid(gen);
+ uint32_t value = uid(g);
if (dataset.count(value) == 0) {
dataset.insert(value);
}
return ids;
}
-std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId,
- uint32_t step = 1) {
+inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
+ std::mt19937 gen;
+ return generateRandomList(n, maxId, gen);
+}
+
+inline std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId,
+ uint32_t step = 1) {
CHECK_LE(minId, maxId);
CHECK_GT(step, 0);
std::vector<uint32_t> ids;
return ids;
}
-std::vector<uint32_t> loadList(const std::string& filename) {
+inline std::vector<uint32_t> loadList(const std::string& filename) {
std::ifstream fin(filename);
std::vector<uint32_t> result;
uint32_t id;
EXPECT_EQ(reader.value(), *it);
value = reader.value() + delta;
}
-
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.next());
}
}
}
+template <class Reader, class List>
+void testGoTo(const std::vector<uint32_t>& data, const List& list) {
+ std::mt19937 gen;
+ std::vector<size_t> is(data.size());
+ for (size_t i = 0; i < data.size(); ++i) {
+ is[i] = i;
+ }
+ std::shuffle(is.begin(), is.end(), gen);
+ if (Reader::EncoderType::forwardQuantum == 0) {
+ is.resize(std::min<size_t>(is.size(), 100));
+ }
+
+ Reader reader(list);
+ EXPECT_TRUE(reader.goTo(0));
+ EXPECT_EQ(reader.value(), 0);
+ for (auto i : is) {
+ EXPECT_TRUE(reader.goTo(i + 1));
+ EXPECT_EQ(reader.value(), data[i]);
+ }
+ EXPECT_FALSE(reader.goTo(data.size() + 1));
+ EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
+}
+
template <class Reader, class Encoder>
void testEmpty() {
typename Encoder::CompressedList list;
testNext<Reader>(data, list);
testSkip<Reader>(data, list);
testSkipTo<Reader>(data, list);
+ testGoTo<Reader>(data, list);
list.free();
}
}
}
+template <class Reader, class List>
+void bmGoTo(const List& list, const std::vector<uint32_t>& data,
+ const std::vector<size_t>& order, size_t iters) {
+ CHECK(!data.empty());
+ CHECK_EQ(data.size(), order.size());
+
+ Reader reader(list);
+ for (size_t i = 0; i < iters; ) {
+ for (size_t j : order) {
+ reader.goTo(j + 1);
+ const uint32_t value = reader.value();
+ CHECK_EQ(value, data[j]);
+ ++i;
+ }
+ }
+}
+
}} // namespaces
#endif // FOLLY_EXPERIMENTAL_CODING_TEST_UTILS_H
* limitations under the License.
*/
+#include <algorithm>
+#include <random>
+#include <vector>
+
#include <folly/Benchmark.h>
#include <folly/experimental/EliasFanoCoding.h>
#include <folly/experimental/test/CodingTestUtils.h>
typedef EliasFanoReader<Encoder> Reader;
std::vector<uint32_t> data;
+std::vector<size_t> order;
+
typename Encoder::CompressedList list;
void init() {
- data = generateRandomList(100 * 1000, 10 * 1000 * 1000);
+ std::mt19937 gen;
+
+ data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
//data = loadList("/home/philipp/pl_test_dump.txt");
Encoder::encode(data.data(), data.size(), bm::list);
+
+ order.clear();
+ order.reserve(data.size());
+ for (size_t i = 0; i < data.size(); ++i) {
+ order.push_back(i);
+ }
+ std::shuffle(order.begin(), order.end(), gen);
}
void free() {
bmSkip<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
}
+BENCHMARK(GoTo_ForwardQ128_1M) {
+ bmGoTo<bm::Reader>(bm::list, bm::data, bm::order, bm::k1M);
+}
+
BENCHMARK(SkipTo1_SkipQ128_1M) {
bmSkipTo<bm::Reader>(bm::list, bm::data, 1, bm::k1M);
}
bmSkipTo<bm::Reader>(bm::list, bm::data, 1000, bm::k1M);
}
+#if 0
+Intel Xeon CPU E5-2660 @ 2.7GHz (turbo on), using instructions::Fast.
+
+============================================================================
+folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s
+============================================================================
+Next_1M 4.86ms 205.97
+Skip1_ForwarQ128_1M 5.17ms 193.36
+Skip10_ForwarQ128_1M 13.69ms 73.03
+Skip100_ForwardQ128_1M 26.76ms 37.37
+Skip1000_ForwardQ128_1M 20.66ms 48.40
+GoTo_ForwardQ128_1M 43.75ms 22.86
+SkipTo1_SkipQ128_1M 9.74ms 102.70
+SkipTo10_SkipQ128_1M 30.62ms 32.66
+SkipTo100_SkipQ128_1M 37.70ms 26.53
+SkipTo1000_SkipQ128_1M 31.14ms 32.11
+============================================================================
+#endif
+
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);