From 29c72a135c04cf45aef35498432f45557aec02df Mon Sep 17 00:00:00 2001 From: Rushy Panchal Date: Wed, 16 Aug 2017 17:19:03 -0700 Subject: [PATCH] Add prepareSkipTo() method to EliasFanoReader Summary: `prepareSkipTo(x`) allows the client of EliasFanoReader to "hint" that `skipTo(x)` will be called in the near future. The primary benefit of doing so is that memory which is needed for `skipTo(x)` can be prefetched to minimize cache misses incurred when calling `skipTo(x)`. Reviewed By: ot, philippv Differential Revision: D5508995 fbshipit-source-id: 4876b566256849f76193db3dc0404768aeeeb30d --- folly/experimental/EliasFanoCoding.h | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/folly/experimental/EliasFanoCoding.h b/folly/experimental/EliasFanoCoding.h index 15d3d5d6..96ea7cd7 100644 --- a/folly/experimental/EliasFanoCoding.h +++ b/folly/experimental/EliasFanoCoding.h @@ -46,6 +46,8 @@ namespace folly { namespace compression { static_assert(kIsLittleEndian, "EliasFanoCoding.h requires little endianness"); +constexpr size_t kCacheLineSize = 64; + template struct EliasFanoCompressedListBase { EliasFanoCompressedListBase() = default; @@ -450,6 +452,38 @@ class UpperBitsReader : ForwardPointers, return value_; } + /** + * Prepare to skip to `value`. This is a constant-time operation that will + * prefetch memory required for a `skipTo(value)` call. + * + * @return position of reader + */ + SizeType prepareSkipTo(ValueType v) const { + auto position = position_; + + if (Encoder::skipQuantum > 0 && v >= value_ + Encoder::skipQuantum) { + auto outer = outer_; + const size_t steps = v / Encoder::skipQuantum; + const size_t dest = folly::loadUnaligned( + this->skipPointers_ + (steps - 1) * sizeof(SkipValueType)); + + position = dest - 1; + outer = (dest + Encoder::skipQuantum * steps) / 8; + + // Prefetch up to the beginning of where we linear search. After that, + // hardware prefetching will outperform our own. In addition, this + // simplifies calculating what to prefetch as we don't have to calculate + // the entire destination address. Two cache lines are prefetched because + // this results in fewer cycles used (based on practical results) than + // one. However, three cache lines does not have any additional effect. + const auto addr = start_ + outer; + __builtin_prefetch(addr); + __builtin_prefetch(addr + kCacheLineSize); + } + + return position; + } + ValueType jump(size_t n) { if (Encoder::forwardQuantum == 0 || n <= Encoder::forwardQuantum) { reset(); @@ -611,6 +645,29 @@ class EliasFanoReader { return true; } + /** + * Prepare to skip to `value` by prefetching appropriate memory in both the + * upper and lower bits. + */ + void prepareSkipTo(ValueType value) const { + // Also works when value_ == kInvalidValue. + if (value != kInvalidValue) { + DCHECK_GE(value + 1, value_ + 1); + } + + if ((!kUnchecked && value > lastValue_) || (value == value_)) { + return; + } + + // Do minimal computation required to prefetch address used in + // `readLowerPart()`. + ValueType upperValue = (value >> numLowerBits_); + const auto upperPosition = upper_.prepareSkipTo(upperValue); + const auto addr = lower_ + (upperPosition * numLowerBits_ / 8); + __builtin_prefetch(addr); + __builtin_prefetch(addr + kCacheLineSize); + } + bool jump(SizeType n) { if (LIKELY(n < size_)) { // Also checks that n != -1. value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_); -- 2.34.1