#include <limits>
#include <type_traits>
+#include <folly/Assume.h>
#include <folly/Bits.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
: size_(list.size),
lower_(list.lower),
upper_(list),
- lowerMask_((ValueType(1) << list.numLowerBits) - 1),
numLowerBits_(list.numLowerBits) {
DCHECK(Instructions::supported());
// To avoid extra branching during skipTo() while reading
const size_t pos = i * numLowerBits_;
const unsigned char* ptr = lower_ + (pos / 8);
const uint64_t ptrv = folly::loadUnaligned<uint64_t>(ptr);
- return lowerMask_ & (ptrv >> (pos % 8));
+ // This removes the branch in the fallback implementation of
+ // bzhi. The condition is verified at encoding time.
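+ // Given that numLowerBits_ is below the bit width of ValueType,
+ // bzhi(x, numLowerBits_) equals the old (lowerMask_ & x); e.g.
+ // bzhi(x, 5) == x & 0x1F.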
+ assume(numLowerBits_ < sizeof(ValueType) * 8);
+ return Instructions::bzhi(ptrv >> (pos % 8), numLowerBits_);
}
void iterateTo(ValueType value) {
size_t size_;
const uint8_t* lower_;
detail::UpperBitsReader<Encoder, Instructions> upper_;
- const ValueType lowerMask_;
ValueType value_ = kInvalidValue;
ValueType lastValue_;
uint8_t numLowerBits_;
return (value >> start) &
((length == 64) ? (~0ULL) : ((1ULL << length) - 1ULL));
}
+
+ // Clear high bits starting at position index.
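+ // E.g. bzhi(0b110101, 3) == 0b101: only the low `index` bits are kept.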
+ static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) {
+ if (index > 63) {
+ return 0;
+ }
+ return value & ((uint64_t(1) << index) - 1);
+ }
};
struct Nehalem : public Default {
struct Haswell : public Nehalem {
static bool supported(const folly::CpuId& cpuId = {}) {
- return Nehalem::supported(cpuId) && cpuId.bmi1();
+ return Nehalem::supported(cpuId) && cpuId.bmi1() && cpuId.bmi2();
}
static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
// BMI1 is supported starting with Intel Haswell, AMD Piledriver.
-// BLSR combines two instuctions into one and reduces register pressure.
+// BLSR combines two instructions into one and reduces register pressure.
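+// BLSR clears the lowest set bit: blsr(x) == x & (x - 1).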
#if defined(__GNUC__) || defined(__clang__)
// GCC and Clang won't inline the intrinsics.
uint64_t result;
asm("blsrq %1, %0" : "=r"(result) : "r"(value));
return result;
#else
return _bextr_u64(value, start, length);
+#endif
+ }
+
+ static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) {
+#if defined(__GNUC__) || defined(__clang__)
+ // GCC and Clang won't inline the intrinsics.
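+ // bzhiq needs 64-bit register operands, so widen the index explicitly.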
+ const uint64_t index64 = index;
+ uint64_t result;
+ asm("bzhiq %2, %1, %0" : "=r"(result) : "r"(value), "r"(index64));
+ return result;
+#else
+ return _bzhi_u64(value, index);
#endif
}
};