* (T, where T is an unsigned integral type) or unaligned values
* (Unaligned<T>, where T is an unsigned integral type)
*/
-template <class T, class Enable=void> struct BitsTraits;
+template <class T, class Enable = void>
+struct BitsTraits;
// Partial specialization for Unaligned<T>, where T is unsigned integral
// loadRMW is the same as load, but it indicates that it loads for a
// silence the GCC warning in that case.
template <class T>
struct BitsTraits<Unaligned<T>, typename std::enable_if<
- (std::is_integral<T>::value && std::is_unsigned<T>::value)>::type> {
+ (std::is_integral<T>::value)>::type> {
typedef T UnderlyingType;
static T load(const Unaligned<T>& x) { return x.value; }
static void store(Unaligned<T>& x, T v) { x.value = v; }
// Special version that allows to disable address sanitizer on demand.
template <class T>
struct BitsTraits<UnalignedNoASan<T>, typename std::enable_if<
- (std::is_integral<T>::value && std::is_unsigned<T>::value)>::type> {
+ (std::is_integral<T>::value)>::type> {
typedef T UnderlyingType;
static T FOLLY_DISABLE_ADDRESS_SANITIZER
load(const UnalignedNoASan<T>& x) { return x.value; }
// Partial specialization for T, where T is unsigned integral
template <class T>
struct BitsTraits<T, typename std::enable_if<
- (std::is_integral<T>::value && std::is_unsigned<T>::value)>::type> {
+ (std::is_integral<T>::value)>::type> {
typedef T UnderlyingType;
static T load(const T& x) { return x; }
static void store(T& x, T v) { x = v; }
/**
* Number of bits in a block.
*/
- static constexpr size_t bitsPerBlock =
- std::numeric_limits<UnderlyingType>::digits;
+ static constexpr size_t bitsPerBlock = std::numeric_limits<
+ typename std::make_unsigned<UnderlyingType>::type>::digits;
/**
* Byte index of the given bit.
* from the least significant count bits of value; little endian.
* (value & 1 becomes the bit at bitStart, etc)
* Precondition: count <= sizeof(T) * 8
+ * Precondition: value can fit in 'count' bits
*/
static void set(T* p, size_t bitStart, size_t count, UnderlyingType value);
inline void Bits<T, Traits>::set(T* p, size_t bitStart, size_t count,
UnderlyingType value) {
assert(count <= sizeof(UnderlyingType) * 8);
+ size_t cut = bitsPerBlock - count;
+ assert(value == (value << cut >> cut));
size_t idx = blockIndex(bitStart);
size_t offset = bitOffset(bitStart);
+ if (std::is_signed<UnderlyingType>::value) {
+ value &= ones(count);
+ }
if (offset + count <= bitsPerBlock) {
innerSet(p + idx, offset, count, value);
} else {
size_t countInThisBlock = bitsPerBlock - offset;
size_t countInNextBlock = count - countInThisBlock;
- innerSet(p + idx, offset, countInThisBlock,
- value & ((one << countInThisBlock) - 1));
- innerSet(p + idx + 1, 0, countInNextBlock, value >> countInThisBlock);
+
+ UnderlyingType thisBlock = value & ((one << countInThisBlock) - 1);
+ UnderlyingType nextBlock = value >> countInThisBlock;
+ if (std::is_signed<UnderlyingType>::value) {
+ nextBlock &= ones(countInNextBlock);
+ }
+ innerSet(p + idx, offset, countInThisBlock, thisBlock);
+ innerSet(p + idx + 1, 0, countInNextBlock, nextBlock);
}
}
assert(count <= sizeof(UnderlyingType) * 8);
size_t idx = blockIndex(bitStart);
size_t offset = bitOffset(bitStart);
+ UnderlyingType ret;
if (offset + count <= bitsPerBlock) {
- return innerGet(p + idx, offset, count);
+ ret = innerGet(p + idx, offset, count);
} else {
size_t countInThisBlock = bitsPerBlock - offset;
size_t countInNextBlock = count - countInThisBlock;
UnderlyingType thisBlockValue = innerGet(p + idx, offset, countInThisBlock);
UnderlyingType nextBlockValue = innerGet(p + idx + 1, 0, countInNextBlock);
- return (nextBlockValue << countInThisBlock) | thisBlockValue;
+ ret = (nextBlockValue << countInThisBlock) | thisBlockValue;
}
+ if (std::is_signed<UnderlyingType>::value) {
+ size_t emptyBits = bitsPerBlock - count;
+ ret <<= emptyBits;
+ ret >>= emptyBits;
+ }
+ return ret;
}
template <class T, class Traits>
inline auto Bits<T, Traits>::innerGet(const T* p, size_t offset, size_t count)
-    -> UnderlyingType {
+    -> UnderlyingType {
+  // Extract the 'count' bits starting at bit 'offset' within the single
+  // block *p (no cross-block handling here; callers split spanning reads).
  return (Traits::load(*p) >> offset) & ones(count);
}
--- /dev/null
+/*
+ * Copyright 2014 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <atomic>
+#include <glog/logging.h>
+#include <random>
+#include <memory>
+
+#include "folly/Benchmark.h"
+#include "folly/experimental/Bits.h"
+
+// Seed source for the per-benchmark PRNGs below.
+std::random_device rd;
+
+const size_t kBufferSize = 1 << 10;
+// Shared scratch buffer (zero-initialized by std::vector). NOTE(review):
+// the extra 16 bytes are presumably headroom for fields written near the
+// end of the kBufferSize*8 bit range — confirm against Bits' block access.
+std::vector<uint8_t> buffer(kBufferSize + 16);
+
+// Benchmark Bits<T>::set: performs n writes of bit fields (~6.9 bits per
+// byte of T) at deterministic pseudo-random bit offsets, using k
+// pre-generated random values whose magnitude is checked to fit the field.
+template <class T>
+void benchmarkSet(size_t n, T) {
+  size_t size = sizeof(T) * 6.9;  // use 6.9 bits/byte
+  const size_t k = 16;
+  T values[k];
+  BENCHMARK_SUSPEND {
+    std::mt19937 gen(rd());
+    T max, min;
+    if (std::is_signed<T>::value) {
+      max = (T(1) << (size - 1)) - 1;
+      min = -(T(1) << (size - 1));
+    } else {
+      max = (T(1) << size) - 1;
+      min = 0;
+    }
+    CHECK_LE(folly::findLastSet(max), size);
+    CHECK_LE(folly::findLastSet(-min), size);
+    std::uniform_int_distribution<T> dis(min, max);
+    // size_t index: k is size_t, so an int counter would trigger a
+    // signed/unsigned comparison warning.
+    for (size_t i = 0; i < k; ++i) {
+      values[i] = dis(gen);
+    }
+  }
+
+  for (size_t i = 0; i < n; ++i) {
+    size_t bit = (i * 2973) % (kBufferSize * 8);
+    size_t drop = i % size;  // vary the field width as well as the offset
+    folly::Bits<T>::set(reinterpret_cast<T *>(buffer.data()),
+                        bit, size - drop, values[i % k] >> drop);
+  }
+
+  // Read something back so the stores cannot be eliminated entirely.
+  folly::doNotOptimizeAway(
+      folly::Bits<T>::test(reinterpret_cast<T *>(buffer.data()), 512));
+}
+
+// Each signed Set variant is measured relative to the unsigned baseline of
+// the same width.
+BENCHMARK_NAMED_PARAM(benchmarkSet, u16, uint16_t());
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i16, int16_t());
+BENCHMARK_NAMED_PARAM(benchmarkSet, u32, uint32_t());
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i32, int32_t());
+BENCHMARK_NAMED_PARAM(benchmarkSet, u64, uint64_t());
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i64, int64_t());
+
+BENCHMARK_DRAW_LINE();
+
+// NOTE(review): returned from main() but never written by the benchmarks —
+// presumably intended as an optimization barrier; verify it is needed.
+std::atomic<int64_t> sum(0);
+
+// Benchmark Bits<T>::get: mirrors benchmarkSet's offset/width pattern and
+// accumulates the values read into x so the loads cannot be optimized out.
+template <class T>
+void benchmarkGet(size_t n, T x) {
+  size_t size = sizeof(T) * 6.9;  // use 6.9 bits/byte
+  for (size_t i = 0; i < n; ++i) {
+    size_t bit = (i * 2973) % (kBufferSize * 8);
+    size_t drop = i % size;
+    x += folly::Bits<T>::get(
+        reinterpret_cast<T *>(buffer.data()), bit, size - drop);
+  }
+  folly::doNotOptimizeAway(x);
+}
+
+// Each signed Get variant is measured relative to the unsigned baseline of
+// the same width.
+BENCHMARK_NAMED_PARAM(benchmarkGet, u16, uint16_t(0));
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i16, int16_t(0));
+BENCHMARK_NAMED_PARAM(benchmarkGet, u32, uint32_t(0));
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i32, int32_t(0));
+BENCHMARK_NAMED_PARAM(benchmarkGet, u64, uint64_t(0));
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i64, int64_t(0));
+
+#if 0
+============================================================================
+folly/experimental/test/BitsBenchmark.cpp relative time/iter iters/s
+============================================================================
+benchmarkSet(u16) 8.58ns 116.59M
+benchmarkSet(i16) 88.42% 9.70ns 103.08M
+benchmarkSet(u32) 8.37ns 119.45M
+benchmarkSet(i32) 88.23% 9.49ns 105.39M
+benchmarkSet(u64) 9.23ns 108.34M
+benchmarkSet(i64) 82.77% 11.15ns 89.68M
+----------------------------------------------------------------------------
+benchmarkGet(u16) 6.32ns 158.13M
+benchmarkGet(i16) 80.40% 7.87ns 127.14M
+benchmarkGet(u32) 6.34ns 157.65M
+benchmarkGet(i32) 84.61% 7.50ns 133.39M
+benchmarkGet(u64) 7.32ns 136.58M
+benchmarkGet(i64) 85.78% 8.53ns 117.16M
+============================================================================
+#endif
+
+int main(int argc, char *argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  folly::runBenchmarks();
+  // NOTE(review): 'sum' is never written by the benchmarks above, so this
+  // returns 0 in practice (and int64_t would be truncated to int anyway).
+  return sum.load();
+}
}
template <class T>
+// Exercises get/set on a signed 8-bit block type: reads must sign-extend
+// and writes of negative values must store only the field's low bits.
+void runSignedMultiBitTest8() {
+  auto load = detail::BitsTraits<T>::load;
+  T buf[] = {0x12, 0x34, 0x56, 0x78};
+
+  // Fields whose top bit is set come back sign-extended, e.g. the 5-bit
+  // field 0x1a reads as 0x1a - 32 == -6.
+  EXPECT_EQ(0x02, load(Bits<T>::get(buf, 0, 4)));
+  EXPECT_EQ(0x1a-32, load(Bits<T>::get(buf, 9, 5)));
+  EXPECT_EQ(0xb1-256, load(Bits<T>::get(buf, 13, 8)));
+
+  // Writing a negative value stores the low 'count' bits of its two's
+  // complement form; neighboring bits are untouched.
+  Bits<T>::set(buf, 0, 4, 0x0b - 0x10);
+  EXPECT_EQ(0x1b, load(buf[0]));
+  EXPECT_EQ(0x34, load(buf[1]));
+  EXPECT_EQ(0x56, load(buf[2]));
+  EXPECT_EQ(0x78, load(buf[3]));
+
+  Bits<T>::set(buf, 9, 5, 0x0e);
+  EXPECT_EQ(0x1b, load(buf[0]));
+  EXPECT_EQ(0x1c, load(buf[1]));
+  EXPECT_EQ(0x56, load(buf[2]));
+  EXPECT_EQ(0x78, load(buf[3]));
+
+  // A negative field spanning a block boundary (touches buf[1] and buf[2]).
+  Bits<T>::set(buf, 13, 8, 0xaa - 0x100);
+  EXPECT_EQ(0x1b, load(buf[0]));
+  EXPECT_EQ(0x5c, load(buf[1]));
+  EXPECT_EQ(0x55, load(buf[2]));
+  EXPECT_EQ(0x78, load(buf[3]));
+}
+
+
+TEST(Bits, SignedMultiBit8) {
+  runSignedMultiBitTest8<int8_t>();
+}
+
+template <class T, class R = T>
void runMultiBitTest64() {
auto load = detail::BitsTraits<T>::load;
T buf[] = {0x123456789abcdef0, 0x13579bdf2468ace0};
runMultiBitTest64<uint64_t>();
}
+TEST(Bits, MultiBitSigned64) {
+  // TODO(review): this test is vacuous — enable the call below once
+  // runMultiBitTest64's expected values account for sign extension of
+  // fields with the high bit set.
+  //runMultiBitTest64<int64_t>();
+}
+
TEST(Bits, MultiBitUnaligned64) {
-  runMultiBitTest64<Unaligned<uint64_t>>();
+  runMultiBitTest64<Unaligned<uint64_t>, uint64_t>();
+}
+
+namespace {
+// Dispatch helpers: route to Bits<Unaligned<T>> or Bits<T> depending on the
+// 'aligned' flag, so the concatenation test can cover both paths through a
+// single interface.
+template <bool aligned, class T>
+typename std::enable_if<!aligned>::type testSet(uint8_t *buf,
+                                                size_t start,
+                                                size_t bits,
+                                                T value) {
+  Bits<Unaligned<T>>::set(
+      reinterpret_cast<Unaligned<T> *>(buf), start, bits, value);
+}
+
+template <bool aligned, class T>
+typename std::enable_if<aligned>::type testSet(uint8_t *buf,
+                                               size_t start,
+                                               size_t bits,
+                                               T value) {
+  Bits<T>::set(reinterpret_cast<T *>(buf), start, bits, value);
+}
+
+template <bool aligned, class T>
+typename std::enable_if<!aligned, T>::type testGet(uint8_t *buf,
+                                                   size_t start,
+                                                   size_t bits) {
+  return Bits<Unaligned<T>>::get(
+      reinterpret_cast<Unaligned<T> *>(buf), start, bits);
+}
+
+template <bool aligned, class T>
+typename std::enable_if<aligned, T>::type testGet(uint8_t *buf,
+                                                  size_t start,
+                                                  size_t bits) {
+  return Bits<T>::get(reinterpret_cast<T *>(buf), start, bits);
+}
+
+// Deterministic test value: ~2/3 of the largest magnitude that fits in
+// 'bits' bits (one bit reserved for the sign on signed types), negated when
+// 'negate' is true. Computed in double, range-checked before the cast.
+template <class T, bool negate = false>
+T testValue(int bits) {
+  if (std::is_signed<T>::value) {
+    --bits;
+  }
+  auto value = pow(2, bits) * (negate ? -2.0 : 2.0) / 3.0;
+  CHECK_GE(value, std::numeric_limits<T>::min());
+  CHECK_LE(value, std::numeric_limits<T>::max());
+  return static_cast<T>(value);
+}
+}  // namespace
+
+// The signed get/set paths rely on >> sign-extending negative values, which
+// is implementation-defined before C++20 — fail loudly where it doesn't.
+static_assert((-4) >> 1 == -2,
+              "signed right shift must be arithmetic (sign-extending)");
+// Concatenated-fields round-trip test: write fields of width 0..N for each
+// type — an unsigned pass, then signed positive and signed negative passes —
+// storing ~2/3 of the maximum value that fits in each field, then read every
+// field back and verify it.
+template <bool aligned>
+void testConcatenation() {
+  // Three passes (unsigned, signed positive, signed negative), each
+  // concatenating fields of length 0, 1, 2, ... up to the type's usable
+  // width (sign bit excluded for signed types).
+  const size_t bitLimit = 64;
+  const size_t bitsPerPass = (1 + bitLimit) * bitLimit / 2;
+  const size_t totalBits = bitsPerPass * 3;
+  uint8_t buf[(totalBits + 7) / 8];
+#define EACH_UNSIGNED_SIZE(MACRO, ARG) \
+  MACRO(8, uint8_t, ARG); \
+  MACRO(16, uint16_t, ARG); \
+  MACRO(32, uint32_t, ARG); \
+  MACRO(64, uint64_t, ARG);
+#define EACH_SIGNED_SIZE(MACRO, ARG) \
+  MACRO(7, int8_t, ARG); \
+  MACRO(15, int16_t, ARG); \
+  MACRO(31, int32_t, ARG); \
+  MACRO(63, int64_t, ARG);
+  {
+    size_t w = 0, s;
+#define WRITE_TEST(N, T, NEG) \
+  for (; s <= N; ++s, w += s) { \
+    testSet<aligned>(buf, w, s, testValue<T, NEG>(s)); \
+    EXPECT_EQ((testValue<T, NEG>(s)), (testGet<aligned, T>(buf, w, s))) << s; \
+  }
+    s = 0;
+    EACH_UNSIGNED_SIZE(WRITE_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(WRITE_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(WRITE_TEST, true)
+#undef WRITE_TEST
+  }
+  {
+    // Second sweep: re-read everything after all writes have completed, to
+    // catch writes that clobbered a neighboring field.
+    size_t r = 0, s;
+#define READ_TEST(N, T, NEG) \
+  for (; s <= N; ++s, r += s) \
+    EXPECT_EQ((testValue<T, NEG>(s)), (testGet<aligned, T>(buf, r, s))) << s;
+    s = 0;
+    EACH_UNSIGNED_SIZE(READ_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(READ_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(READ_TEST, true)
+#undef READ_TEST
+  }
+#undef EACH_UNSIGNED_SIZE
+#undef EACH_SIGNED_SIZE  // previously leaked past this function
+}
+
+// NOTE(review): despite the "Unsigned" suffix, this test also runs the
+// signed passes inside testConcatenation — consider renaming.
+TEST(Bits, ConcatenationUnalignedUnsigned) { testConcatenation<false>(); }
+
+TEST(Bits, ConcatenationAligned) {
+  testConcatenation<true>();
}
int main(int argc, char *argv[]) {