--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_VARINT_H_
+#define FOLLY_VARINT_H_
+
+#include "folly/Range.h"
+
+namespace folly {
+
+/**
+ * Variable-length integer encoding, using a little-endian, base-128
+ * representation.
+ *
+ * The MSb is set on all bytes except the last.
+ *
+ * Details:
+ * https://developers.google.com/protocol-buffers/docs/encoding#varints
+ *
+ * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
+ * is faster and likely smaller.
+ */
+
+/**
+ * Maximum length (in bytes) of the varint encoding of a 32-bit value.
+ * Each encoded byte carries 7 bits of the value (base-128), so 32 bits
+ * need ceil(32 / 7) = 5 bytes.
+ */
+constexpr size_t kMaxVarintLength32 = 5;
+
+/**
+ * Maximum length (in bytes) of the varint encoding of a 64-bit value.
+ * Each encoded byte carries 7 bits of the value (base-128), so 64 bits
+ * need ceil(64 / 7) = 10 bytes.
+ */
+constexpr size_t kMaxVarintLength64 = 10;
+
+/**
+ * Encode a value in the given buffer, returning the number of bytes used
+ * for encoding.
+ * buf must have enough space to represent the value (at least
+ * kMaxVarintLength64 bytes to encode arbitrary 64-bit values); the
+ * implementation writes through buf without any bounds checking.
+ */
+size_t encodeVarint(uint64_t val, uint8_t* buf);
+
+/**
+ * Decode a value from the front of the given buffer and advance data past
+ * the bytes that were consumed.
+ *
+ * Throws std::invalid_argument if data ends before the final byte of the
+ * varint (the one without the MSb set) is found, or if kMaxVarintLength64
+ * consecutive bytes all have the MSb set. data is left untouched when an
+ * exception is thrown.
+ */
+uint64_t decodeVarint(ByteRange& data);
+
+/**
+ * ZigZag encoding that maps signed integers with a small absolute value
+ * to unsigned integers with small values. Without this, encoding a negative
+ * value using Varint would always use up 10 bytes (its top bit is set).
+ *
+ * if x >= 0, encodeZigZag(x) == 2*x
+ * if x < 0, encodeZigZag(x) == -2*x - 1
+ */
+
+/**
+ * Map a signed value to its ZigZag form:
+ * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
+ */
+inline uint64_t encodeZigZag(int64_t val) {
+  // Bit-twiddling magic stolen from the Google protocol buffer document.
+  // The left shift is performed on the unsigned representation: shifting a
+  // negative signed value left is undefined behavior before C++20.
+  // val >> 63 stays a signed (arithmetic) shift on purpose, so that it
+  // produces all ones for negative values and all zeros otherwise.
+  return (static_cast<uint64_t>(val) << 1) ^ static_cast<uint64_t>(val >> 63);
+}
+
+/**
+ * Inverse of encodeZigZag(): recover the signed value from its ZigZag form.
+ */
+inline int64_t decodeZigZag(uint64_t val) {
+  // The lowest bit says whether the original value was negative; negating
+  // it in unsigned arithmetic turns it into an all-ones or all-zeros mask.
+  const uint64_t magnitude = val >> 1;
+  const uint64_t signMask = -(val & 1);
+  return static_cast<int64_t>(magnitude ^ signMask);
+}
+
+// Implementation below
+
+/**
+ * Writes val to buf as a varint, least-significant 7-bit group first, and
+ * returns the number of bytes written. buf is not bounds-checked.
+ */
+inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
+  size_t len = 0;
+  // Every group that is followed by more data gets the continuation bit.
+  for (; val >= 128; val >>= 7) {
+    buf[len++] = 0x80 | (val & 0x7f);
+  }
+  // What is left fits in 7 bits and is stored with the MSb clear.
+  buf[len++] = val;
+  return len;
+}
+
+/**
+ * Decodes one varint from the front of data and advances data past the
+ * bytes that were consumed.
+ *
+ * Throws std::invalid_argument if data ends before a byte with the MSb
+ * clear is found, or if kMaxVarintLength64 consecutive bytes all have the
+ * MSb set. data is only advanced on success.
+ */
+inline uint64_t decodeVarint(ByteRange& data) {
+  // The bytes are read as signed so that "continuation bit (MSb) set" can be
+  // tested as "value is negative".
+  const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
+  const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
+  const int8_t* p = begin;
+  uint64_t val = 0;
+
+  // end - begin is never negative for a valid range; the cast only avoids a
+  // signed/unsigned comparison.
+  if (LIKELY(static_cast<size_t>(end - begin) >= kMaxVarintLength64)) {
+    // Fast path: at least kMaxVarintLength64 bytes are available, so the
+    // unrolled reads below need no per-byte end-of-buffer check.
+    // The payload bits are converted to uint64_t before shifting so that no
+    // shift can overflow a signed type.
+    int64_t b;
+    do {
+      b = *p++; val  = static_cast<uint64_t>(b & 0x7f)      ; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) <<  7; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 14; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 21; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 28; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 35; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 42; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 49; if (b >= 0) break;
+      b = *p++; val |= static_cast<uint64_t>(b & 0x7f) << 56; if (b >= 0) break;
+      // Only bit 63 is left to fill; any higher payload bits of the tenth
+      // byte do not fit in 64 bits and are ignored.
+      b = *p++; val |= static_cast<uint64_t>(b & 0x01) << 63; if (b >= 0) break;
+      throw std::invalid_argument("Invalid varint value");  // too big
+    } while (false);
+  } else {
+    // Slow path: fewer than kMaxVarintLength64 bytes are available, so check
+    // for the end of the buffer before every read. Because of that size
+    // limit, shift is at most 56 when the final byte is merged in.
+    int shift = 0;
+    while (p != end && *p < 0) {
+      val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
+      shift += 7;
+    }
+    // Ran out of input before seeing a byte without the continuation bit.
+    if (p == end) throw std::invalid_argument("Invalid varint value");
+    val |= static_cast<uint64_t>(*p++) << shift;
+  }
+
+  data.advance(p - begin);
+  return val;
+}
+
+} // namespaces
+
+#endif /* FOLLY_VARINT_H_ */
+
--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Varint.h"
+
+#include <array>
+#include <initializer_list>
+#include <random>
+#include <vector>
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "folly/Benchmark.h"
+#include "folly/Random.h"
+
+// Seed for the random number generator that produces the benchmark input;
+// defaults to folly::randomNumberSeed(). generateRandomValues() logs the
+// value it uses.
+DEFINE_int32(random_seed, folly::randomNumberSeed(), "random seed");
+
+namespace folly { namespace test {
+
+// Checks that val encodes to exactly the given bytes and that those bytes
+// decode back to val, both from a range holding only the encoding and from
+// a padded, full-size buffer.
+void testVarint(uint64_t val, std::initializer_list<uint8_t> bytes) {
+  const size_t n = bytes.size();
+  const uint8_t* const first = &*bytes.begin();
+  ByteRange expected(first, n);
+
+  // Encoding must produce the expected bytes and nothing more.
+  {
+    uint8_t buf[kMaxVarintLength64];
+    EXPECT_EQ(expected.size(), encodeVarint(val, buf));
+    EXPECT_TRUE(ByteRange(buf, expected.size()) == expected);
+  }
+
+  // Decoding the exact encoding must consume the whole range.
+  {
+    ByteRange r = expected;
+    uint64_t decoded = decodeVarint(r);
+    EXPECT_TRUE(r.empty());
+    EXPECT_EQ(val, decoded);
+  }
+
+  // A maximum-length encoding leaves no room for padding.
+  if (n >= kMaxVarintLength64) {
+    return;
+  }
+
+  // Try from a full buffer too, different code path. The padding after the
+  // encoding is varied to show that it does not influence the result.
+  uint8_t buf[kMaxVarintLength64];
+  memcpy(buf, first, n);
+
+  const uint8_t fills[] = {0, 0x7f, 0x80, 0xff};
+  const size_t numFills = sizeof(fills) / sizeof(fills[0]);
+  for (size_t k = 0; k < numFills; ++k) {
+    memset(buf + n, fills[k], kMaxVarintLength64 - n);
+    ByteRange r(buf, kMaxVarintLength64);
+    uint64_t decoded = decodeVarint(r);
+    EXPECT_EQ(val, decoded);
+    EXPECT_EQ(kMaxVarintLength64 - n, r.size());
+  }
+}
+
+// Round-trips a set of known encodings, including the values at which the
+// encoded length grows by one byte.
+TEST(Varint, Simple) {
+ testVarint(0, {0});
+ testVarint(1, {1});
+ // 127 is the largest single-byte value, 128 the first two-byte one.
+ testVarint(127, {127});
+ testVarint(128, {0x80, 0x01});
+ testVarint(300, {0xac, 0x02});
+ // 16383 is the largest two-byte value, 16384 the first three-byte one.
+ testVarint(16383, {0xff, 0x7f});
+ testVarint(16384, {0x80, 0x80, 0x01});
+
+ // Largest 32-bit and 64-bit values.
+ testVarint(static_cast<uint32_t>(-1),
+ {0xff, 0xff, 0xff, 0xff, 0x0f});
+ testVarint(static_cast<uint64_t>(-1),
+ {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01});
+}
+
+// Checks the ZigZag mapping 0, -1, 1, -2, 2 <-> 0, 1, 2, 3, 4 in both
+// directions.
+TEST(ZigZag, Simple) {
+ // Signed -> unsigned.
+ EXPECT_EQ(0, encodeZigZag(0));
+ EXPECT_EQ(1, encodeZigZag(-1));
+ EXPECT_EQ(2, encodeZigZag(1));
+ EXPECT_EQ(3, encodeZigZag(-2));
+ EXPECT_EQ(4, encodeZigZag(2));
+
+ // Unsigned -> signed.
+ EXPECT_EQ(0, decodeZigZag(0));
+ EXPECT_EQ(-1, decodeZigZag(1));
+ EXPECT_EQ(1, decodeZigZag(2));
+ EXPECT_EQ(-2, decodeZigZag(3));
+ EXPECT_EQ(2, decodeZigZag(4));
+}
+
+namespace {
+
+// Number of random values generated for the benchmarks.
+constexpr size_t kNumValues = 1000;
+// Benchmark input, filled by generateRandomValues().
+std::vector<uint64_t> gValues;
+// Destination of the VarintDecoding benchmark.
+std::vector<uint64_t> gDecodedValues;
+// Written by the VarintEncoding benchmark, read by VarintDecoding.
+std::vector<uint8_t> gEncoded;
+
+// Fills gValues with kNumValues pseudo-random values seeded from
+// FLAGS_random_seed, and sizes gDecodedValues and gEncoded for the
+// benchmarks.
+void generateRandomValues() {
+  LOG(INFO) << "Random seed is " << FLAGS_random_seed;
+  std::mt19937 rng(FLAGS_random_seed);
+
+  // Approximation of power law: choose the width of each value (in bytes)
+  // uniformly, then fill that many random bytes.
+  std::uniform_int_distribution<int> numBytes(1, 8);
+  std::uniform_int_distribution<int> byte(0, 255);
+
+  gValues.resize(kNumValues);
+  gDecodedValues.resize(kNumValues);
+  // Room for the worst case, where every value needs kMaxVarintLength64 bytes.
+  gEncoded.resize(kNumValues * kMaxVarintLength64);
+  for (auto& slot : gValues) {
+    const int n = numBytes(rng);
+    uint64_t val = 0;
+    // int index: n is an int, so this avoids a signed/unsigned comparison.
+    for (int j = 0; j < n; ++j) {
+      val = (val << 8) + byte(rng);
+    }
+    slot = val;
+  }
+}
+
+// Benchmark results (Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz, Linux x86_64)
+//
+// I0814 19:13:14.466256 7504 VarintTest.cpp:146] Random seed is -1216518886
+// ============================================================================
+// folly/test/VarintTest.cpp relative time/iter iters/s
+// ============================================================================
+// VarintEncoding 6.69us 149.37K
+// VarintDecoding 6.85us 145.90K
+// ============================================================================
+//
+// Disabling the "fast path" code in decodeVarint hurts performance:
+//
+// I0814 19:15:13.871467 9550 VarintTest.cpp:156] Random seed is -1216518886
+// ============================================================================
+// folly/test/VarintTest.cpp relative time/iter iters/s
+// ============================================================================
+// VarintEncoding 6.75us 148.26K
+// VarintDecoding 12.60us 79.37K
+// ============================================================================
+
+// Encodes all of gValues back to back into gEncoded, iters times over, then
+// trims gEncoded to the bytes written by the last pass.
+BENCHMARK(VarintEncoding, iters) {
+  uint8_t* start = gEncoded.data();
+  uint8_t* p = start;
+  const bool empty = (iters == 0);
+  while (iters--) {
+    // Every pass overwrites the buffer from the beginning.
+    p = start;
+    for (auto& v : gValues) {
+      p += encodeVarint(v, p);
+    }
+  }
+
+  // With zero iterations nothing was written and p still equals start;
+  // trimming then would discard the whole buffer and leave a later run
+  // writing into an empty vector.
+  if (!empty) {
+    gEncoded.erase(gEncoded.begin() + (p - start), gEncoded.end());
+  }
+}
+
+// Decodes the contents of gEncoded into gDecodedValues, iters times over.
+BENCHMARK(VarintDecoding, iters) {
+  // data() + size() yields the one-past-the-end pointer without
+  // dereferencing the end iterator, which is undefined behavior.
+  const uint8_t* const first = gEncoded.data();
+  const uint8_t* const last = first + gEncoded.size();
+  while (iters--) {
+    size_t i = 0;
+    ByteRange range(first, last);
+    // The bound on i keeps the writes inside gDecodedValues even when
+    // gEncoded still has its initial, untrimmed size because
+    // VarintEncoding has not run.
+    while (!range.empty() && i < gDecodedValues.size()) {
+      gDecodedValues[i++] = decodeVarint(range);
+    }
+  }
+}
+
+} // namespace
+
+}} // namespaces
+
+// Runs the unit tests first; the benchmarks only run (when requested via
+// flags) if every test passed.
+int main(int argc, char *argv[]) {
+  testing::InitGoogleTest(&argc, argv);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  google::InitGoogleLogging(argv[0]);
+
+  const int ret = RUN_ALL_TESTS();
+  if (ret != 0) {
+    return ret;
+  }
+
+  folly::test::generateRandomValues();
+  folly::runBenchmarksOnFlag();
+  return 0;
+}
+