2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_VARINT_H_
18 #define FOLLY_VARINT_H_
20 #include <type_traits>
21 #include <folly/Conv.h>
22 #include <folly/Range.h>
27 * Variable-length integer encoding, using a little-endian, base-128
30 * The MSb is set on all bytes except the last.
33 * https://developers.google.com/protocol-buffers/docs/encoding#varints
35 * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
36 * is faster and likely smaller.
40 * Maximum length (in bytes) of the varint encoding of a 32-bit value.
42 constexpr size_t kMaxVarintLength32 = 5;
45 * Maximum length (in bytes) of the varint encoding of a 64-bit value.
47 constexpr size_t kMaxVarintLength64 = 10;
50 * Encode a value in the given buffer, returning the number of bytes used
52 * buf must have enough space to represent the value (at least
53 * kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
55 size_t encodeVarint(uint64_t val, uint8_t* buf);
58 * Decode a value from a given buffer, advances data past the returned value.
61 uint64_t decodeVarint(Range<T*>& data);
64 * ZigZag encoding that maps signed integers with a small absolute value
65 * to unsigned integers with a small (positive) values. Without this,
66 * encoding negative values using Varint would use up 9 or 10 bytes.
68 * if x >= 0, encodeZigZag(x) == 2*x
69 * if x < 0, encodeZigZag(x) == -2*x + 1
72 inline uint64_t encodeZigZag(int64_t val) {
73 // Bit-twiddling magic stolen from the Google protocol buffer document;
74 // val >> 63 is an arithmetic shift because val is signed
75 return static_cast<uint64_t>((val << 1) ^ (val >> 63));
78 inline int64_t decodeZigZag(uint64_t val) {
79 return static_cast<int64_t>((val >> 1) ^ -(val & 1));
82 // Implementation below
84 inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
87 *p++ = 0x80 | (val & 0x7f);
95 inline uint64_t decodeVarint(Range<T*>& data) {
97 std::is_same<typename std::remove_cv<T>::type, char>::value ||
98 std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
99 "Only character ranges are supported");
101 const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
102 const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
103 const int8_t* p = begin;
106 // end is always greater than or equal to begin, so this subtraction is safe
107 if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path
110 b = *p++; val = (b & 0x7f) ; if (b >= 0) break;
111 b = *p++; val |= (b & 0x7f) << 7; if (b >= 0) break;
112 b = *p++; val |= (b & 0x7f) << 14; if (b >= 0) break;
113 b = *p++; val |= (b & 0x7f) << 21; if (b >= 0) break;
114 b = *p++; val |= (b & 0x7f) << 28; if (b >= 0) break;
115 b = *p++; val |= (b & 0x7f) << 35; if (b >= 0) break;
116 b = *p++; val |= (b & 0x7f) << 42; if (b >= 0) break;
117 b = *p++; val |= (b & 0x7f) << 49; if (b >= 0) break;
118 b = *p++; val |= (b & 0x7f) << 56; if (b >= 0) break;
119 b = *p++; val |= (b & 0x7f) << 63; if (b >= 0) break;
120 throw std::invalid_argument("Invalid varint value. Too big.");
124 while (p != end && *p < 0) {
125 val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
129 throw std::invalid_argument("Invalid varint value. Too small: " +
130 folly::to<std::string>(end - begin) +
133 val |= static_cast<uint64_t>(*p++) << shift;
136 data.advance(p - begin);
142 #endif /* FOLLY_VARINT_H_ */