2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <type_traits>
20 #include <folly/Conv.h>
21 #include <folly/Range.h>
26 * Variable-length integer encoding, using a little-endian, base-128
29 * The MSb is set on all bytes except the last.
32 * https://developers.google.com/protocol-buffers/docs/encoding#varints
34 * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
35 * is faster and likely smaller.
39 * Maximum length (in bytes) of the varint encoding of a 32-bit value.
41 constexpr size_t kMaxVarintLength32 = 5;
44 * Maximum length (in bytes) of the varint encoding of a 64-bit value.
46 constexpr size_t kMaxVarintLength64 = 10;
49 * Encode a value in the given buffer, returning the number of bytes used
51 * buf must have enough space to represent the value (at least
52 * kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
54 size_t encodeVarint(uint64_t val, uint8_t* buf);
57 * Decode a value from a given buffer, advances data past the returned value.
60 uint64_t decodeVarint(Range<T*>& data);
63 * ZigZag encoding that maps signed integers with a small absolute value
64 * to unsigned integers with a small (positive) values. Without this,
65 * encoding negative values using Varint would use up 9 or 10 bytes.
67 * if x >= 0, encodeZigZag(x) == 2*x
68 * if x < 0, encodeZigZag(x) == -2*x + 1
71 inline uint64_t encodeZigZag(int64_t val) {
72 // Bit-twiddling magic stolen from the Google protocol buffer document;
73 // val >> 63 is an arithmetic shift because val is signed
74 auto uval = static_cast<uint64_t>(val);
75 return static_cast<uint64_t>((uval << 1) ^ (val >> 63));
78 inline int64_t decodeZigZag(uint64_t val) {
79 return static_cast<int64_t>((val >> 1) ^ -(val & 1));
82 // Implementation below
84 inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
87 *p++ = 0x80 | (val & 0x7f);
95 inline uint64_t decodeVarint(Range<T*>& data) {
97 std::is_same<typename std::remove_cv<T>::type, char>::value ||
98 std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
99 "Only character ranges are supported");
101 const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
102 const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
103 const int8_t* p = begin;
106 // end is always greater than or equal to begin, so this subtraction is safe
107 if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path
110 b = *p++; val = (b & 0x7f) ; if (b >= 0) break;
111 b = *p++; val |= (b & 0x7f) << 7; if (b >= 0) break;
112 b = *p++; val |= (b & 0x7f) << 14; if (b >= 0) break;
113 b = *p++; val |= (b & 0x7f) << 21; if (b >= 0) break;
114 b = *p++; val |= (b & 0x7f) << 28; if (b >= 0) break;
115 b = *p++; val |= (b & 0x7f) << 35; if (b >= 0) break;
116 b = *p++; val |= (b & 0x7f) << 42; if (b >= 0) break;
117 b = *p++; val |= (b & 0x7f) << 49; if (b >= 0) break;
118 b = *p++; val |= (b & 0x7f) << 56; if (b >= 0) break;
119 b = *p++; val |= (b & 0x7f) << 63; if (b >= 0) break;
120 throw std::invalid_argument("Invalid varint value. Too big.");
124 while (p != end && *p < 0) {
125 val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
129 throw std::invalid_argument("Invalid varint value. Too small: " +
130 folly::to<std::string>(end - begin) +
133 val |= static_cast<uint64_t>(*p++) << shift;
136 data.advance(p - begin);