folly/Varint.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <type_traits>
  20 #include <folly/Conv.h>
  21 #include <folly/Range.h>
  22
  23 namespace folly {
  24
  25 /**
  26  * Variable-length integer encoding, using a little-endian, base-128
  27  * representation.
  28  *
  29  * The MSb is set on all bytes except the last.
  30  *
  31  * Details:
  32  * https://developers.google.com/protocol-buffers/docs/encoding#varints
  33  *
  34  * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
  35  * is faster and likely smaller.
  36  */
  37
  38 /**
  39  * Maximum length (in bytes) of the varint encoding of a 32-bit value.
  40  */
  41 constexpr size_t kMaxVarintLength32 = 5;
  42
  43 /**
  44  * Maximum length (in bytes) of the varint encoding of a 64-bit value.
  45  */
  46 constexpr size_t kMaxVarintLength64 = 10;
  47
  48 /**
  49  * Encode a value in the given buffer, returning the number of bytes used
  50  * for encoding.
  51  * buf must have enough space to represent the value (at least
  52  * kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
  53  */
  54 size_t encodeVarint(uint64_t val, uint8_t* buf);
  55
  56 /**
  57  * Decode a value from a given buffer, advances data past the returned value.
  58  */
  59 template <class T>
  60 uint64_t decodeVarint(Range<T*>& data);
  61
  62 /**
  63  * ZigZag encoding that maps signed integers with a small absolute value
  64  * to unsigned integers with a small (positive) values. Without this,
  65  * encoding negative values using Varint would use up 9 or 10 bytes.
  66  *
  67  * if x >= 0, encodeZigZag(x) == 2*x
  68  * if x <  0, encodeZigZag(x) == -2*x + 1
  69  */
  70
  71 inline uint64_t encodeZigZag(int64_t val) {
  72   // Bit-twiddling magic stolen from the Google protocol buffer document;
  73   // val >> 63 is an arithmetic shift because val is signed
  74   auto uval = static_cast<uint64_t>(val);
  75   return static_cast<uint64_t>((uval << 1) ^ (val >> 63));
  76 }
  77
  78 inline int64_t decodeZigZag(uint64_t val) {
  79   return static_cast<int64_t>((val >> 1) ^ -(val & 1));
  80 }
  81
  82 // Implementation below
  83
  84 inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
  85   uint8_t* p = buf;
  86   while (val >= 128) {
  87     *p++ = 0x80 | (val & 0x7f);
  88     val >>= 7;
  89   }
  90   *p++ = val;
  91   return p - buf;
  92 }
  93
  94 template <class T>
  95 inline uint64_t decodeVarint(Range<T*>& data) {
  96   static_assert(
  97       std::is_same<typename std::remove_cv<T>::type, char>::value ||
  98           std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
  99       "Only character ranges are supported");
 100
 101   const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
 102   const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
 103   const int8_t* p = begin;
 104   uint64_t val = 0;
 105
 106   // end is always greater than or equal to begin, so this subtraction is safe
 107   if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) {  // fast path
 108     int64_t b;
 109     do {
 110       b = *p++; val  = (b & 0x7f)      ; if (b >= 0) break;
 111       b = *p++; val |= (b & 0x7f) <<  7; if (b >= 0) break;
 112       b = *p++; val |= (b & 0x7f) << 14; if (b >= 0) break;
 113       b = *p++; val |= (b & 0x7f) << 21; if (b >= 0) break;
 114       b = *p++; val |= (b & 0x7f) << 28; if (b >= 0) break;
 115       b = *p++; val |= (b & 0x7f) << 35; if (b >= 0) break;
 116       b = *p++; val |= (b & 0x7f) << 42; if (b >= 0) break;
 117       b = *p++; val |= (b & 0x7f) << 49; if (b >= 0) break;
 118       b = *p++; val |= (b & 0x7f) << 56; if (b >= 0) break;
 119       b = *p++; val |= (b & 0x7f) << 63; if (b >= 0) break;
 120       throw std::invalid_argument("Invalid varint value. Too big.");
 121     } while (false);
 122   } else {
 123     int shift = 0;
 124     while (p != end && *p < 0) {
 125       val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
 126       shift += 7;
 127     }
 128     if (p == end) {
 129       throw std::invalid_argument("Invalid varint value. Too small: " +
 130                                   folly::to<std::string>(end - begin) +
 131                                   " bytes");
 132     }
 133     val |= static_cast<uint64_t>(*p++) << shift;
 134   }
 135
 136   data.advance(p - begin);
 137   return val;
 138 }
 139
 140 }  // namespaces