/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iterator>
#include <limits>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

#include <folly/ApplyTuple.h>
#include <folly/Bits.h>
#include <folly/hash/SpookyHashV1.h>
#include <folly/hash/SpookyHashV2.h>
/*
 * Various hashing functions.
 */

namespace folly { namespace hash {

// This is a general-purpose way to create a single hash from multiple
// hashable objects. hash_combine_generic takes a class Hasher implementing
// hash<T>; hash_combine uses a default hasher StdHasher that uses std::hash.
// hash_combine_generic hashes each argument and combines those hashes in
// an order-dependent way to yield a new hash.
// This is the Hash128to64 function from Google's cityhash (available
// under the MIT License). We use it to reduce multiple 64 bit hashes
// into a single hash.
inline uint64_t hash_128_to_64(const uint64_t upper, const uint64_t lower) {
  // Murmur-inspired hashing: multiply, xorshift, multiply.
  const uint64_t kMul = 0x9ddfea08eb382d69ULL;
  uint64_t a = (lower ^ upper) * kMul;
  a ^= (a >> 47);
  uint64_t b = (upper ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}
// Base case of the variadic recursion below: combining zero values yields 0.
// Never used at runtime, but gcc demands it.
template <class Hasher>
inline size_t hash_combine_generic() {
  return 0;
}
// Combines the hashes of every element in [begin, end) into a single 64-bit
// hash, in an order-dependent way, starting from an optional seed.
template <
    class Iter,
    class Hash = std::hash<typename std::iterator_traits<Iter>::value_type>>
uint64_t hash_range(Iter begin,
                    Iter end,
                    uint64_t hash = 0,
                    Hash hasher = Hash()) {
  // Fold each element's hash into the running accumulator.
  for (; begin != end; ++begin) {
    hash = hash_128_to_64(hash, hasher(*begin));
  }
  return hash;
}
78 inline uint32_t twang_32from64(uint64_t key);
80 template <class Hasher, typename T, typename... Ts>
81 size_t hash_combine_generic(const T& t, const Ts&... ts) {
82 size_t seed = Hasher::hash(t);
83 if (sizeof...(ts) == 0) {
86 size_t remainder = hash_combine_generic<Hasher>(ts...);
87 /* static */ if (sizeof(size_t) == sizeof(uint32_t)) {
88 return twang_32from64((uint64_t(seed) << 32) | remainder);
90 return static_cast<size_t>(hash_128_to_64(seed, remainder));
// Simply uses std::hash to hash. Note that std::hash is not guaranteed
// to be a very good hash function; provided std::hash doesn't collide on
// the individual inputs, you are fine, but that won't be true for, say,
// strings or pairs.
class StdHasher {
 public:
  template <typename T>
  static size_t hash(const T& t) {
    return std::hash<T>()(t);
  }
};
106 template <typename T, typename... Ts>
107 size_t hash_combine(const T& t, const Ts&... ts) {
108 return hash_combine_generic<StdHasher>(t, ts...);
//////////////////////////////////////////////////////////////////////

/*
 * Thomas Wang 64 bit mix hash function
 */
inline uint64_t twang_mix64(uint64_t key) {
  key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1;
  key = key ^ (key >> 24);
  key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8)
  key = key ^ (key >> 14);
  key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4)
  key = key ^ (key >> 28);
  key = key + (key << 31); // key *= 1 + (1 << 31)
  return key;
}

/*
 * Inverse of twang_mix64.
 *
 * Each step of twang_mix64 is invertible mod 2^64 (the multiplications
 * are by odd factors, the xorshifts telescope). Note that twang_unmix64
 * is significantly slower than twang_mix64.
 */
inline uint64_t twang_unmix64(uint64_t key) {
  // See the comments in jenkins_rev_unmix32 for an explanation as to how this
  // was generated.
  key *= 4611686016279904257U; // inverse of 1 + (1 << 31), mod 2^64
  key ^= (key >> 28) ^ (key >> 56); // undo key ^= key >> 28
  key *= 14933078535860113213U; // inverse of 1 + (1 << 2) + (1 << 4)
  key ^= (key >> 14) ^ (key >> 28) ^ (key >> 42) ^ (key >> 56);
  key *= 15244667743933553977U; // inverse of 1 + (1 << 3) + (1 << 8)
  key ^= (key >> 24) ^ (key >> 48);
  // undo key = ~key + (key << 21), i.e. key * ((1 << 21) - 1) - 1
  key = (key + 1) * 9223367638806167551U;
  return key;
}
/*
 * Thomas Wang downscaling hash function: mixes a 64-bit key and
 * truncates it to 32 bits.
 */
inline uint32_t twang_32from64(uint64_t key) {
  key = (~key) + (key << 18);
  key = key ^ (key >> 31);
  key = key * 21; // key *= 1 + (1 << 2) + (1 << 4)
  key = key ^ (key >> 11);
  key = key + (key << 6);
  key = key ^ (key >> 22);
  return (uint32_t) key;
}
/*
 * Robert Jenkins' reversible 32 bit mix hash function
 */
inline uint32_t jenkins_rev_mix32(uint32_t key) {
  key += (key << 12); // key *= (1 + (1 << 12))
  key ^= (key >> 22);
  key += (key << 4); // key *= (1 + (1 << 4))
  key ^= (key >> 9);
  key += (key << 10); // key *= (1 + (1 << 10))
  key ^= (key >> 2);
  // key *= (1 + (1 << 7)) * (1 + (1 << 12))
  key += (key << 7);
  key += (key << 12);
  return key;
}

/*
 * Inverse of jenkins_rev_mix32.
 *
 * Note that jenkins_rev_unmix32 is significantly slower than
 * jenkins_rev_mix32.
 */
inline uint32_t jenkins_rev_unmix32(uint32_t key) {
  // These are the modular multiplicative inverses (in Z_2^32) of the
  // multiplication factors in jenkins_rev_mix32, in reverse order. They were
  // computed using the Extended Euclidean algorithm, see
  // http://en.wikipedia.org/wiki/Modular_multiplicative_inverse
  key *= 2364026753U; // inverse of (1 + (1 << 7)) * (1 + (1 << 12))

  // The inverse of a ^= (a >> n) is
  // a ^= (a >> n) ^ (a >> 2n) ^ ... for every multiple of n below the
  // word size; here n == 2.
  key ^=
      (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^
      (key >> 10) ^ (key >> 12) ^ (key >> 14) ^ (key >> 16) ^
      (key >> 18) ^ (key >> 20) ^ (key >> 22) ^ (key >> 24) ^
      (key >> 26) ^ (key >> 28) ^ (key >> 30);

  key *= 3222273025U; // inverse of (1 + (1 << 10))
  key ^= (key >> 9) ^ (key >> 18) ^ (key >> 27); // undo key ^= key >> 9
  key *= 4042322161U; // inverse of (1 + (1 << 4))
  key ^= (key >> 22); // undo key ^= key >> 22 (self-inverse: 44 >= 32)
  key *= 16773121U; // inverse of (1 + (1 << 12))
  return key;
}
/*
 * Fowler / Noll / Vo (FNV) Hash
 *     http://www.isthe.com/chongo/tech/comp/fnv/
 *
 * fnv32/fnv64 implement FNV-1 (multiply, then xor each byte);
 * fnva64 implements FNV-1a (xor each byte, then multiply). The shift
 * cascades below are the FNV primes expressed as adds of shifts:
 * 2^24 + 2^8 + 0x93 (32 bit) and 2^40 + 2^8 + 0xb3 (64 bit).
 */

const uint32_t FNV_32_HASH_START = 2166136261UL;
const uint64_t FNV_64_HASH_START = 14695981039346656037ULL;
const uint64_t FNVA_64_HASH_START = 14695981039346656037ULL;

// FNV-1 32-bit hash of a NUL-terminated string.
inline uint32_t fnv32(const char* buf, uint32_t hash = FNV_32_HASH_START) {
  // forcing signed char, since other platforms can use unsigned
  const signed char* s = reinterpret_cast<const signed char*>(buf);

  for (; *s; ++s) {
    hash += (hash << 1) + (hash << 4) + (hash << 7) +
            (hash << 8) + (hash << 24);
    hash ^= *s;
  }
  return hash;
}

// FNV-1 32-bit hash of an arbitrary byte buffer.
inline uint32_t fnv32_buf(const void* buf,
                          size_t n,
                          uint32_t hash = FNV_32_HASH_START) {
  // forcing signed char, since other platforms can use unsigned
  const signed char* char_buf = reinterpret_cast<const signed char*>(buf);

  for (size_t i = 0; i < n; ++i) {
    hash += (hash << 1) + (hash << 4) + (hash << 7) +
            (hash << 8) + (hash << 24);
    hash ^= char_buf[i];
  }
  return hash;
}

inline uint32_t fnv32(const std::string& str,
                      uint32_t hash = FNV_32_HASH_START) {
  return fnv32_buf(str.data(), str.size(), hash);
}

// FNV-1 64-bit hash of a NUL-terminated string.
inline uint64_t fnv64(const char* buf, uint64_t hash = FNV_64_HASH_START) {
  // forcing signed char, since other platforms can use unsigned
  const signed char* s = reinterpret_cast<const signed char*>(buf);

  for (; *s; ++s) {
    hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
            (hash << 8) + (hash << 40);
    hash ^= *s;
  }
  return hash;
}

// FNV-1 64-bit hash of an arbitrary byte buffer.
inline uint64_t fnv64_buf(const void* buf,
                          size_t n,
                          uint64_t hash = FNV_64_HASH_START) {
  // forcing signed char, since other platforms can use unsigned
  const signed char* char_buf = reinterpret_cast<const signed char*>(buf);

  for (size_t i = 0; i < n; ++i) {
    hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
            (hash << 8) + (hash << 40);
    hash ^= char_buf[i];
  }
  return hash;
}

inline uint64_t fnv64(const std::string& str,
                      uint64_t hash = FNV_64_HASH_START) {
  return fnv64_buf(str.data(), str.size(), hash);
}

// FNV-1a 64-bit hash of an arbitrary byte buffer (xor before multiply).
inline uint64_t fnva64_buf(const void* buf,
                           size_t n,
                           uint64_t hash = FNVA_64_HASH_START) {
  const uint8_t* char_buf = reinterpret_cast<const uint8_t*>(buf);

  for (size_t i = 0; i < n; ++i) {
    hash ^= char_buf[i];
    hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) +
            (hash << 8) + (hash << 40);
  }
  return hash;
}

inline uint64_t fnva64(const std::string& str,
                       uint64_t hash = FNVA_64_HASH_START) {
  return fnva64_buf(str.data(), str.size(), hash);
}
301 * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html
304 #define get16bits(d) folly::loadUnaligned<uint16_t>(d)
306 inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) {
307 // forcing signed char, since other platforms can use unsigned
308 const unsigned char* s = reinterpret_cast<const unsigned char*>(buf);
309 uint32_t hash = static_cast<uint32_t>(len);
313 if (len <= 0 || buf == nullptr) {
321 for (;len > 0; len--) {
322 hash += get16bits (s);
323 tmp = (get16bits (s+2) << 11) ^ hash;
324 hash = (hash << 16) ^ tmp;
325 s += 2*sizeof (uint16_t);
329 /* Handle end cases */
332 hash += get16bits(s);
334 hash ^= s[sizeof (uint16_t)] << 18;
338 hash += get16bits(s);
348 /* Force "avalanching" of final 127 bits */
361 inline uint32_t hsieh_hash32(const char* s) {
362 return hsieh_hash32_buf(s, std::strlen(s));
365 inline uint32_t hsieh_hash32_str(const std::string& str) {
366 return hsieh_hash32_buf(str.data(), str.size());
369 //////////////////////////////////////////////////////////////////////
374 struct integral_hasher {
375 template <typename I>
376 size_t operator()(I const& i) const {
377 static_assert(sizeof(I) <= 8, "input type is too wide");
378 if (sizeof(I) <= 4) { // the branch taken is known at compile time
379 auto const i32 = static_cast<int32_t>(i); // impl accident: sign-extends
380 auto const u32 = static_cast<uint32_t>(i32);
381 return static_cast<size_t>(hash::jenkins_rev_mix32(u32));
383 auto const u64 = static_cast<uint64_t>(i);
384 return static_cast<size_t>(hash::twang_mix64(u64));
388 } // namespace detail
390 template <class Key, class Enable = void>
395 size_t operator()(const T& v) const {
396 return hasher<T>()(v);
399 template <class T, class... Ts>
400 size_t operator()(const T& t, const Ts&... ts) const {
401 return hash::hash_128_to_64((*this)(t), (*this)(ts...));
406 struct hasher<bool> {
407 size_t operator()(bool key) const {
408 // Make sure that all the output bits depend on the input.
409 return key ? std::numeric_limits<size_t>::max() : 0;
414 struct hasher<unsigned long long> : detail::integral_hasher {};
417 struct hasher<signed long long> : detail::integral_hasher {};
420 struct hasher<unsigned long> : detail::integral_hasher {};
423 struct hasher<signed long> : detail::integral_hasher {};
426 struct hasher<unsigned int> : detail::integral_hasher {};
429 struct hasher<signed int> : detail::integral_hasher {};
432 struct hasher<unsigned short> : detail::integral_hasher {};
435 struct hasher<signed short> : detail::integral_hasher {};
438 struct hasher<unsigned char> : detail::integral_hasher {};
441 struct hasher<signed char> : detail::integral_hasher {};
443 template <> // char is a different type from both signed char and unsigned char
444 struct hasher<char> : detail::integral_hasher {};
446 template <> struct hasher<std::string> {
447 size_t operator()(const std::string& key) const {
448 return static_cast<size_t>(
449 hash::SpookyHashV2::Hash64(key.data(), key.size(), 0));
454 struct hasher<T, typename std::enable_if<std::is_enum<T>::value, void>::type> {
455 size_t operator()(T key) const {
456 return Hash()(static_cast<typename std::underlying_type<T>::type>(key));
460 template <class T1, class T2>
461 struct hasher<std::pair<T1, T2>> {
462 size_t operator()(const std::pair<T1, T2>& key) const {
463 return Hash()(key.first, key.second);
467 template <typename... Ts>
468 struct hasher<std::tuple<Ts...>> {
469 size_t operator() (const std::tuple<Ts...>& key) const {
470 return applyTuple(Hash(), key);
475 template <size_t index, typename... Ts>
477 size_t operator()(std::tuple<Ts...> const& key) const {
478 return hash::hash_combine(
479 TupleHasher<index - 1, Ts...>()(key),
480 std::get<index>(key));
485 template <typename... Ts>
486 struct TupleHasher<0, Ts...> {
487 size_t operator()(std::tuple<Ts...> const& key) const {
488 // we could do std::hash here directly, but hash_combine hides all the
489 // ugly templating implicitly
490 return hash::hash_combine(std::get<0>(key));
496 // Custom hash functions.
498 // Hash function for pairs. Requires default hash functions for both
499 // items in the pair.
500 template <typename T1, typename T2>
501 struct hash<std::pair<T1, T2> > {
503 size_t operator()(const std::pair<T1, T2>& x) const {
504 return folly::hash::hash_combine(x.first, x.second);
508 // Hash function for tuples. Requires default hash functions for all types.
509 template <typename... Ts>
510 struct hash<std::tuple<Ts...>> {
511 size_t operator()(std::tuple<Ts...> const& key) const {
513 std::tuple_size<std::tuple<Ts...>>::value - 1, // start index