X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FHash.h;h=b0ae03bb764bbb3e9e0034c75969f896258c5900;hb=c1dd3ab18408c97e52856f11d135dcb3237f29b4;hp=a52da9c634e6ff0206b17e327b852896281ed786;hpb=27494a20393fa45072e7d526d358835f3abe312a;p=folly.git diff --git a/folly/Hash.h b/folly/Hash.h index a52da9c6..b0ae03bb 100644 --- a/folly/Hash.h +++ b/folly/Hash.h @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2016 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,12 +14,19 @@ * limitations under the License. */ -#ifndef FOLLY_BASE_HASH_H_ -#define FOLLY_BASE_HASH_H_ +#pragma once -#include +#include #include #include +#include +#include +#include + +#include +#include +#include +#include /* * Various hashing functions. @@ -27,6 +34,79 @@ namespace folly { namespace hash { +// This is a general-purpose way to create a single hash from multiple +// hashable objects. hash_combine_generic takes a class Hasher implementing +// hash; hash_combine uses a default hasher StdHasher that uses std::hash. +// hash_combine_generic hashes each argument and combines those hashes in +// an order-dependent way to yield a new hash. + + +// This is the Hash128to64 function from Google's cityhash (available +// under the MIT License). We use it to reduce multiple 64 bit hashes +// into a single hash. +inline uint64_t hash_128_to_64(const uint64_t upper, const uint64_t lower) { + // Murmur-inspired hashing. + const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (lower ^ upper) * kMul; + a ^= (a >> 47); + uint64_t b = (upper ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +// Never used, but gcc demands it. +template +inline size_t hash_combine_generic() { + return 0; +} + +template < + class Iter, + class Hash = std::hash::value_type>> +uint64_t hash_range(Iter begin, + Iter end, + uint64_t hash = 0, + Hash hasher = Hash()) { + for (; begin != end; ++begin) { + hash = hash_128_to_64(hash, hasher(*begin)); + } + return hash; +} + +inline uint32_t twang_32from64(uint64_t key); + +template +size_t hash_combine_generic(const T& t, const Ts&... ts) { + size_t seed = Hasher::hash(t); + if (sizeof...(ts) == 0) { + return seed; + } + size_t remainder = hash_combine_generic(ts...); + /* static */ if (sizeof(size_t) == sizeof(uint32_t)) { + return twang_32from64((uint64_t(seed) << 32) | remainder); + } else { + return static_cast(hash_128_to_64(seed, remainder)); + } +} + +// Simply uses std::hash to hash. Note that std::hash is not guaranteed +// to be a very good hash function; provided std::hash doesn't collide on +// the individual inputs, you are fine, but that won't be true for, say, +// strings or pairs +class StdHasher { + public: + template + static size_t hash(const T& t) { + return std::hash()(t); + } +}; + +template +size_t hash_combine(const T& t, const Ts&... ts) { + return hash_combine_generic(t, ts...); +} + ////////////////////////////////////////////////////////////////////// /* @@ -34,13 +114,32 @@ namespace folly { namespace hash { */ inline uint64_t twang_mix64(uint64_t key) { - key = (~key) + (key << 21); + key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1; key = key ^ (key >> 24); - key = (key + (key << 3)) + (key << 8); + key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8) key = key ^ (key >> 14); - key = (key + (key << 2)) + (key << 4); + key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4) key = key ^ (key >> 28); - key = key + (key << 31); + key = key + (key << 31); // key *= 1 + (1 << 31) + return key; +} + +/* + * Inverse of twang_mix64 + * + * Note that twang_unmix64 is significantly slower than twang_mix64. + */ + +inline uint64_t twang_unmix64(uint64_t key) { + // See the comments in jenkins_rev_unmix32 for an explanation as to how this + // was generated + key *= 4611686016279904257U; + key ^= (key >> 28) ^ (key >> 56); + key *= 14933078535860113213U; + key ^= (key >> 14) ^ (key >> 28) ^ (key >> 42) ^ (key >> 56); + key *= 15244667743933553977U; + key ^= (key >> 24) ^ (key >> 48); + key = (key + 1) * 9223367638806167551U; return key; } @@ -63,23 +162,56 @@ inline uint32_t twang_32from64(uint64_t key) { */ inline uint32_t jenkins_rev_mix32(uint32_t key) { - key += (key << 12); + key += (key << 12); // key *= (1 + (1 << 12)) key ^= (key >> 22); - key += (key << 4); + key += (key << 4); // key *= (1 + (1 << 4)) key ^= (key >> 9); - key += (key << 10); + key += (key << 10); // key *= (1 + (1 << 10)) key ^= (key >> 2); + // key *= (1 + (1 << 7)) * (1 + (1 << 12)) key += (key << 7); key += (key << 12); return key; } +/* + * Inverse of jenkins_rev_mix32 + * + * Note that jenkinks_rev_unmix32 is significantly slower than + * jenkins_rev_mix32. + */ + +inline uint32_t jenkins_rev_unmix32(uint32_t key) { + // These are the modular multiplicative inverses (in Z_2^32) of the + // multiplication factors in jenkins_rev_mix32, in reverse order. They were + // computed using the Extended Euclidean algorithm, see + // http://en.wikipedia.org/wiki/Modular_multiplicative_inverse + key *= 2364026753U; + + // The inverse of a ^= (a >> n) is + // b = a + // for (int i = n; i < 32; i += n) { + // b ^= (a >> i); + // } + key ^= + (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^ + (key >> 10) ^ (key >> 12) ^ (key >> 14) ^ (key >> 16) ^ + (key >> 18) ^ (key >> 20) ^ (key >> 22) ^ (key >> 24) ^ + (key >> 26) ^ (key >> 28) ^ (key >> 30); + key *= 3222273025U; + key ^= (key >> 9) ^ (key >> 18) ^ (key >> 27); + key *= 4042322161U; + key ^= (key >> 22); + key *= 16773121U; + return key; +} + /* * Fowler / Noll / Vo (FNV) Hash * http://www.isthe.com/chongo/tech/comp/fnv/ */ -const uint32_t FNV_32_HASH_START = 216613626UL; +const uint32_t FNV_32_HASH_START = 2166136261UL; const uint64_t FNV_64_HASH_START = 14695981039346656037ULL; inline uint32_t fnv32(const char* s, @@ -93,11 +225,12 @@ inline uint32_t fnv32(const char* s, } inline uint32_t fnv32_buf(const void* buf, - int n, + size_t n, uint32_t hash = FNV_32_HASH_START) { - const char* char_buf = reinterpret_cast(buf); + // forcing signed char, since other platforms can use unsigned + const signed char* char_buf = reinterpret_cast(buf); - for (int i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); hash ^= char_buf[i]; @@ -107,7 +240,7 @@ inline uint32_t fnv32_buf(const void* buf, } inline uint32_t fnv32(const std::string& str, - uint64_t hash = FNV_32_HASH_START) { + uint32_t hash = FNV_32_HASH_START) { return fnv32_buf(str.data(), str.size(), hash); } @@ -122,11 +255,12 @@ inline uint64_t fnv64(const char* s, } inline uint64_t fnv64_buf(const void* buf, - int n, + size_t n, uint64_t hash = FNV_64_HASH_START) { - const char* char_buf = reinterpret_cast(buf); + // forcing signed char, since other platforms can use unsigned + const signed char* char_buf = reinterpret_cast(buf); - for (int i = 0; i < n; ++i) { + for (size_t i = 0; i < n; ++i) { hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); hash ^= char_buf[i]; @@ -143,13 +277,14 @@ inline uint64_t fnv64(const std::string& str, * Paul Hsieh: http://www.azillionmonkeys.com/qed/hash.html */ -#define get16bits(d) (*((const uint16_t*) (d))) +#define get16bits(d) folly::loadUnaligned(d) -inline uint32_t hsieh_hash32_buf(const void* buf, int len) { - const char* s = reinterpret_cast(buf); - uint32_t hash = len; +inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) { + // forcing signed char, since other platforms can use unsigned + const unsigned char* s = reinterpret_cast(buf); + uint32_t hash = static_cast(len); uint32_t tmp; - int rem; + size_t rem; if (len <= 0 || buf == 0) { return 0; @@ -211,9 +346,21 @@ inline uint32_t hsieh_hash32_str(const std::string& str) { } // namespace hash -template +template struct hasher; +struct Hash { + template + size_t operator()(const T& v) const { + return hasher()(v); + } + + template + size_t operator()(const T& t, const Ts&... ts) const { + return hash::hash_128_to_64((*this)(t), (*this)(ts...)); + } +}; + template<> struct hasher { size_t operator()(int32_t key) const { return hash::jenkins_rev_mix32(uint32_t(key)); @@ -228,16 +375,87 @@ template<> struct hasher { template<> struct hasher { size_t operator()(int64_t key) const { - return hash::twang_mix64(uint64_t(key)); + return static_cast(hash::twang_mix64(uint64_t(key))); } }; template<> struct hasher { size_t operator()(uint64_t key) const { - return hash::twang_mix64(key); + return static_cast(hash::twang_mix64(key)); + } +}; + +template<> struct hasher { + size_t operator()(const std::string& key) const { + return static_cast( + hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); + } +}; + +template +struct hasher::value, void>::type> { + size_t operator()(T key) const { + return Hash()(static_cast::type>(key)); + } +}; + +template +struct hasher> { + size_t operator()(const std::pair& key) const { + return Hash()(key.first, key.second); + } +}; + +template +struct hasher> { + size_t operator() (const std::tuple& key) const { + return applyTuple(Hash(), key); + } +}; + +// recursion +template +struct TupleHasher { + size_t operator()(std::tuple const& key) const { + return hash::hash_combine( + TupleHasher()(key), + std::get(key)); + } +}; + +// base +template +struct TupleHasher<0, Ts...> { + size_t operator()(std::tuple const& key) const { + // we could do std::hash here directly, but hash_combine hides all the + // ugly templating implicitly + return hash::hash_combine(std::get<0>(key)); } }; } // namespace folly -#endif +// Custom hash functions. +namespace std { + // Hash function for pairs. Requires default hash functions for both + // items in the pair. + template + struct hash > { + public: + size_t operator()(const std::pair& x) const { + return folly::hash::hash_combine(x.first, x.second); + } + }; + + // Hash function for tuples. Requires default hash functions for all types. + template + struct hash> { + size_t operator()(std::tuple const& key) const { + folly::TupleHasher< + std::tuple_size>::value - 1, // start index + Ts...> hasher; + + return hasher(key); + } + }; +} // namespace std