X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=include%2Fllvm%2FADT%2FHashing.h;h=6ab07254a21d5849fdbccfa59ce8d6232ab3c236;hb=ac24e251014de60a16558fc0a1f2340c334d2aa8;hp=1a4c555b294710849643dae290823f60ff68982a;hpb=4166989f10eaecfb357551788a3d91275e75f119;p=oota-llvm.git diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h index 1a4c555b294..6ab07254a21 100644 --- a/include/llvm/ADT/Hashing.h +++ b/include/llvm/ADT/Hashing.h @@ -47,6 +47,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/type_traits.h" #include #include @@ -74,10 +76,6 @@ namespace llvm { /// using llvm::hash_value; /// llvm::hash_code code = hash_value(x); /// \endcode -/// -/// Also note that there are two numerical values which are reserved, and the -/// implementation ensures will never be produced for real hash_codes. These -/// can be used as sentinels within hashing data structures. class hash_code { size_t value; @@ -103,6 +101,46 @@ public: friend size_t hash_value(const hash_code &code) { return code.value; } }; +/// \brief Compute a hash_code for any integer value. +/// +/// Note that this function is intended to compute the same hash_code for +/// a particular value without regard to the pre-promotion type. This is in +/// contrast to hash_combine which may produce different hash_codes for +/// differing argument types even if they would implicit promote to a common +/// type without changing the value. +template +typename enable_if, hash_code>::type hash_value(T value); + +/// \brief Compute a hash_code for a pointer's address. +/// +/// N.B.: This hashes the *address*. Not the value and not the type. +template hash_code hash_value(const T *ptr); + +/// \brief Compute a hash_code for a pair of objects. +template +hash_code hash_value(const std::pair &arg); + +/// \brief Compute a hash_code for a standard string. +template +hash_code hash_value(const std::basic_string &arg); + + +/// \brief Override the execution seed with a fixed value. +/// +/// This hashing library uses a per-execution seed designed to change on each +/// run with high probability in order to ensure that the hash codes are not +/// attackable and to ensure that output which is intended to be stable does +/// not rely on the particulars of the hash codes produced. +/// +/// That said, there are use cases where it is important to be able to +/// reproduce *exactly* a specific behavior. To that end, we provide a function +/// which will forcibly set the seed to a fixed value. This must be done at the +/// start of the program, before any hashes are computed. Also, it cannot be +/// undone. This makes it thread-hostile and very hard to use outside of +/// immediately on start of a simple program designed for reproducible +/// behavior. +void set_fixed_execution_hash_seed(size_t fixed_value); + // All of the implementation details of actually computing the various hash // code values are held within this namespace. These routines are included in @@ -113,12 +151,16 @@ namespace detail { inline uint64_t fetch64(const char *p) { uint64_t result; memcpy(&result, p, sizeof(result)); + if (sys::isBigEndianHost()) + return sys::SwapByteOrder(result); return result; } inline uint32_t fetch32(const char *p) { uint32_t result; memcpy(&result, p, sizeof(result)); + if (sys::isBigEndianHost()) + return sys::SwapByteOrder(result); return result; } @@ -131,7 +173,7 @@ static const uint64_t k3 = 0xc949d7c7509e6557ULL; /// \brief Bitwise right rotate. /// Normally this will compile to a single instruction, especially if the /// shift is a manifest constant. -inline uint64_t rotate(uint64_t val, unsigned shift) { +inline uint64_t rotate(uint64_t val, size_t shift) { // Avoid shifting by 64: doing so yields an undefined result. return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); } @@ -166,9 +208,9 @@ inline uint64_t hash_4to8_bytes(const char *s, size_t len, uint64_t seed) { } inline uint64_t hash_9to16_bytes(const char *s, size_t len, uint64_t seed) { - uint64_t a = fetch64(s); - uint64_t b = fetch64(s + len - 8); - return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b; + uint64_t a = fetch64(s); + uint64_t b = fetch64(s + len - 8); + return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b; } inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) { @@ -231,9 +273,8 @@ struct hash_state { static hash_state create(const char *s, uint64_t seed) { hash_state state = { 0, seed, hash_16_bytes(seed, k1), rotate(seed ^ k1, 49), - seed * k1, shift_mix(seed), hash_16_bytes(state.h4, state.h5), - seed - }; + seed * k1, shift_mix(seed), 0, seed }; + state.h6 = hash_16_bytes(state.h4, state.h5); state.mix(s); return state; } @@ -293,86 +334,38 @@ inline size_t get_execution_seed() { // called, return that instead of the per-execution seed. const uint64_t seed_prime = 0xff51afd7ed558ccdULL; static size_t seed = fixed_seed_override ? fixed_seed_override - : static_cast(seed_prime); + : (size_t)seed_prime; return seed; } -/// \brief Helper to hash the value of a single integer. -/// -/// Overloads for smaller integer types are not provided to ensure consistent -/// behavior in the presence of integral promotions. Essentially, -/// "hash_value('4')" and "hash_value('0' + 4)" should be the same. -inline hash_code hash_integer_value(uint64_t value) { - // Similar to hash_4to8_bytes but using a seed instead of length. - const uint64_t seed = get_execution_seed(); - const char *s = reinterpret_cast(&value); - const uint64_t a = fetch32(s); - return hash_16_bytes(seed + (a << 3), fetch32(s + 4)); -} - -} // namespace detail -} // namespace hashing - - -/// \brief Override the execution seed with a fixed value. -/// -/// This hashing library uses a per-execution seed designed to change on each -/// run with high probability in order to ensure that the hash codes are not -/// attackable and to ensure that output which is intended to be stable does -/// not rely on the particulars of the hash codes produced. -/// -/// That said, there are use cases where it is important to be able to -/// reproduce *exactly* a specific behavior. To that end, we provide a function -/// which will forcibly set the seed to a fixed value. This must be done at the -/// start of the program, before any hashes are computed. Also, it cannot be -/// undone. This makes it thread-hostile and very hard to use outside of -/// immediately on start of a simple program designed for reproducible -/// behavior. -void set_fixed_execution_hash_seed(size_t fixed_value); - - -/// \brief Compute a hash_code for any integer value. -/// -/// Note that this function is intended to compute the same hash_code for -/// a particular value without regard to the pre-promotion type. This is in -/// contrast to hash_combine which may produce different hash_codes for -/// differing argument types even if they would implicit promote to a common -/// type without changing the value. -template -typename enable_if, hash_code>::type hash_value(T value) { - return ::llvm::hashing::detail::hash_integer_value(value); -} - -/// \brief Compute a hash_code for a pointer's address. -/// -/// N.B.: This hashes the *address*. Not the value and not the type. -template hash_code hash_value(const T *ptr) { - return ::llvm::hashing::detail::hash_integer_value( - reinterpret_cast(ptr)); -} - - -// Implementation details for implementing hash combining functions. -namespace hashing { -namespace detail { - /// \brief Trait to indicate whether a type's bits can be hashed directly. /// /// A type trait which is true if we want to combine values for hashing by /// reading the underlying data. It is false if values of this type must /// first be passed to hash_value, and the resulting hash_codes combined. // -// FIXME: We want to replace is_integral and is_pointer here with a predicate -// which asserts that comparing the underlying storage of two values of the -// type for equality is equivalent to comparing the two values for equality. -// For all the platforms we care about, this holds for integers and pointers, -// but there are platforms where it doesn't and we would like to support -// user-defined types which happen to satisfy this property. +// FIXME: We want to replace is_integral_or_enum and is_pointer here with +// a predicate which asserts that comparing the underlying storage of two +// values of the type for equality is equivalent to comparing the two values +// for equality. For all the platforms we care about, this holds for integers +// and pointers, but there are platforms where it doesn't and we would like to +// support user-defined types which happen to satisfy this property. template struct is_hashable_data - : integral_constant::value || is_pointer::value) && + : integral_constant::value || + is_pointer::value) && 64 % sizeof(T) == 0)> {}; +// Special case std::pair to detect when both types are viable and when there +// is no alignment-derived padding in the pair. This is a bit of a lie because +// std::pair isn't truly POD, but it's close enough in all reasonable +// implementations for our use case of hashing the underlying data. +template struct is_hashable_data > + : integral_constant::value && + is_hashable_data::value && + (sizeof(T) + sizeof(U)) == + sizeof(std::pair))> {}; + /// \brief Helper to get the hashable data representation for a type. /// This variant is enabled when the type itself can be used. template @@ -423,8 +416,6 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) { while (first != last && store_and_advance(buffer_ptr, buffer_end, get_hashable_data(*first))) ++first; -/// \brief Metafunction that determines whether the given type is an integral -/// type. if (first == last) return hash_short(buffer, buffer_ptr - buffer, seed); assert(buffer_ptr == buffer_end); @@ -462,7 +453,7 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) { /// and directly reads from the underlying memory. template typename enable_if, hash_code>::type -hash_combine_range_impl(const ValueT *first, const ValueT *last) { +hash_combine_range_impl(ValueT *first, ValueT *last) { const size_t seed = get_execution_seed(); const char *s_begin = reinterpret_cast(first); const char *s_end = reinterpret_cast(last); @@ -510,13 +501,10 @@ namespace detail { /// recursive combining of arguments used in hash_combine. It is particularly /// useful at minimizing the code in the recursive calls to ease the pain /// caused by a lack of variadic functions. -class hash_combine_recursive_helper { - const size_t seed; +struct hash_combine_recursive_helper { char buffer[64]; - char *const buffer_end; - char *buffer_ptr; - size_t length; hash_state state; + const size_t seed; public: /// \brief Construct a recursive hash combining helper. @@ -524,10 +512,7 @@ public: /// This sets up the state for a recursive hash combine, including getting /// the seed and buffer setup. hash_combine_recursive_helper() - : seed(get_execution_seed()), - buffer_end(buffer + array_lengthof(buffer)), - buffer_ptr(buffer), - length(0) {} + : seed(get_execution_seed()) {} /// \brief Combine one chunk of data into the current in-flight hash. /// @@ -535,7 +520,8 @@ public: /// the data. If the buffer is full, it hashes the buffer into its /// hash_state, empties it, and then merges the new chunk in. This also /// handles cases where the data straddles the end of the buffer. - template void combine_data(T data) { + template + char *combine_data(size_t &length, char *buffer_ptr, char *buffer_end, T data) { if (!store_and_advance(buffer_ptr, buffer_end, data)) { // Check for skew which prevents the buffer from being packed, and do // a partial store into the buffer to fill it. This is only a concern @@ -566,6 +552,7 @@ public: partial_store_size)) abort(); } + return buffer_ptr; } #if defined(__has_feature) && __has_feature(__cxx_variadic_templates__) @@ -575,11 +562,12 @@ public: /// This function recurses through each argument, combining that argument /// into a single hash. template - hash_code combine(const T &arg, const Ts &...args) { - combine_data( get_hashable_data(arg)); + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T &arg, const Ts &...args) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg)); // Recurse to the next argument. - return combine(args...); + return combine(length, buffer_ptr, buffer_end, args...); } #else @@ -588,37 +576,43 @@ public: template - hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3, const T4 &arg4, const T5 &arg5, const T6 &arg6) { - combine_data(get_hashable_data(arg1)); - return combine(arg2, arg3, arg4, arg5, arg6); + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4, arg5, arg6); } template - hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3, const T4 &arg4, const T5 &arg5) { - combine_data(get_hashable_data(arg1)); - return combine(arg2, arg3, arg4, arg5); + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4, arg5); } template - hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3, const T4 &arg4) { - combine_data(get_hashable_data(arg1)); - return combine(arg2, arg3, arg4); + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4); } template - hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3) { - combine_data(get_hashable_data(arg1)); - return combine(arg2, arg3); + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2, const T3 &arg3) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2, arg3); } template - hash_code combine(const T1 &arg1, const T2 &arg2) { - combine_data(get_hashable_data(arg1)); - return combine(arg2); + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1, const T2 &arg2) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end, arg2); } template - hash_code combine(const T1 &arg1) { - combine_data(get_hashable_data(arg1)); - return combine(); + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end, + const T1 &arg1) { + buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1)); + return combine(length, buffer_ptr, buffer_end); } #endif @@ -628,7 +622,7 @@ public: /// The base case when combining arguments recursively is reached when all /// arguments have been handled. It flushes the remaining buffer and /// constructs a hash_code. - hash_code combine() { + hash_code combine(size_t length, char *buffer_ptr, char *buffer_end) { // Check whether the entire set of values fit in the buffer. If so, we'll // use the optimized short hashing routine and skip state entirely. if (length == 0) @@ -668,7 +662,7 @@ public: template hash_code hash_combine(const Ts &...args) { // Recursively hash each argument using a helper class. ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(args...); + return helper.combine(0, helper.buffer, helper.buffer + 64, args...); } #else @@ -679,40 +673,94 @@ template hash_code hash_combine(const Ts &...args) { template hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, - const T4 &arg4, const T5 &arg5, const T6 &arg6) { + const T4 &arg4, const T5 &arg5, const T6 &arg6) { ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(arg1, arg2, arg3, arg4, arg5, arg6); + return helper.combine(0, helper.buffer, helper.buffer + 64, + arg1, arg2, arg3, arg4, arg5, arg6); } template hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, - const T4 &arg4, const T5 &arg5) { + const T4 &arg4, const T5 &arg5) { ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(arg1, arg2, arg3, arg4, arg5); + return helper.combine(0, helper.buffer, helper.buffer + 64, + arg1, arg2, arg3, arg4, arg5); } template hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3, - const T4 &arg4) { + const T4 &arg4) { ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(arg1, arg2, arg3, arg4); + return helper.combine(0, helper.buffer, helper.buffer + 64, + arg1, arg2, arg3, arg4); } template hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3) { ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(arg1, arg2, arg3); + return helper.combine(0, helper.buffer, helper.buffer + 64, arg1, arg2, arg3); } template hash_code hash_combine(const T1 &arg1, const T2 &arg2) { ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(arg1, arg2); + return helper.combine(0, helper.buffer, helper.buffer + 64, arg1, arg2); } template hash_code hash_combine(const T1 &arg1) { ::llvm::hashing::detail::hash_combine_recursive_helper helper; - return helper.combine(arg1); + return helper.combine(0, helper.buffer, helper.buffer + 64, arg1); } #endif + +// Implementation details for implementatinos of hash_value overloads provided +// here. +namespace hashing { +namespace detail { + +/// \brief Helper to hash the value of a single integer. +/// +/// Overloads for smaller integer types are not provided to ensure consistent +/// behavior in the presence of integral promotions. Essentially, +/// "hash_value('4')" and "hash_value('0' + 4)" should be the same. +inline hash_code hash_integer_value(uint64_t value) { + // Similar to hash_4to8_bytes but using a seed instead of length. + const uint64_t seed = get_execution_seed(); + const char *s = reinterpret_cast(&value); + const uint64_t a = fetch32(s); + return hash_16_bytes(seed + (a << 3), fetch32(s + 4)); +} + +} // namespace detail +} // namespace hashing + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +typename enable_if, hash_code>::type +hash_value(T value) { + return ::llvm::hashing::detail::hash_integer_value(value); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template hash_code hash_value(const T *ptr) { + return ::llvm::hashing::detail::hash_integer_value( + reinterpret_cast(ptr)); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +hash_code hash_value(const std::pair &arg) { + return hash_combine(arg.first, arg.second); +} + +// Declared and documented above, but defined here so that any of the hashing +// infrastructure is available. +template +hash_code hash_value(const std::basic_string &arg) { + return hash_combine_range(arg.begin(), arg.end()); +} + } // namespace llvm #endif