Removing LLVM_DELETED_FUNCTION, as MSVC 2012 was the last reason for requiring the...

[oota-llvm.git] / include / llvm / ADT / Hashing.h
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h

index 1a4c555b294710849643dae290823f60ff68982a..a1e78642e94c90f818c6a606fc743c7f7cf25c63 100644 (file)
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -45,8 +45,9 @@
  #ifndef LLVM_ADT_HASHING_H
  #define LLVM_ADT_HASHING_H
  
-#include "llvm/ADT/STLExtras.h"
  #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
  #include "llvm/Support/type_traits.h"
  #include <algorithm>
  #include <cassert>
@@ -54,12 +55,6 @@
  #include <iterator>
  #include <utility>
  
-// Allow detecting C++11 feature availability when building with Clang without
-// breaking other compilers.
-#ifndef __has_feature
-# define __has_feature(x) 0
-#endif
-
  namespace llvm {
  
  /// \brief An opaque object representing a hash code.
@@ -74,10 +69,6 @@ namespace llvm {
  ///   using llvm::hash_value;
  ///   llvm::hash_code code = hash_value(x);
  /// \endcode
-///
-/// Also note that there are two numerical values which are reserved, and the
-/// implementation ensures will never be produced for real hash_codes. These
-/// can be used as sentinels within hashing data structures.
  class hash_code {
    size_t value;
  
@@ -103,6 +94,47 @@ public:
    friend size_t hash_value(const hash_code &code) { return code.value; }
  };
  
+/// \brief Compute a hash_code for any integer value.
+///
+/// Note that this function is intended to compute the same hash_code for
+/// a particular value without regard to the pre-promotion type. This is in
+/// contrast to hash_combine which may produce different hash_codes for
+/// differing argument types even if they would implicit promote to a common
+/// type without changing the value.
+template <typename T>
+typename std::enable_if<is_integral_or_enum<T>::value, hash_code>::type
+hash_value(T value);
+
+/// \brief Compute a hash_code for a pointer's address.
+///
+/// N.B.: This hashes the *address*. Not the value and not the type.
+template <typename T> hash_code hash_value(const T *ptr);
+
+/// \brief Compute a hash_code for a pair of objects.
+template <typename T, typename U>
+hash_code hash_value(const std::pair<T, U> &arg);
+
+/// \brief Compute a hash_code for a standard string.
+template <typename T>
+hash_code hash_value(const std::basic_string<T> &arg);
+
+
+/// \brief Override the execution seed with a fixed value.
+///
+/// This hashing library uses a per-execution seed designed to change on each
+/// run with high probability in order to ensure that the hash codes are not
+/// attackable and to ensure that output which is intended to be stable does
+/// not rely on the particulars of the hash codes produced.
+///
+/// That said, there are use cases where it is important to be able to
+/// reproduce *exactly* a specific behavior. To that end, we provide a function
+/// which will forcibly set the seed to a fixed value. This must be done at the
+/// start of the program, before any hashes are computed. Also, it cannot be
+/// undone. This makes it thread-hostile and very hard to use outside of
+/// immediately on start of a simple program designed for reproducible
+/// behavior.
+void set_fixed_execution_hash_seed(size_t fixed_value);
+
  
  // All of the implementation details of actually computing the various hash
  // code values are held within this namespace. These routines are included in
@@ -113,12 +145,16 @@ namespace detail {
  inline uint64_t fetch64(const char *p) {
    uint64_t result;
    memcpy(&result, p, sizeof(result));
+  if (sys::IsBigEndianHost)
+    sys::swapByteOrder(result);
    return result;
  }
  
  inline uint32_t fetch32(const char *p) {
    uint32_t result;
    memcpy(&result, p, sizeof(result));
+  if (sys::IsBigEndianHost)
+    sys::swapByteOrder(result);
    return result;
  }
  
@@ -131,7 +167,7 @@ static const uint64_t k3 = 0xc949d7c7509e6557ULL;
  /// \brief Bitwise right rotate.
  /// Normally this will compile to a single instruction, especially if the
  /// shift is a manifest constant.
-inline uint64_t rotate(uint64_t val, unsigned shift) {
+inline uint64_t rotate(uint64_t val, size_t shift) {
    // Avoid shifting by 64: doing so yields an undefined result.
    return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
  }
@@ -166,9 +202,9 @@ inline uint64_t hash_4to8_bytes(const char *s, size_t len, uint64_t seed) {
  }
  
  inline uint64_t hash_9to16_bytes(const char *s, size_t len, uint64_t seed) {
-    uint64_t a = fetch64(s);
-    uint64_t b = fetch64(s + len - 8);
-    return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b;
+  uint64_t a = fetch64(s);
+  uint64_t b = fetch64(s + len - 8);
+  return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b;
  }
  
  inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) {
@@ -223,7 +259,6 @@ inline uint64_t hash_short(const char *s, size_t length, uint64_t seed) {
  /// keeps 56 bytes of arbitrary state.
  struct hash_state {
    uint64_t h0, h1, h2, h3, h4, h5, h6;
-  uint64_t seed;
  
    /// \brief Create a new hash_state structure and initialize it based on the
    /// seed and the first 64-byte chunk.
@@ -231,9 +266,8 @@ struct hash_state {
    static hash_state create(const char *s, uint64_t seed) {
      hash_state state = {
        0, seed, hash_16_bytes(seed, k1), rotate(seed ^ k1, 49),
-      seed * k1, shift_mix(seed), hash_16_bytes(state.h4, state.h5),
-      seed
-    };
+      seed * k1, shift_mix(seed), 0 };
+    state.h6 = hash_16_bytes(state.h4, state.h5);
      state.mix(s);
      return state;
    }
@@ -293,90 +327,42 @@ inline size_t get_execution_seed() {
    // called, return that instead of the per-execution seed.
    const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
    static size_t seed = fixed_seed_override ? fixed_seed_override
-                                           : static_cast<size_t>(seed_prime);
+                                           : (size_t)seed_prime;
    return seed;
  }
  
  
-/// \brief Helper to hash the value of a single integer.
-///
-/// Overloads for smaller integer types are not provided to ensure consistent
-/// behavior in the presence of integral promotions. Essentially,
-/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
-inline hash_code hash_integer_value(uint64_t value) {
-  // Similar to hash_4to8_bytes but using a seed instead of length.
-  const uint64_t seed = get_execution_seed();
-  const char *s = reinterpret_cast<const char *>(&value);
-  const uint64_t a = fetch32(s);
-  return hash_16_bytes(seed + (a << 3), fetch32(s + 4));
-}
-
-} // namespace detail
-} // namespace hashing
-
-
-/// \brief Override the execution seed with a fixed value.
-///
-/// This hashing library uses a per-execution seed designed to change on each
-/// run with high probability in order to ensure that the hash codes are not
-/// attackable and to ensure that output which is intended to be stable does
-/// not rely on the particulars of the hash codes produced.
-///
-/// That said, there are use cases where it is important to be able to
-/// reproduce *exactly* a specific behavior. To that end, we provide a function
-/// which will forcibly set the seed to a fixed value. This must be done at the
-/// start of the program, before any hashes are computed. Also, it cannot be
-/// undone. This makes it thread-hostile and very hard to use outside of
-/// immediately on start of a simple program designed for reproducible
-/// behavior.
-void set_fixed_execution_hash_seed(size_t fixed_value);
-
-
-/// \brief Compute a hash_code for any integer value.
-///
-/// Note that this function is intended to compute the same hash_code for
-/// a particular value without regard to the pre-promotion type. This is in
-/// contrast to hash_combine which may produce different hash_codes for
-/// differing argument types even if they would implicit promote to a common
-/// type without changing the value.
-template <typename T>
-typename enable_if<is_integral<T>, hash_code>::type hash_value(T value) {
-  return ::llvm::hashing::detail::hash_integer_value(value);
-}
-
-/// \brief Compute a hash_code for a pointer's address.
-///
-/// N.B.: This hashes the *address*. Not the value and not the type.
-template <typename T> hash_code hash_value(const T *ptr) {
-  return ::llvm::hashing::detail::hash_integer_value(
-    reinterpret_cast<uintptr_t>(ptr));
-}
-
-
-// Implementation details for implementing hash combining functions.
-namespace hashing {
-namespace detail {
-
  /// \brief Trait to indicate whether a type's bits can be hashed directly.
  ///
  /// A type trait which is true if we want to combine values for hashing by
  /// reading the underlying data. It is false if values of this type must
  /// first be passed to hash_value, and the resulting hash_codes combined.
  //
-// FIXME: We want to replace is_integral and is_pointer here with a predicate
-// which asserts that comparing the underlying storage of two values of the
-// type for equality is equivalent to comparing the two values for equality.
-// For all the platforms we care about, this holds for integers and pointers,
-// but there are platforms where it doesn't and we would like to support
-// user-defined types which happen to satisfy this property.
+// FIXME: We want to replace is_integral_or_enum and is_pointer here with
+// a predicate which asserts that comparing the underlying storage of two
+// values of the type for equality is equivalent to comparing the two values
+// for equality. For all the platforms we care about, this holds for integers
+// and pointers, but there are platforms where it doesn't and we would like to
+// support user-defined types which happen to satisfy this property.
  template <typename T> struct is_hashable_data
-  : integral_constant<bool, ((is_integral<T>::value || is_pointer<T>::value) &&
-                             64 % sizeof(T) == 0)> {};
+  : std::integral_constant<bool, ((is_integral_or_enum<T>::value ||
+                                   std::is_pointer<T>::value) &&
+                                  64 % sizeof(T) == 0)> {};
+
+// Special case std::pair to detect when both types are viable and when there
+// is no alignment-derived padding in the pair. This is a bit of a lie because
+// std::pair isn't truly POD, but it's close enough in all reasonable
+// implementations for our use case of hashing the underlying data.
+template <typename T, typename U> struct is_hashable_data<std::pair<T, U> >
+  : std::integral_constant<bool, (is_hashable_data<T>::value &&
+                                  is_hashable_data<U>::value &&
+                                  (sizeof(T) + sizeof(U)) ==
+                                   sizeof(std::pair<T, U>))> {};
  
  /// \brief Helper to get the hashable data representation for a type.
  /// This variant is enabled when the type itself can be used.
  template <typename T>
-typename enable_if<is_hashable_data<T>, T>::type
+typename std::enable_if<is_hashable_data<T>::value, T>::type
  get_hashable_data(const T &value) {
    return value;
  }
@@ -384,7 +370,7 @@ get_hashable_data(const T &value) {
  /// This variant is enabled when we must first call hash_value and use the
  /// result as our data.
  template <typename T>
-typename enable_if_c<!is_hashable_data<T>::value, size_t>::type
+typename std::enable_if<!is_hashable_data<T>::value, size_t>::type
  get_hashable_data(const T &value) {
    using ::llvm::hash_value;
    return hash_value(value);
@@ -416,15 +402,12 @@ bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T& value,
  /// combining them, this (as an optimization) directly combines the integers.
  template <typename InputIteratorT>
  hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
-  typedef typename std::iterator_traits<InputIteratorT>::value_type ValueT;
    const size_t seed = get_execution_seed();
    char buffer[64], *buffer_ptr = buffer;
-  char *const buffer_end = buffer_ptr + array_lengthof(buffer);
+  char *const buffer_end = std::end(buffer);
    while (first != last && store_and_advance(buffer_ptr, buffer_end,
                                              get_hashable_data(*first)))
      ++first;
-/// \brief Metafunction that determines whether the given type is an integral
-/// type.
    if (first == last)
      return hash_short(buffer, buffer_ptr - buffer, seed);
    assert(buffer_ptr == buffer_end);
@@ -461,8 +444,8 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
  /// are stored in contiguous memory, this routine avoids copying each value
  /// and directly reads from the underlying memory.
  template <typename ValueT>
-typename enable_if<is_hashable_data<ValueT>, hash_code>::type
-hash_combine_range_impl(const ValueT *first, const ValueT *last) {
+typename std::enable_if<is_hashable_data<ValueT>::value, hash_code>::type
+hash_combine_range_impl(ValueT *first, ValueT *last) {
    const size_t seed = get_execution_seed();
    const char *s_begin = reinterpret_cast<const char *>(first);
    const char *s_end = reinterpret_cast<const char *>(last);
@@ -510,13 +493,10 @@ namespace detail {
  /// recursive combining of arguments used in hash_combine. It is particularly
  /// useful at minimizing the code in the recursive calls to ease the pain
  /// caused by a lack of variadic functions.
-class hash_combine_recursive_helper {
-  const size_t seed;
+struct hash_combine_recursive_helper {
    char buffer[64];
-  char *const buffer_end;
-  char *buffer_ptr;
-  size_t length;
    hash_state state;
+  const size_t seed;
  
  public:
    /// \brief Construct a recursive hash combining helper.
@@ -524,10 +504,7 @@ public:
    /// This sets up the state for a recursive hash combine, including getting
    /// the seed and buffer setup.
    hash_combine_recursive_helper()
-    : seed(get_execution_seed()),
-      buffer_end(buffer + array_lengthof(buffer)),
-      buffer_ptr(buffer),
-      length(0) {}
+    : seed(get_execution_seed()) {}
  
    /// \brief Combine one chunk of data into the current in-flight hash.
    ///
@@ -535,7 +512,8 @@ public:
    /// the data. If the buffer is full, it hashes the buffer into its
    /// hash_state, empties it, and then merges the new chunk in. This also
    /// handles cases where the data straddles the end of the buffer.
-  template <typename T> void combine_data(T data) {
+  template <typename T>
+  char *combine_data(size_t &length, char *buffer_ptr, char *buffer_end, T data) {
      if (!store_and_advance(buffer_ptr, buffer_end, data)) {
        // Check for skew which prevents the buffer from being packed, and do
        // a partial store into the buffer to fill it. This is only a concern
@@ -566,69 +544,28 @@ public:
                               partial_store_size))
          abort();
      }
+    return buffer_ptr;
    }
  
-#if defined(__has_feature) && __has_feature(__cxx_variadic_templates__)
-
    /// \brief Recursive, variadic combining method.
    ///
    /// This function recurses through each argument, combining that argument
    /// into a single hash.
    template <typename T, typename ...Ts>
-  hash_code combine(const T &arg, const Ts &...args) {
-    combine_data( get_hashable_data(arg));
+  hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+                    const T &arg, const Ts &...args) {
+    buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg));
  
      // Recurse to the next argument.
-    return combine(args...);
-  }
-
-#else
-  // Manually expanded recursive combining methods. See variadic above for
-  // documentation.
-
-  template <typename T1, typename T2, typename T3, typename T4, typename T5,
-            typename T6>
-  hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
-                    const T4 &arg4, const T5 &arg5, const T6 &arg6) {
-    combine_data(get_hashable_data(arg1));
-    return combine(arg2, arg3, arg4, arg5, arg6);
-  }
-  template <typename T1, typename T2, typename T3, typename T4, typename T5>
-  hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
-                    const T4 &arg4, const T5 &arg5) {
-    combine_data(get_hashable_data(arg1));
-    return combine(arg2, arg3, arg4, arg5);
-  }
-  template <typename T1, typename T2, typename T3, typename T4>
-  hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
-                    const T4 &arg4) {
-    combine_data(get_hashable_data(arg1));
-    return combine(arg2, arg3, arg4);
-  }
-  template <typename T1, typename T2, typename T3>
-  hash_code combine(const T1 &arg1, const T2 &arg2, const T3 &arg3) {
-    combine_data(get_hashable_data(arg1));
-    return combine(arg2, arg3);
-  }
-  template <typename T1, typename T2>
-  hash_code combine(const T1 &arg1, const T2 &arg2) {
-    combine_data(get_hashable_data(arg1));
-    return combine(arg2);
-  }
-  template <typename T1>
-  hash_code combine(const T1 &arg1) {
-    combine_data(get_hashable_data(arg1));
-    return combine();
+    return combine(length, buffer_ptr, buffer_end, args...);
    }
  
-#endif
-
    /// \brief Base case for recursive, variadic combining.
    ///
    /// The base case when combining arguments recursively is reached when all
    /// arguments have been handled. It flushes the remaining buffer and
    /// constructs a hash_code.
-  hash_code combine() {
+  hash_code combine(size_t length, char *buffer_ptr, char *buffer_end) {
      // Check whether the entire set of values fit in the buffer. If so, we'll
      // use the optimized short hashing routine and skip state entirely.
      if (length == 0)
@@ -651,9 +588,6 @@ public:
  } // namespace detail
  } // namespace hashing
  
-
-#if __has_feature(__cxx_variadic_templates__)
-
  /// \brief Combine values into a single hash_code.
  ///
  /// This routine accepts a varying number of arguments of any type. It will
@@ -668,50 +602,58 @@ public:
  template <typename ...Ts> hash_code hash_combine(const Ts &...args) {
    // Recursively hash each argument using a helper class.
    ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(args...);
+  return helper.combine(0, helper.buffer, helper.buffer + 64, args...);
  }
  
-#else
-
-// What follows are manually exploded overloads for each argument width. See
-// the above variadic definition for documentation and specification.
+// Implementation details for implementations of hash_value overloads provided
+// here.
+namespace hashing {
+namespace detail {
  
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-          typename T6>
-hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
-                  const T4 &arg4, const T5 &arg5, const T6 &arg6) {
-  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(arg1, arg2, arg3, arg4, arg5, arg6);
-}
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
-                  const T4 &arg4, const T5 &arg5) {
-  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(arg1, arg2, arg3, arg4, arg5);
-}
-template <typename T1, typename T2, typename T3, typename T4>
-hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
-                  const T4 &arg4) {
-  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(arg1, arg2, arg3, arg4);
+/// \brief Helper to hash the value of a single integer.
+///
+/// Overloads for smaller integer types are not provided to ensure consistent
+/// behavior in the presence of integral promotions. Essentially,
+/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
+inline hash_code hash_integer_value(uint64_t value) {
+  // Similar to hash_4to8_bytes but using a seed instead of length.
+  const uint64_t seed = get_execution_seed();
+  const char *s = reinterpret_cast<const char *>(&value);
+  const uint64_t a = fetch32(s);
+  return hash_16_bytes(seed + (a << 3), fetch32(s + 4));
  }
-template <typename T1, typename T2, typename T3>
-hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3) {
-  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(arg1, arg2, arg3);
+
+} // namespace detail
+} // namespace hashing
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T>
+typename std::enable_if<is_integral_or_enum<T>::value, hash_code>::type
+hash_value(T value) {
+  return ::llvm::hashing::detail::hash_integer_value(value);
  }
-template <typename T1, typename T2>
-hash_code hash_combine(const T1 &arg1, const T2 &arg2) {
-  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(arg1, arg2);
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T> hash_code hash_value(const T *ptr) {
+  return ::llvm::hashing::detail::hash_integer_value(
+    reinterpret_cast<uintptr_t>(ptr));
  }
-template <typename T1>
-hash_code hash_combine(const T1 &arg1) {
-  ::llvm::hashing::detail::hash_combine_recursive_helper helper;
-  return helper.combine(arg1);
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T, typename U>
+hash_code hash_value(const std::pair<T, U> &arg) {
+  return hash_combine(arg.first, arg.second);
  }
  
-#endif
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T>
+hash_code hash_value(const std::basic_string<T> &arg) {
+  return hash_combine_range(arg.begin(), arg.end());
+}
  
  } // namespace llvm