From: khizmax Date: Sun, 13 Sep 2015 13:38:21 +0000 (+0300) Subject: Incorporated MultiLevelHashSet into set's MT-test X-Git-Tag: v2.1.0~121 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d0eb9e0849eac0d1e7fba6b245a5e065ceb814bb;p=libcds.git Incorporated MultiLevelHashSet into set's MT-test --- diff --git a/cds/compiler/clang/defs.h b/cds/compiler/clang/defs.h index ce0874c0..5a92cf5b 100644 --- a/cds/compiler/clang/defs.h +++ b/cds/compiler/clang/defs.h @@ -55,8 +55,14 @@ // Attributes #if CDS_COMPILER_VERSION >= 30400 -# define CDS_DEPRECATED( reason ) [[deprecated(reason)]] -else +# if __cplusplus < 201103 +# define CDS_DEPRECATED( reason ) __attribute__((deprecated( reason ))) +# elif __cplusplus == 201103 // C++11 +# define CDS_DEPRECATED( reason ) [[gnu::deprecated(reason)]] +# else // C++14 +# define CDS_DEPRECATED( reason ) [[deprecated(reason)]] +# endif +#else # define CDS_DEPRECATED( reason ) __attribute__((deprecated( reason ))) #endif diff --git a/cds/compiler/gcc/defs.h b/cds/compiler/gcc/defs.h index bd3fcbd0..36f26f2f 100644 --- a/cds/compiler/gcc/defs.h +++ b/cds/compiler/gcc/defs.h @@ -58,8 +58,14 @@ // Attributes #if CDS_COMPILER_VERSION >= 40900 -# define CDS_DEPRECATED( reason ) [[deprecated(reason)]] -else +# if __cplusplus < 201103 +# define CDS_DEPRECATED( reason ) __attribute__((deprecated( reason ))) +# elif __cplusplus == 201103 // C++11 +# define CDS_DEPRECATED( reason ) [[gnu::deprecated(reason)]] +# else // C++14 +# define CDS_DEPRECATED( reason ) [[deprecated(reason)]] +# endif +#else # define CDS_DEPRECATED( reason ) __attribute__((deprecated( reason ))) #endif diff --git a/cds/opt/options.h b/cds/opt/options.h index 9fa817c2..135da12f 100644 --- a/cds/opt/options.h +++ b/cds/opt/options.h @@ -978,7 +978,7 @@ namespace cds { namespace opt { /// Metafunction to find opt::type_traits option in \p Options list /** @headerfile cds/opt/options.h - If \p Options contains opt::type_traits option then it is the metafunction result. + If \p Options contains \p opt::type_traits option then it is the metafunction result. Otherwise the result is \p DefaultOptons. */ template diff --git a/projects/Win/vc12/cds.sln b/projects/Win/vc12/cds.sln index a203d44c..5b7477b7 100644 --- a/projects/Win/vc12/cds.sln +++ b/projects/Win/vc12/cds.sln @@ -98,6 +98,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "set", "set", "{A64449B7-90F ..\..\..\tests\unit\set2\set_type_lazy_list.h = ..\..\..\tests\unit\set2\set_type_lazy_list.h ..\..\..\tests\unit\set2\set_type_michael.h = ..\..\..\tests\unit\set2\set_type_michael.h ..\..\..\tests\unit\set2\set_type_michael_list.h = ..\..\..\tests\unit\set2\set_type_michael_list.h + ..\..\..\tests\unit\set2\set_type_multilevel_hashset.h = ..\..\..\tests\unit\set2\set_type_multilevel_hashset.h ..\..\..\tests\unit\set2\set_type_skip_list.h = ..\..\..\tests\unit\set2\set_type_skip_list.h ..\..\..\tests\unit\set2\set_type_split_list.h = ..\..\..\tests\unit\set2\set_type_split_list.h ..\..\..\tests\unit\set2\set_type_std.h = ..\..\..\tests\unit\set2\set_type_std.h diff --git a/projects/Win/vc12/unit-prerequisites.vcxproj b/projects/Win/vc12/unit-prerequisites.vcxproj index 848ab3a8..7c1bc845 100644 --- a/projects/Win/vc12/unit-prerequisites.vcxproj +++ b/projects/Win/vc12/unit-prerequisites.vcxproj @@ -425,7 +425,7 @@ - + diff --git a/projects/Win/vc12/unit-set-insdel.vcxproj b/projects/Win/vc12/unit-set-insdel.vcxproj index b4bd97fe..8e0830af 100644 --- a/projects/Win/vc12/unit-set-insdel.vcxproj +++ b/projects/Win/vc12/unit-set-insdel.vcxproj @@ -47,6 +47,7 @@ + @@ -54,6 +55,7 @@ + diff --git a/projects/Win/vc12/unit-set-insdel.vcxproj.filters b/projects/Win/vc12/unit-set-insdel.vcxproj.filters index 9dd1ddb5..74851368 100644 --- a/projects/Win/vc12/unit-set-insdel.vcxproj.filters +++ b/projects/Win/vc12/unit-set-insdel.vcxproj.filters @@ -46,6 +46,12 @@ set_insdel_func + + set_insdel_func + + + set_insdel_string + diff --git a/projects/source.test-common.mk b/projects/source.test-common.mk index 99b66930..97aca01d 100644 --- a/projects/source.test-common.mk +++ b/projects/source.test-common.mk @@ -4,7 +4,7 @@ CDS_TESTCOMMON_SOURCES := \ tests/cppunit/thread.cpp \ tests/unit/michael_alloc.cpp \ tests/unit/ellen_bintree_update_desc_pool.cpp \ - tests/hashing/city.cc \ + tests/hashing/city.cpp \ tests/hashing/md5.cpp \ tests/hashing/sha256.cpp diff --git a/projects/source.unit.set.mk b/projects/source.unit.set.mk index bcd4f3ff..abbdd95d 100644 --- a/projects/source.unit.set.mk +++ b/projects/source.unit.set.mk @@ -4,6 +4,7 @@ CDSUNIT_SET_SOURCES := \ tests/unit/set2/set_insdel_func_cuckoo.cpp \ tests/unit/set2/set_insdel_func_ellentree.cpp \ tests/unit/set2/set_insdel_func_michael.cpp \ + tests/unit/set2/set_insdel_func_multilevelhashset.cpp \ tests/unit/set2/set_insdel_func_skip.cpp \ tests/unit/set2/set_insdel_func_split.cpp \ tests/unit/set2/set_insdel_func_striped.cpp \ @@ -11,6 +12,7 @@ CDSUNIT_SET_SOURCES := \ tests/unit/set2/set_insdel_string_cuckoo.cpp \ tests/unit/set2/set_insdel_string_ellentree.cpp \ tests/unit/set2/set_insdel_string_michael.cpp \ + tests/unit/set2/set_insdel_string_multilevelhashset.cpp \ tests/unit/set2/set_insdel_string_skip.cpp \ tests/unit/set2/set_insdel_string_split.cpp \ tests/unit/set2/set_insdel_string_striped.cpp \ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3c77e1af..8bbd6f53 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,7 @@ set(SOURCES cppunit/test_main.cpp cppunit/thread.cpp unit/michael_alloc.cpp unit/ellen_bintree_update_desc_pool.cpp - hashing/city.cc + hashing/city.cpp hashing/md5.cpp hashing/sha256.cpp) diff --git a/tests/hashing/city.cc b/tests/hashing/city.cc deleted file mode 100644 index c33aec06..00000000 --- a/tests/hashing/city.cc +++ /dev/null @@ -1,628 +0,0 @@ -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// This file provides CityHash64() and related functions. -// -// It's probably possible to create even faster hash functions by -// writing a program that systematically explores some of the space of -// possible hash functions, by using SIMD instructions, or by -// compromising on hash quality. - -//#include "config.h" -#include "city.h" - -#include -#include // for memcpy and memset - -using namespace std; - -static uint64 UNALIGNED_LOAD64(const char *p) { - uint64 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -static uint32 UNALIGNED_LOAD32(const char *p) { - uint32 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -#ifdef _MSC_VER - -#include -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) - -// Mac OS X / Darwin features -#include -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(__NetBSD__) - -#include -#include -#if defined(__BSWAP_RENAME) && !defined(__bswap_32) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) -#endif - -#else - -#include - -#endif - -#ifdef WORDS_BIGENDIAN -#define uint32_in_expected_order(x) (bswap_32(x)) -#define uint64_in_expected_order(x) (bswap_64(x)) -#else -#define uint32_in_expected_order(x) (x) -#define uint64_in_expected_order(x) (x) -#endif - -#if !defined(LIKELY) -#if HAVE_BUILTIN_EXPECT -#define LIKELY(x) (__builtin_expect(!!(x), 1)) -#else -#define LIKELY(x) (x) -#endif -#endif - -static uint64 Fetch64(const char *p) { - return uint64_in_expected_order(UNALIGNED_LOAD64(p)); -} - -static uint32 Fetch32(const char *p) { - return uint32_in_expected_order(UNALIGNED_LOAD32(p)); -} - -// Some primes between 2^63 and 2^64 for various uses. -static const uint64 k0 = 0xc3a5c85c97cb3127ULL; -static const uint64 k1 = 0xb492b66fbe98f273ULL; -static const uint64 k2 = 0x9ae16a3b2f90404fULL; - -// Magic numbers for 32-bit hashing. Copied from Murmur3. -static const uint32_t c1 = 0xcc9e2d51; -static const uint32_t c2 = 0x1b873593; - -// A 32-bit to 32-bit integer hash copied from Murmur3. -static uint32 fmix(uint32 h) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; -} - -static uint32 Rotate32(uint32 val, int shift) { - // Avoid shifting by 32: doing so yields an undefined result. - return shift == 0 ? val : ((val >> shift) | (val << (32 - shift))); -} - -#undef PERMUTE3 -#define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0) - -static uint32 Mur(uint32 a, uint32 h) { - // Helper from Murmur3 for combining two 32-bit values. - a *= c1; - a = Rotate32(a, 17); - a *= c2; - h ^= a; - h = Rotate32(h, 19); - return h * 5 + 0xe6546b64; -} - -static uint32 Hash32Len13to24(const char *s, size_t len) { - uint32 a = Fetch32(s - 4 + (len >> 1)); - uint32 b = Fetch32(s + 4); - uint32 c = Fetch32(s + len - 8); - uint32 d = Fetch32(s + (len >> 1)); - uint32 e = Fetch32(s); - uint32 f = Fetch32(s + len - 4); - uint32 h = len; - - return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); -} - -static uint32 Hash32Len0to4(const char *s, size_t len) { - uint32 b = 0; - uint32 c = 9; - for (int i = 0; i < len; i++) { - signed char v = s[i]; - b = b * c1 + v; - c ^= b; - } - return fmix(Mur(b, Mur(len, c))); -} - -static uint32 Hash32Len5to12(const char *s, size_t len) { - uint32 a = len, b = len * 5, c = 9, d = b; - a += Fetch32(s); - b += Fetch32(s + len - 4); - c += Fetch32(s + ((len >> 1) & 4)); - return fmix(Mur(c, Mur(b, Mur(a, d)))); -} - -uint32 CityHash32(const char *s, size_t len) { - if (len <= 24) { - return len <= 12 ? - (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) : - Hash32Len13to24(s, len); - } - - // len > 24 - uint32 h = len, g = c1 * len, f = g; - uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2; - uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2; - uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2; - uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2; - uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2; - h ^= a0; - h = Rotate32(h, 19); - h = h * 5 + 0xe6546b64; - h ^= a2; - h = Rotate32(h, 19); - h = h * 5 + 0xe6546b64; - g ^= a1; - g = Rotate32(g, 19); - g = g * 5 + 0xe6546b64; - g ^= a3; - g = Rotate32(g, 19); - g = g * 5 + 0xe6546b64; - f += a4; - f = Rotate32(f, 19); - f = f * 5 + 0xe6546b64; - size_t iters = (len - 1) / 20; - do { - uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2; - uint32 a1 = Fetch32(s + 4); - uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2; - uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2; - uint32 a4 = Fetch32(s + 16); - h ^= a0; - h = Rotate32(h, 18); - h = h * 5 + 0xe6546b64; - f += a1; - f = Rotate32(f, 19); - f = f * c1; - g += a2; - g = Rotate32(g, 18); - g = g * 5 + 0xe6546b64; - h ^= a3 + a1; - h = Rotate32(h, 19); - h = h * 5 + 0xe6546b64; - g ^= a4; - g = bswap_32(g) * 5; - h += a4 * 5; - h = bswap_32(h); - f += a0; - PERMUTE3(f, h, g); - s += 20; - } while (--iters != 0); - g = Rotate32(g, 11) * c1; - g = Rotate32(g, 17) * c1; - f = Rotate32(f, 11) * c1; - f = Rotate32(f, 17) * c1; - h = Rotate32(h + g, 19); - h = h * 5 + 0xe6546b64; - h = Rotate32(h, 17) * c1; - h = Rotate32(h + f, 19); - h = h * 5 + 0xe6546b64; - h = Rotate32(h, 17) * c1; - return h; -} - -// Bitwise right rotate. Normally this will compile to a single -// instruction, especially if the shift is a manifest constant. -static uint64 Rotate(uint64 val, int shift) { - // Avoid shifting by 64: doing so yields an undefined result. - return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); -} - -static uint64 ShiftMix(uint64 val) { - return val ^ (val >> 47); -} - -static uint64 HashLen16(uint64 u, uint64 v) { - return Hash128to64(uint128(u, v)); -} - -static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { - // Murmur-inspired hashing. - uint64 a = (u ^ v) * mul; - a ^= (a >> 47); - uint64 b = (v ^ a) * mul; - b ^= (b >> 47); - b *= mul; - return b; -} - -static uint64 HashLen0to16(const char *s, size_t len) { - if (len >= 8) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) + k2; - uint64 b = Fetch64(s + len - 8); - uint64 c = Rotate(b, 37) * mul + a; - uint64 d = (Rotate(a, 25) + b) * mul; - return HashLen16(c, d, mul); - } - if (len >= 4) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); - } - if (len > 0) { - uint8 a = s[0]; - uint8 b = s[len >> 1]; - uint8 c = s[len - 1]; - uint32 y = static_cast(a) + (static_cast(b) << 8); - uint32 z = len + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k0) * k2; - } - return k2; -} - -// This probably works well for 16-byte strings as well, but it may be overkill -// in that case. -static uint64 HashLen17to32(const char *s, size_t len) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) * k1; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 8) * mul; - uint64 d = Fetch64(s + len - 16) * k2; - return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, - a + Rotate(b + k2, 18) + c, mul); -} - -// Return a 16-byte hash for 48 bytes. Quick and dirty. -// Callers do best to use "random-looking" values for a and b. -static pair WeakHashLen32WithSeeds( - uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { - a += w; - b = Rotate(b + a + z, 21); - uint64 c = a; - a += x; - a += y; - b += Rotate(a, 44); - return make_pair(a + z, b + c); -} - -// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. -static pair WeakHashLen32WithSeeds( - const char* s, uint64 a, uint64 b) { - return WeakHashLen32WithSeeds(Fetch64(s), - Fetch64(s + 8), - Fetch64(s + 16), - Fetch64(s + 24), - a, - b); -} - -// Return an 8-byte hash for 33 to 64 bytes. -static uint64 HashLen33to64(const char *s, size_t len) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) * k2; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 24); - uint64 d = Fetch64(s + len - 32); - uint64 e = Fetch64(s + 16) * k2; - uint64 f = Fetch64(s + 24) * 9; - uint64 g = Fetch64(s + len - 8); - uint64 h = Fetch64(s + len - 16) * mul; - uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; - uint64 v = ((a + g) ^ d) + f + 1; - uint64 w = bswap_64((u + v) * mul) + h; - uint64 x = Rotate(e + f, 42) + c; - uint64 y = (bswap_64((v + w) * mul) + g) * mul; - uint64 z = e + f + c; - a = bswap_64((x + z) * mul + y) + b; - b = ShiftMix((z + a) * mul + d + h) * mul; - return b + x; -} - -uint64 CityHash64(const char *s, size_t len) { - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); - } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - - // For strings over 64 bytes we hash the end first, and then as we - // loop we keep 56 bytes of state: v, w, x, y, and z. - uint64 x = Fetch64(s + len - 40); - uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); - uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); - pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); - pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); - x = x * k1 + Fetch64(s); - - // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. - len = (len - 1) & ~static_cast(63); - do { - x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 64; - } while (len != 0); - return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, - HashLen16(v.second, w.second) + x); -} - -uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { - return CityHash64WithSeeds(s, len, k2, seed); -} - -uint64 CityHash64WithSeeds(const char *s, size_t len, - uint64 seed0, uint64 seed1) { - return HashLen16(CityHash64(s, len) - seed0, seed1); -} - -// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings -// of any length representable in signed long. Based on City and Murmur. -static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { - uint64 a = Uint128Low64(seed); - uint64 b = Uint128High64(seed); - uint64 c = 0; - uint64 d = 0; - signed long l = len - 16; - if (l <= 0) { // len <= 16 - a = ShiftMix(a * k1) * k1; - c = b * k1 + HashLen0to16(s, len); - d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); - } else { // len > 16 - c = HashLen16(Fetch64(s + len - 8) + k1, a); - d = HashLen16(b + len, c + Fetch64(s + len - 16)); - a += d; - do { - a ^= ShiftMix(Fetch64(s) * k1) * k1; - a *= k1; - b ^= a; - c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; - c *= k1; - d ^= c; - s += 16; - l -= 16; - } while (l > 0); - } - a = HashLen16(a, c); - b = HashLen16(d, b); - return uint128(a ^ b, HashLen16(b, a)); -} - -uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { - if (len < 128) { - return CityMurmur(s, len, seed); - } - - // We expect len >= 128 to be the common case. Keep 56 bytes of state: - // v, w, x, y, and z. - pair v, w; - uint64 x = Uint128Low64(seed); - uint64 y = Uint128High64(seed); - uint64 z = len * k1; - v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); - v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); - w.first = Rotate(y + z, 35) * k1 + x; - w.second = Rotate(x + Fetch64(s + 88), 53) * k1; - - // This is the same inner loop as CityHash64(), manually unrolled. - do { - x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 128; - } while (LIKELY(len >= 128)); - x += Rotate(v.first + z, 49) * k0; - y = y * k0 + Rotate(w.second, 37); - z = z * k0 + Rotate(w.first, 27); - w.first *= 9; - v.first *= k0; - // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. - for (size_t tail_done = 0; tail_done < len; ) { - tail_done += 32; - y = Rotate(x + y, 42) * k0 + v.second; - w.first += Fetch64(s + len - tail_done + 16); - x = x * k0 + w.first; - z += w.second + Fetch64(s + len - tail_done); - w.second += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); - v.first *= k0; - } - // At this point our 56 bytes of state should contain more than - // enough information for a strong 128-bit hash. We use two - // different 56-byte-to-8-byte hashes to get a 16-byte final result. - x = HashLen16(x, v.first); - y = HashLen16(y + z, w.first); - return uint128(HashLen16(x + v.second, w.second) + y, - HashLen16(x + w.second, y + v.second)); -} - -uint128 CityHash128(const char *s, size_t len) { - return len >= 16 ? - CityHash128WithSeed(s + 16, len - 16, - uint128(Fetch64(s), Fetch64(s + 8) + k0)) : - CityHash128WithSeed(s, len, uint128(k0, k1)); -} - -#ifdef __SSE4_2__ -#include -#include - -// Requires len >= 240. -static void CityHashCrc256Long(const char *s, size_t len, - uint32 seed, uint64 *result) { - uint64 a = Fetch64(s + 56) + k0; - uint64 b = Fetch64(s + 96) + k0; - uint64 c = result[0] = HashLen16(b, len); - uint64 d = result[1] = Fetch64(s + 120) * k0 + len; - uint64 e = Fetch64(s + 184) + seed; - uint64 f = 0; - uint64 g = 0; - uint64 h = c + d; - uint64 x = seed; - uint64 y = 0; - uint64 z = 0; - - // 240 bytes of input per iter. - size_t iters = len / 240; - len -= iters * 240; - do { -#undef CHUNK -#define CHUNK(r) \ - PERMUTE3(x, z, y); \ - b += Fetch64(s); \ - c += Fetch64(s + 8); \ - d += Fetch64(s + 16); \ - e += Fetch64(s + 24); \ - f += Fetch64(s + 32); \ - a += b; \ - h += f; \ - b += c; \ - f += d; \ - g += e; \ - e += z; \ - g += x; \ - z = _mm_crc32_u64(z, b + g); \ - y = _mm_crc32_u64(y, e + h); \ - x = _mm_crc32_u64(x, f + a); \ - e = Rotate(e, r); \ - c += e; \ - s += 40 - - CHUNK(0); PERMUTE3(a, h, c); - CHUNK(33); PERMUTE3(a, h, f); - CHUNK(0); PERMUTE3(b, h, f); - CHUNK(42); PERMUTE3(b, h, d); - CHUNK(0); PERMUTE3(b, h, e); - CHUNK(33); PERMUTE3(a, h, e); - } while (--iters > 0); - - while (len >= 40) { - CHUNK(29); - e ^= Rotate(a, 20); - h += Rotate(b, 30); - g ^= Rotate(c, 40); - f += Rotate(d, 34); - PERMUTE3(c, h, g); - len -= 40; - } - if (len > 0) { - s = s + len - 40; - CHUNK(33); - e ^= Rotate(a, 43); - h += Rotate(b, 42); - g ^= Rotate(c, 41); - f += Rotate(d, 40); - } - result[0] ^= h; - result[1] ^= g; - g += h; - a = HashLen16(a, g + z); - x += y << 32; - b += x; - c = HashLen16(c, z) + h; - d = HashLen16(d, e + result[0]); - g += e; - h += HashLen16(x, f); - e = HashLen16(a, d) + g; - z = HashLen16(b, c) + a; - y = HashLen16(g, h) + c; - result[0] = e + z + y + x; - a = ShiftMix((a + y) * k0) * k0 + b; - result[1] += a + result[0]; - a = ShiftMix(a * k0) * k0 + c; - result[2] = a + result[1]; - a = ShiftMix((a + e) * k0) * k0; - result[3] = a + result[2]; -} - -// Requires len < 240. -static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) { - char buf[240]; - memcpy(buf, s, len); - memset(buf + len, 0, 240 - len); - CityHashCrc256Long(buf, 240, ~static_cast(len), result); -} - -void CityHashCrc256(const char *s, size_t len, uint64 *result) { - if (LIKELY(len >= 240)) { - CityHashCrc256Long(s, len, 0, result); - } else { - CityHashCrc256Short(s, len, result); - } -} - -uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) { - if (len <= 900) { - return CityHash128WithSeed(s, len, seed); - } else { - uint64 result[4]; - CityHashCrc256(s, len, result); - uint64 u = Uint128High64(seed) + result[0]; - uint64 v = Uint128Low64(seed) + result[1]; - return uint128(HashLen16(u, v + result[2]), - HashLen16(Rotate(v, 32), u * k0 + result[3])); - } -} - -uint128 CityHashCrc128(const char *s, size_t len) { - if (len <= 900) { - return CityHash128(s, len); - } else { - uint64 result[4]; - CityHashCrc256(s, len, result); - return uint128(result[2], result[3]); - } -} - -#endif diff --git a/tests/hashing/city.cpp b/tests/hashing/city.cpp new file mode 100644 index 00000000..c33aec06 --- /dev/null +++ b/tests/hashing/city.cpp @@ -0,0 +1,628 @@ +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides CityHash64() and related functions. +// +// It's probably possible to create even faster hash functions by +// writing a program that systematically explores some of the space of +// possible hash functions, by using SIMD instructions, or by +// compromising on hash quality. + +//#include "config.h" +#include "city.h" + +#include +#include // for memcpy and memset + +using namespace std; + +static uint64 UNALIGNED_LOAD64(const char *p) { + uint64 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +static uint32 UNALIGNED_LOAD32(const char *p) { + uint32 result; + memcpy(&result, p, sizeof(result)); + return result; +} + +#ifdef _MSC_VER + +#include +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) + +// Mac OS X / Darwin features +#include +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) + +#elif defined(__NetBSD__) + +#include +#include +#if defined(__BSWAP_RENAME) && !defined(__bswap_32) +#define bswap_32(x) bswap32(x) +#define bswap_64(x) bswap64(x) +#endif + +#else + +#include + +#endif + +#ifdef WORDS_BIGENDIAN +#define uint32_in_expected_order(x) (bswap_32(x)) +#define uint64_in_expected_order(x) (bswap_64(x)) +#else +#define uint32_in_expected_order(x) (x) +#define uint64_in_expected_order(x) (x) +#endif + +#if !defined(LIKELY) +#if HAVE_BUILTIN_EXPECT +#define LIKELY(x) (__builtin_expect(!!(x), 1)) +#else +#define LIKELY(x) (x) +#endif +#endif + +static uint64 Fetch64(const char *p) { + return uint64_in_expected_order(UNALIGNED_LOAD64(p)); +} + +static uint32 Fetch32(const char *p) { + return uint32_in_expected_order(UNALIGNED_LOAD32(p)); +} + +// Some primes between 2^63 and 2^64 for various uses. +static const uint64 k0 = 0xc3a5c85c97cb3127ULL; +static const uint64 k1 = 0xb492b66fbe98f273ULL; +static const uint64 k2 = 0x9ae16a3b2f90404fULL; + +// Magic numbers for 32-bit hashing. Copied from Murmur3. +static const uint32_t c1 = 0xcc9e2d51; +static const uint32_t c2 = 0x1b873593; + +// A 32-bit to 32-bit integer hash copied from Murmur3. +static uint32 fmix(uint32 h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +static uint32 Rotate32(uint32 val, int shift) { + // Avoid shifting by 32: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (32 - shift))); +} + +#undef PERMUTE3 +#define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0) + +static uint32 Mur(uint32 a, uint32 h) { + // Helper from Murmur3 for combining two 32-bit values. + a *= c1; + a = Rotate32(a, 17); + a *= c2; + h ^= a; + h = Rotate32(h, 19); + return h * 5 + 0xe6546b64; +} + +static uint32 Hash32Len13to24(const char *s, size_t len) { + uint32 a = Fetch32(s - 4 + (len >> 1)); + uint32 b = Fetch32(s + 4); + uint32 c = Fetch32(s + len - 8); + uint32 d = Fetch32(s + (len >> 1)); + uint32 e = Fetch32(s); + uint32 f = Fetch32(s + len - 4); + uint32 h = len; + + return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); +} + +static uint32 Hash32Len0to4(const char *s, size_t len) { + uint32 b = 0; + uint32 c = 9; + for (int i = 0; i < len; i++) { + signed char v = s[i]; + b = b * c1 + v; + c ^= b; + } + return fmix(Mur(b, Mur(len, c))); +} + +static uint32 Hash32Len5to12(const char *s, size_t len) { + uint32 a = len, b = len * 5, c = 9, d = b; + a += Fetch32(s); + b += Fetch32(s + len - 4); + c += Fetch32(s + ((len >> 1) & 4)); + return fmix(Mur(c, Mur(b, Mur(a, d)))); +} + +uint32 CityHash32(const char *s, size_t len) { + if (len <= 24) { + return len <= 12 ? + (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) : + Hash32Len13to24(s, len); + } + + // len > 24 + uint32 h = len, g = c1 * len, f = g; + uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2; + uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2; + uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2; + uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2; + uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2; + h ^= a0; + h = Rotate32(h, 19); + h = h * 5 + 0xe6546b64; + h ^= a2; + h = Rotate32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a1; + g = Rotate32(g, 19); + g = g * 5 + 0xe6546b64; + g ^= a3; + g = Rotate32(g, 19); + g = g * 5 + 0xe6546b64; + f += a4; + f = Rotate32(f, 19); + f = f * 5 + 0xe6546b64; + size_t iters = (len - 1) / 20; + do { + uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2; + uint32 a1 = Fetch32(s + 4); + uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2; + uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2; + uint32 a4 = Fetch32(s + 16); + h ^= a0; + h = Rotate32(h, 18); + h = h * 5 + 0xe6546b64; + f += a1; + f = Rotate32(f, 19); + f = f * c1; + g += a2; + g = Rotate32(g, 18); + g = g * 5 + 0xe6546b64; + h ^= a3 + a1; + h = Rotate32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a4; + g = bswap_32(g) * 5; + h += a4 * 5; + h = bswap_32(h); + f += a0; + PERMUTE3(f, h, g); + s += 20; + } while (--iters != 0); + g = Rotate32(g, 11) * c1; + g = Rotate32(g, 17) * c1; + f = Rotate32(f, 11) * c1; + f = Rotate32(f, 17) * c1; + h = Rotate32(h + g, 19); + h = h * 5 + 0xe6546b64; + h = Rotate32(h, 17) * c1; + h = Rotate32(h + f, 19); + h = h * 5 + 0xe6546b64; + h = Rotate32(h, 17) * c1; + return h; +} + +// Bitwise right rotate. Normally this will compile to a single +// instruction, especially if the shift is a manifest constant. +static uint64 Rotate(uint64 val, int shift) { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +static uint64 ShiftMix(uint64 val) { + return val ^ (val >> 47); +} + +static uint64 HashLen16(uint64 u, uint64 v) { + return Hash128to64(uint128(u, v)); +} + +static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { + // Murmur-inspired hashing. + uint64 a = (u ^ v) * mul; + a ^= (a >> 47); + uint64 b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + return b; +} + +static uint64 HashLen0to16(const char *s, size_t len) { + if (len >= 8) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) + k2; + uint64 b = Fetch64(s + len - 8); + uint64 c = Rotate(b, 37) * mul + a; + uint64 d = (Rotate(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8 a = s[0]; + uint8 b = s[len >> 1]; + uint8 c = s[len - 1]; + uint32 y = static_cast(a) + (static_cast(b) << 8); + uint32 z = len + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k0) * k2; + } + return k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. +static uint64 HashLen17to32(const char *s, size_t len) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) * k1; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 8) * mul; + uint64 d = Fetch64(s + len - 16) * k2; + return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, + a + Rotate(b + k2, 18) + c, mul); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +static pair WeakHashLen32WithSeeds( + uint64 w, uint64 x, uint64 y, uint64 z, uint64 a, uint64 b) { + a += w; + b = Rotate(b + a + z, 21); + uint64 c = a; + a += x; + a += y; + b += Rotate(a, 44); + return make_pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. +static pair WeakHashLen32WithSeeds( + const char* s, uint64 a, uint64 b) { + return WeakHashLen32WithSeeds(Fetch64(s), + Fetch64(s + 8), + Fetch64(s + 16), + Fetch64(s + 24), + a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +static uint64 HashLen33to64(const char *s, size_t len) { + uint64 mul = k2 + len * 2; + uint64 a = Fetch64(s) * k2; + uint64 b = Fetch64(s + 8); + uint64 c = Fetch64(s + len - 24); + uint64 d = Fetch64(s + len - 32); + uint64 e = Fetch64(s + 16) * k2; + uint64 f = Fetch64(s + 24) * 9; + uint64 g = Fetch64(s + len - 8); + uint64 h = Fetch64(s + len - 16) * mul; + uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; + uint64 v = ((a + g) ^ d) + f + 1; + uint64 w = bswap_64((u + v) * mul) + h; + uint64 x = Rotate(e + f, 42) + c; + uint64 y = (bswap_64((v + w) * mul) + g) * mul; + uint64 z = e + f + c; + a = bswap_64((x + z) * mul + y) + b; + b = ShiftMix((z + a) * mul + d + h) * mul; + return b + x; +} + +uint64 CityHash64(const char *s, size_t len) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + uint64 x = Fetch64(s + len - 40); + uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); + pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. + len = (len - 1) & ~static_cast(63); + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); +} + +uint64 CityHash64WithSeed(const char *s, size_t len, uint64 seed) { + return CityHash64WithSeeds(s, len, k2, seed); +} + +uint64 CityHash64WithSeeds(const char *s, size_t len, + uint64 seed0, uint64 seed1) { + return HashLen16(CityHash64(s, len) - seed0, seed1); +} + +// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +// of any length representable in signed long. Based on City and Murmur. +static uint128 CityMurmur(const char *s, size_t len, uint128 seed) { + uint64 a = Uint128Low64(seed); + uint64 b = Uint128High64(seed); + uint64 c = 0; + uint64 d = 0; + signed long l = len - 16; + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); +} + +uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) { + if (len < 128) { + return CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair v, w; + uint64 x = Uint128Low64(seed); + uint64 y = Uint128High64(seed); + uint64 z = len * k1; + v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); + v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); + w.first = Rotate(y + z, 35) * k1 + x; + w.second = Rotate(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. + do { + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (LIKELY(len >= 128)); + x += Rotate(v.first + z, 49) * k0; + y = y * k0 + Rotate(w.second, 37); + z = z * k0 + Rotate(w.first, 27); + w.first *= 9; + v.first *= k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len; ) { + tail_done += 32; + y = Rotate(x + y, 42) * k0 + v.second; + w.first += Fetch64(s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64(s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + v.first *= k0; + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. + x = HashLen16(x, v.first); + y = HashLen16(y + z, w.first); + return uint128(HashLen16(x + v.second, w.second) + y, + HashLen16(x + w.second, y + v.second)); +} + +uint128 CityHash128(const char *s, size_t len) { + return len >= 16 ? + CityHash128WithSeed(s + 16, len - 16, + uint128(Fetch64(s), Fetch64(s + 8) + k0)) : + CityHash128WithSeed(s, len, uint128(k0, k1)); +} + +#ifdef __SSE4_2__ +#include +#include + +// Requires len >= 240. +static void CityHashCrc256Long(const char *s, size_t len, + uint32 seed, uint64 *result) { + uint64 a = Fetch64(s + 56) + k0; + uint64 b = Fetch64(s + 96) + k0; + uint64 c = result[0] = HashLen16(b, len); + uint64 d = result[1] = Fetch64(s + 120) * k0 + len; + uint64 e = Fetch64(s + 184) + seed; + uint64 f = 0; + uint64 g = 0; + uint64 h = c + d; + uint64 x = seed; + uint64 y = 0; + uint64 z = 0; + + // 240 bytes of input per iter. + size_t iters = len / 240; + len -= iters * 240; + do { +#undef CHUNK +#define CHUNK(r) \ + PERMUTE3(x, z, y); \ + b += Fetch64(s); \ + c += Fetch64(s + 8); \ + d += Fetch64(s + 16); \ + e += Fetch64(s + 24); \ + f += Fetch64(s + 32); \ + a += b; \ + h += f; \ + b += c; \ + f += d; \ + g += e; \ + e += z; \ + g += x; \ + z = _mm_crc32_u64(z, b + g); \ + y = _mm_crc32_u64(y, e + h); \ + x = _mm_crc32_u64(x, f + a); \ + e = Rotate(e, r); \ + c += e; \ + s += 40 + + CHUNK(0); PERMUTE3(a, h, c); + CHUNK(33); PERMUTE3(a, h, f); + CHUNK(0); PERMUTE3(b, h, f); + CHUNK(42); PERMUTE3(b, h, d); + CHUNK(0); PERMUTE3(b, h, e); + CHUNK(33); PERMUTE3(a, h, e); + } while (--iters > 0); + + while (len >= 40) { + CHUNK(29); + e ^= Rotate(a, 20); + h += Rotate(b, 30); + g ^= Rotate(c, 40); + f += Rotate(d, 34); + PERMUTE3(c, h, g); + len -= 40; + } + if (len > 0) { + s = s + len - 40; + CHUNK(33); + e ^= Rotate(a, 43); + h += Rotate(b, 42); + g ^= Rotate(c, 41); + f += Rotate(d, 40); + } + result[0] ^= h; + result[1] ^= g; + g += h; + a = HashLen16(a, g + z); + x += y << 32; + b += x; + c = HashLen16(c, z) + h; + d = HashLen16(d, e + result[0]); + g += e; + h += HashLen16(x, f); + e = HashLen16(a, d) + g; + z = HashLen16(b, c) + a; + y = HashLen16(g, h) + c; + result[0] = e + z + y + x; + a = ShiftMix((a + y) * k0) * k0 + b; + result[1] += a + result[0]; + a = ShiftMix(a * k0) * k0 + c; + result[2] = a + result[1]; + a = ShiftMix((a + e) * k0) * k0; + result[3] = a + result[2]; +} + +// Requires len < 240. +static void CityHashCrc256Short(const char *s, size_t len, uint64 *result) { + char buf[240]; + memcpy(buf, s, len); + memset(buf + len, 0, 240 - len); + CityHashCrc256Long(buf, 240, ~static_cast(len), result); +} + +void CityHashCrc256(const char *s, size_t len, uint64 *result) { + if (LIKELY(len >= 240)) { + CityHashCrc256Long(s, len, 0, result); + } else { + CityHashCrc256Short(s, len, result); + } +} + +uint128 CityHashCrc128WithSeed(const char *s, size_t len, uint128 seed) { + if (len <= 900) { + return CityHash128WithSeed(s, len, seed); + } else { + uint64 result[4]; + CityHashCrc256(s, len, result); + uint64 u = Uint128High64(seed) + result[0]; + uint64 v = Uint128Low64(seed) + result[1]; + return uint128(HashLen16(u, v + result[2]), + HashLen16(Rotate(v, 32), u * k0 + result[3])); + } +} + +uint128 CityHashCrc128(const char *s, size_t len) { + if (len <= 900) { + return CityHash128(s, len); + } else { + uint64 result[4]; + CityHashCrc256(s, len, result); + return uint128(result[2], result[3]); + } +} + +#endif diff --git a/tests/unit/set2/CMakeLists.txt b/tests/unit/set2/CMakeLists.txt index c375a3fd..408b5f79 100644 --- a/tests/unit/set2/CMakeLists.txt +++ b/tests/unit/set2/CMakeLists.txt @@ -5,6 +5,7 @@ set(CDSUNIT_SET_SOURCES set_insdel_func_cuckoo.cpp set_insdel_func_ellentree.cpp set_insdel_func_michael.cpp + set_insdel_func_multilevelhashset.cpp set_insdel_func_skip.cpp set_insdel_func_split.cpp set_insdel_func_striped.cpp @@ -12,6 +13,7 @@ set(CDSUNIT_SET_SOURCES set_insdel_string_cuckoo.cpp set_insdel_string_ellentree.cpp set_insdel_string_michael.cpp + set_insdel_string_multilevelhashset.cpp set_insdel_string_skip.cpp set_insdel_string_split.cpp set_insdel_string_striped.cpp diff --git a/tests/unit/set2/set_defs.h b/tests/unit/set2/set_defs.h index 5412a968..be2bcf02 100644 --- a/tests/unit/set2/set_defs.h +++ b/tests/unit/set2/set_defs.h @@ -1,8 +1,5 @@ //$$CDS-header$$ -#ifndef CDSUNIT_SET_DEFS_H -#define CDSUNIT_SET_DEFS_H - #define CDSUNIT_DECLARE_StdSet \ TEST_CASE( tag_StdSet, StdSet_Spin) \ TEST_CASE( tag_StdSet, StdSet_Mutex) \ @@ -617,4 +614,54 @@ CDSUNIT_TEST_RefinableSet_boost_container \ CDSUNIT_TEST_RefinableSet_boost_flat_container -#endif // #ifndef CDSUNIT_SET_DEFS_H + +//*********************************************** +// MultiLevelHashSet + +#undef CDSUNIT_DECLARE_MultiLevelHashSet +#define CDSUNIT_DECLARE_MultiLevelHashSet \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_stdhash) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_stdhash_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_stdhash) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_stdhash_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_md5) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_md5_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_md5) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_md5_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_sha256) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_sha256_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_sha256) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_sha256_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_city64) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_city64_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_city64) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_city64_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_city128) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_hp_city128_stat) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_city128) \ + TEST_CASE(tag_MultiLevelHashSet, MultiLevelHashSet_dhp_city128_stat) + + +#undef CDSUNIT_TEST_MultiLevelHashSet +#define CDSUNIT_TEST_MultiLevelHashSet \ + CPPUNIT_TEST(MultiLevelHashSet_hp_stdhash) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_stdhash_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_stdhash) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_stdhash_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_md5) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_md5_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_md5) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_md5_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_sha256) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_sha256_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_sha256) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_sha256_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_city64) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_city64_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_city64) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_city64_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_city128) \ + CPPUNIT_TEST(MultiLevelHashSet_hp_city128_stat) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_city128) \ + CPPUNIT_TEST(MultiLevelHashSet_dhp_city128_stat) + diff --git a/tests/unit/set2/set_insdel_func.h b/tests/unit/set2/set_insdel_func.h index ad8e0a74..505c0a49 100644 --- a/tests/unit/set2/set_insdel_func.h +++ b/tests/unit/set2/set_insdel_func.h @@ -207,6 +207,12 @@ namespace set2 { ++nModified; } } + + void operator()( keyval_type& cur, keyval_type * old ) + { + operator()( old == nullptr, cur, 0 ); + } + private: update_functor(const update_functor& ); }; @@ -540,17 +546,17 @@ namespace set2 { CDSUNIT_DECLARE_RefinableSet CDSUNIT_DECLARE_CuckooSet CDSUNIT_DECLARE_EllenBinTreeSet + CDSUNIT_DECLARE_MultiLevelHashSet CPPUNIT_TEST_SUITE_(Set_InsDel_func, "Map_InsDel_func") CDSUNIT_TEST_MichaelSet CDSUNIT_TEST_SplitList CDSUNIT_TEST_SkipListSet + CDSUNIT_TEST_MultiLevelHashSet CDSUNIT_TEST_EllenBinTreeSet CDSUNIT_TEST_StripedSet CDSUNIT_TEST_RefinableSet CDSUNIT_TEST_CuckooSet - - //CDSUNIT_TEST_MultiLevelHashSet // the test is not suitable CPPUNIT_TEST_SUITE_END(); }; diff --git a/tests/unit/set2/set_insdel_func_multilevelhashset.cpp b/tests/unit/set2/set_insdel_func_multilevelhashset.cpp new file mode 100644 index 00000000..92b2dd48 --- /dev/null +++ b/tests/unit/set2/set_insdel_func_multilevelhashset.cpp @@ -0,0 +1,12 @@ +//$$CDS-header$$ + +#include "set2/set_insdel_func.h" +#include "set2/set_type_multilevel_hashset.h" + +#undef TEST_CASE +#define TEST_CASE(TAG, X) void Set_InsDel_func::X() { run_test::X>(); } +#include "set2/set_defs.h" + +namespace set2 { + CDSUNIT_DECLARE_MultiLevelHashSet +} // namespace set2 diff --git a/tests/unit/set2/set_insdel_string.h b/tests/unit/set2/set_insdel_string.h index 6e818f80..8d68793a 100644 --- a/tests/unit/set2/set_insdel_string.h +++ b/tests/unit/set2/set_insdel_string.h @@ -515,12 +515,14 @@ namespace set2 { CDSUNIT_DECLARE_CuckooSet CDSUNIT_DECLARE_SkipListSet CDSUNIT_DECLARE_EllenBinTreeSet + CDSUNIT_DECLARE_MultiLevelHashSet CDSUNIT_DECLARE_StdSet - CPPUNIT_TEST_SUITE_(Set_InsDel_func, "Map_InsDel_func") + CPPUNIT_TEST_SUITE_(Set_InsDel_string, "Map_InsDel_func") CDSUNIT_TEST_MichaelSet CDSUNIT_TEST_SplitList CDSUNIT_TEST_SkipListSet + CDSUNIT_TEST_MultiLevelHashSet CDSUNIT_TEST_EllenBinTreeSet CDSUNIT_TEST_StripedSet CDSUNIT_TEST_RefinableSet diff --git a/tests/unit/set2/set_insdel_string_multilevelhashset.cpp b/tests/unit/set2/set_insdel_string_multilevelhashset.cpp new file mode 100644 index 00000000..59c10ee8 --- /dev/null +++ b/tests/unit/set2/set_insdel_string_multilevelhashset.cpp @@ -0,0 +1,12 @@ +//$$CDS-header$$ + +#include "set2/set_insdel_string.h" +#include "set2/set_type_multilevel_hashset.h" + +#undef TEST_CASE +#define TEST_CASE(TAG, X) void Set_InsDel_string::X() { run_test_extract::X>(); } +#include "set2/set_defs.h" + +namespace set2 { + CDSUNIT_DECLARE_MultiLevelHashSet +} // namespace set2 diff --git a/tests/unit/set2/set_type_multilevel_hashset.h b/tests/unit/set2/set_type_multilevel_hashset.h new file mode 100644 index 00000000..c13d1fc4 --- /dev/null +++ b/tests/unit/set2/set_type_multilevel_hashset.h @@ -0,0 +1,178 @@ +//$$CDS-header$$ + +#ifndef CDSUNIT_SET_TYPE_MICHAEL_H +#define CDSUNIT_SET_TYPE_MICHAEL_H + +#include +#include + +#include "print_multilevel_hashset_stat.h" +#include "hashing/hash_func.h" + +namespace set2 { + + template + class MultiLevelHashSet : public cc::MultiLevelHashSet< GC, T, Traits > + { + typedef cc::MultiLevelHashSet< GC, T, Traits > base_class; + public: + typedef typename T::hasher hasher ; + typedef typename base_class::guarded_ptr guarded_ptr; + + template + MultiLevelHashSet( Config const& cfg ) + : base_class( cfg.c_nSetSize, cfg.c_nLoadFactor ) + {} + + template + bool erase( Q const& key ) + { + return base_class::erase( hasher()( key )); + } + + template + bool erase( Q const& key, Func f ) + { + return base_class::erase( hasher()( key ), f ); + } + + template + guarded_ptr extract(Q const& key) + { + return base_class::extract( hasher()(key) ); + } + + // for testing + static CDS_CONSTEXPR bool const c_bExtractSupported = true; + static CDS_CONSTEXPR bool const c_bLoadFactorDepended = false; + }; + + struct tag_MultiLevelHashSet; + + template + struct set_type< tag_MultiLevelHashSet, Key, Val >: public set_type_base< Key, Val > + { + typedef set_type_base< Key, Val > base_class; + typedef typename base_class::compare compare; + typedef typename base_class::less less; + typedef typename base_class::hash hash; + typedef typename base_class::key_type key_type; + typedef typename base_class::value_type value_type; + + template + struct hash_type + { + typedef Hasher hasher; + typedef typename hasher::hash_type type; + }; + + template + struct hash_type> + { + typedef std::hash hasher; + typedef size_t type; + }; + + template + struct key_val: base_class::key_val + { + typedef typename base_class::key_val base; + typedef Hasher hasher; + typedef typename hash_type::type hash_type; + + hash_type hash; + + /*explicit*/ key_val( key_type const& k ): base(k), hash( hasher()( k )) {} + key_val( key_type const& k, value_type const& v ): base(k, v), hash( hasher()( k )) {} + + template + /*explicit*/ key_val( K const& k ): base(k), hash( hasher()( k )) {} + + template + key_val( K const& k, T const& v ): base(k, v), hash( hasher()( k )) {} + }; + + struct default_traits : public cc::multilevel_hashset::traits + { + struct hash_accessor { + template + typename key_val::hash_type const& operator()( key_val const& kv ) + { + return kv.hash; + } + }; + }; + + typedef MultiLevelHashSet< cds::gc::HP, key_val>, default_traits > MultiLevelHashSet_hp_stdhash; + typedef MultiLevelHashSet< cds::gc::DHP, key_val>, default_traits > MultiLevelHashSet_dhp_stdhash; + + struct traits_MultiLevelHashSet_stat: public cc::multilevel_hashset::make_traits< + co::type_traits< default_traits >, + co::stat< cc::multilevel_hashset::stat<>> + >::type + {}; + + typedef MultiLevelHashSet< cds::gc::HP, key_val>, traits_MultiLevelHashSet_stat > MultiLevelHashSet_hp_stdhash_stat; + typedef MultiLevelHashSet< cds::gc::DHP, key_val>, traits_MultiLevelHashSet_stat > MultiLevelHashSet_dhp_stdhash_stat; + + // SHA256 + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::sha256>, default_traits > MultiLevelHashSet_hp_sha256; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::sha256>, default_traits > MultiLevelHashSet_dhp_sha256; + + struct traits_MultiLevelHashSet_sha256_stat : public default_traits + { + typedef cc::multilevel_hashset::stat<> stat; + }; + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::sha256>, traits_MultiLevelHashSet_sha256_stat > MultiLevelHashSet_hp_sha256_stat; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::sha256>, traits_MultiLevelHashSet_sha256_stat > MultiLevelHashSet_dhp_sha256_stat; + + //MD5 + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::md5>, default_traits > MultiLevelHashSet_hp_md5; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::md5>, default_traits > MultiLevelHashSet_dhp_md5; + + struct traits_MultiLevelHashSet_md5_stat : public default_traits + { + typedef cc::multilevel_hashset::stat<> stat; + }; + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::md5>, traits_MultiLevelHashSet_md5_stat > MultiLevelHashSet_hp_md5_stat; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::md5>, traits_MultiLevelHashSet_md5_stat > MultiLevelHashSet_dhp_md5_stat; + + // CityHash + struct traits_MultiLevelHashSet_city64 : public default_traits + { + typedef ::hashing::city64::less less; + }; + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::city64>, traits_MultiLevelHashSet_city64 > MultiLevelHashSet_hp_city64; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::city64>, traits_MultiLevelHashSet_city64 > MultiLevelHashSet_dhp_city64; + + struct traits_MultiLevelHashSet_city64_stat : public traits_MultiLevelHashSet_city64 + { + typedef cc::multilevel_hashset::stat<> stat; + }; + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::city64>, traits_MultiLevelHashSet_city64_stat > MultiLevelHashSet_hp_city64_stat; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::city64>, traits_MultiLevelHashSet_city64_stat > MultiLevelHashSet_dhp_city64_stat; + + struct traits_MultiLevelHashSet_city128 : public default_traits + { + typedef ::hashing::city128::less less; + }; + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::city128>, traits_MultiLevelHashSet_city128 > MultiLevelHashSet_hp_city128; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::city128>, traits_MultiLevelHashSet_city128 > MultiLevelHashSet_dhp_city128; + + struct traits_MultiLevelHashSet_city128_stat : public traits_MultiLevelHashSet_city128 + { + typedef cc::multilevel_hashset::stat<> stat; + }; + typedef MultiLevelHashSet< cds::gc::HP, key_val<::hashing::city128>, traits_MultiLevelHashSet_city128_stat > MultiLevelHashSet_hp_city128_stat; + typedef MultiLevelHashSet< cds::gc::DHP, key_val<::hashing::city128>, traits_MultiLevelHashSet_city128_stat > MultiLevelHashSet_dhp_city128_stat; + }; + + template + static inline void print_stat( cc::MultiLevelHashSet< GC, T, Traits > const& s ) + { + CPPUNIT_MSG( s.statistics() ); + } + +} // namespace set2 + +#endif // #ifndef CDSUNIT_SET_TYPE_MICHAEL_H