X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2Ftest%2FAtomicHashMapTest.cpp;h=c39553952cb27b6cc7bbda1b79ffa07749a267fc;hb=d5c1c7940fc29aadbd8bb8e8d1990add57a7b4f3;hp=16e17e1401f3c6d2a0a288ea41989660e05c203e;hpb=d8d3e2676a073e415ecbfeb3144948ddcd6449df;p=folly.git diff --git a/folly/test/AtomicHashMapTest.cpp b/folly/test/AtomicHashMapTest.cpp index 16e17e14..c3955395 100644 --- a/folly/test/AtomicHashMapTest.cpp +++ b/folly/test/AtomicHashMapTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2014 Facebook, Inc. + * Copyright 2016 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,18 +17,22 @@ #include #include -#include -#include #include #include #include + +#include #include #include +#include +#include +#include using std::vector; using std::string; using folly::AtomicHashMap; using folly::AtomicHashArray; +using folly::StringPiece; // Tunables: DEFINE_double(targetLoadFactor, 0.75, "Target memory utilization fraction."); @@ -78,13 +82,19 @@ TEST(Ahm, BasicNoncopyable) { for (int i = 50; i < 100; ++i) { myMap.insert(i, std::unique_ptr(new int (i))); } - for (int i = 0; i < 100; ++i) { + for (int i = 100; i < 150; ++i) { + myMap.emplace(i, new int (i)); + } + for (int i = 150; i < 200; ++i) { + myMap.emplace(i, new int (i), std::default_delete()); + } + for (int i = 0; i < 200; ++i) { EXPECT_EQ(*(myMap.find(i)->second), i); } - for (int i = 0; i < 100; i+=4) { + for (int i = 0; i < 200; i+=4) { myMap.erase(i); } - for (int i = 0; i < 100; i+=4) { + for (int i = 0; i < 200; i+=4) { EXPECT_EQ(myMap.find(i), myMap.end()); } } @@ -95,21 +105,86 @@ typedef int32_t ValueT; typedef AtomicHashMap AHMapT; typedef AHMapT::value_type RecordT; typedef AtomicHashArray AHArrayT; - AHArrayT::Config config; +typedef folly::QuadraticProbingAtomicHashMap QPAHMapT; +QPAHMapT::Config qpConfig; static AHArrayT::SmartPtr globalAHA(nullptr); static std::unique_ptr globalAHM; +static std::unique_ptr globalQPAHM; // Generate a deterministic value based on an input key static int genVal(int key) { return key / 3; } +static bool legalKey(const char* a); + +struct EqTraits { + bool operator()(const char* a, const char* b) { + return legalKey(a) && (strcmp(a, b) == 0); + } + bool operator()(const char* a, const char& b) { + return legalKey(a) && (a[0] != '\0') && (a[0] == b); + } + bool operator()(const char* a, const StringPiece b) { + return legalKey(a) && + (strlen(a) == b.size()) && (strcmp(a, b.begin()) == 0); + } +}; + +struct HashTraits { + size_t operator()(const char* a) { + size_t result = 0; + while (a[0] != 0) result += static_cast(*(a++)); + return result; + } + size_t operator()(const char& a) { + return static_cast(a); + } + size_t operator()(const StringPiece a) { + size_t result = 0; + for (const auto& ch : a) result += static_cast(ch); + return result; + } +}; + +typedef AtomicHashMap AHMCstrInt; +AHMCstrInt::Config cstrIntCfg; + +static bool legalKey(const char* a) { + return a != cstrIntCfg.emptyKey && + a != cstrIntCfg.lockedKey && + a != cstrIntCfg.erasedKey; +} + +TEST(Ahm, BasicLookup) { + AHMCstrInt myMap(1024, cstrIntCfg); + EXPECT_TRUE(myMap.begin() == myMap.end()); + myMap.insert(std::make_pair("f", 42)); + EXPECT_EQ(42, myMap.find("f")->second); + { + // Look up a single char, successfully. + auto it = myMap.find('f'); + EXPECT_EQ(42, it->second); + } + { + // Look up a single char, unsuccessfully. + auto it = myMap.find('g'); + EXPECT_TRUE(it == myMap.end()); + } + { + // Look up a string piece, successfully. + const StringPiece piece("f"); + auto it = myMap.find(piece); + EXPECT_EQ(42, it->second); + } +} + TEST(Ahm, grow) { VLOG(1) << "Overhead: " << sizeof(AHArrayT) << " (array) " << sizeof(AHMapT) + sizeof(AHArrayT) << " (map/set) Bytes."; uint64_t numEntries = 10000; - float sizeFactor = 0.46; + float sizeFactor = 0.46f; std::unique_ptr m(new AHMapT(int(numEntries * sizeFactor), config)); @@ -132,8 +207,6 @@ TEST(Ahm, grow) { EXPECT_TRUE(success); // check correctness - size_t cap = m->capacity(); - ValueT val; EXPECT_GT(m->numSubMaps(), 1); // make sure we grew success = true; EXPECT_EQ(m->size(), numEntries); @@ -144,7 +217,6 @@ TEST(Ahm, grow) { // Check findAt success = true; - KeyT key(0); AHMapT::const_iterator retIt; for (int32_t i = 0; i < int32_t(numEntries); i++) { retIt = m->find(i); @@ -172,7 +244,7 @@ TEST(Ahm, grow) { TEST(Ahm, iterator) { int numEntries = 10000; - float sizeFactor = .46; + float sizeFactor = .46f; std::unique_ptr m(new AHMapT(int(numEntries * sizeFactor), config)); // load map - make sure we succeed and the index is accurate @@ -278,7 +350,7 @@ TEST(Ahm, map_exception_safety) { typedef AtomicHashMap MyMapT; int numEntries = 10000; - float sizeFactor = 0.46; + float sizeFactor = 0.46f; std::unique_ptr m(new MyMapT(int(numEntries * sizeFactor))); bool success = true; @@ -350,6 +422,15 @@ void* insertThread(void* jj) { return nullptr; } +void* qpInsertThread(void* jj) { + int64_t j = (int64_t) jj; + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + globalQPAHM->insert(key, genVal(key)); + } + return nullptr; +} + void* insertThreadArr(void* jj) { int64_t j = (int64_t) jj; for (int i = 0; i < numOpsPerThread; ++i) { @@ -360,28 +441,28 @@ void* insertThreadArr(void* jj) { } std::atomic runThreadsCreatedAllThreads; -void runThreads(void *(*thread)(void*), int numThreads, void **statuses) { +void runThreads(void *(*mainFunc)(void*), int numThreads, void **statuses) { folly::BenchmarkSuspender susp; runThreadsCreatedAllThreads.store(false); - vector threadIds; + vector threads; for (int64_t j = 0; j < numThreads; j++) { - pthread_t tid; - if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) { - LOG(ERROR) << "Could not start thread"; - } else { - threadIds.push_back(tid); - } + threads.emplace_back([statuses, mainFunc, j]() { + auto ret = mainFunc((void*)j); + if (statuses != nullptr) { + statuses[j] = ret; + } + }); } susp.dismiss(); runThreadsCreatedAllThreads.store(true); - for (size_t i = 0; i < threadIds.size(); ++i) { - pthread_join(threadIds[i], statuses == nullptr ? nullptr : &statuses[i]); + for (size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); } } -void runThreads(void *(*thread)(void*)) { - runThreads(thread, FLAGS_numThreads, nullptr); +void runThreads(void *(*mainFunc)(void*)) { + runThreads(mainFunc, FLAGS_numThreads, nullptr); } } @@ -392,7 +473,7 @@ TEST(Ahm, collision_test) { // Doing the same number on each thread so we collide. numOpsPerThread = numInserts; - float sizeFactor = 0.46; + float sizeFactor = 0.46f; int entrySize = sizeof(KeyT) + sizeof(ValueT); VLOG(1) << "Testing " << numInserts << " unique " << entrySize << " Byte entries replicated in " << FLAGS_numThreads << @@ -425,7 +506,6 @@ TEST(Ahm, collision_test) { // check correctness EXPECT_EQ(sizeAHM, numInserts); bool success = true; - ValueT val; for (int i = 0; i < numInserts; ++i) { KeyT key = randomizeKey(i); success &= (globalAHM->find(key)->second == genVal(key)); @@ -453,8 +533,7 @@ namespace { const int kInsertPerThread = 100000; int raceFinalSizeEstimate; -void* raceIterateThread(void* jj) { - int64_t j = (int64_t) jj; +void* raceIterateThread(void*) { int count = 0; AHMapT::iterator it = globalAHM->begin(); @@ -469,8 +548,7 @@ void* raceIterateThread(void* jj) { return nullptr; } -void* raceInsertRandomThread(void* jj) { - int64_t j = (int64_t) jj; +void* raceInsertRandomThread(void*) { for (int i = 0; i < kInsertPerThread; ++i) { KeyT key = rand(); globalAHM->insert(key, genVal(key)); @@ -493,11 +571,11 @@ TEST(Ahm, race_insert_iterate_thread_test) { globalAHM.reset(new AHMapT(raceFinalSizeEstimate / 9, config)); vector threadIds; - for (int64_t j = 0; j < kInsertThreads + kIterateThreads; j++) { + for (int j = 0; j < kInsertThreads + kIterateThreads; j++) { pthread_t tid; void *(*thread)(void*) = (j < kInsertThreads ? raceInsertRandomThread : raceIterateThread); - if (pthread_create(&tid, nullptr, thread, (void*) j) != 0) { + if (pthread_create(&tid, nullptr, thread, nullptr) != 0) { LOG(ERROR) << "Could not start thread"; } else { threadIds.push_back(tid); @@ -593,7 +671,7 @@ TEST(Ahm, thread_erase_insert_race) { typedef AtomicHashArray AHA; AHA::Config configRace; auto atomicHashArrayInsertRaceArray = AHA::create(2, configRace); -void* atomicHashArrayInsertRaceThread(void* j) { +void* atomicHashArrayInsertRaceThread(void* /* j */) { AHA* arr = atomicHashArrayInsertRaceArray.get(); uintptr_t numInserted = 0; while (!runThreadsCreatedAllThreads.load()); @@ -602,22 +680,89 @@ void* atomicHashArrayInsertRaceThread(void* j) { numInserted++; } } - pthread_exit((void *) numInserted); + return (void*)numInserted; } TEST(Ahm, atomic_hash_array_insert_race) { AHA* arr = atomicHashArrayInsertRaceArray.get(); - int numIterations = 50000, FLAGS_numThreads = 4; - void* statuses[FLAGS_numThreads]; + int numIterations = 5000; + constexpr int numThreads = 4; + void* statuses[numThreads]; for (int i = 0; i < numIterations; i++) { arr->clear(); - runThreads(atomicHashArrayInsertRaceThread, FLAGS_numThreads, statuses); + runThreads(atomicHashArrayInsertRaceThread, numThreads, statuses); EXPECT_GE(arr->size(), 1); - for (int j = 0; j < FLAGS_numThreads; j++) { + for (int j = 0; j < numThreads; j++) { EXPECT_EQ(arr->size(), uintptr_t(statuses[j])); } } } +// Repro for T#5841499. Race between erase() and find() on the same key. +TEST(Ahm, erase_find_race) { + const uint64_t limit = 10000; + AtomicHashMap map(limit + 10); + std::atomic key {1}; + + // Invariant: all values are equal to their keys. + // At any moment there is one or two consecutive keys in the map. + + std::thread write_thread([&]() { + while (true) { + uint64_t k = ++key; + if (k > limit) { + break; + } + map.insert(k + 1, k + 1); + map.erase(k); + } + }); + + std::thread read_thread([&]() { + while (true) { + uint64_t k = key.load(); + if (k > limit) { + break; + } + + auto it = map.find(k); + if (it != map.end()) { + ASSERT_EQ(k, it->second); + } + } + }); + + read_thread.join(); + write_thread.join(); +} + +// Erase right after insert race bug repro (t9130653) +TEST(Ahm, erase_after_insert_race) { + const uint64_t limit = 10000; + const size_t num_threads = 100; + const size_t num_iters = 500; + AtomicHashMap map(limit + 10); + + std::atomic go{false}; + std::vector ts; + for (size_t i = 0; i < num_threads; ++i) { + ts.emplace_back([&]() { + while (!go) { + continue; + } + for (size_t n = 0; n < num_iters; ++n) { + map.erase(1); + map.insert(1, 1); + } + }); + } + + go = true; + + for (auto& t : ts) { + t.join(); + } +} + // Repro for a bug when iterator didn't skip empty submaps. TEST(Ahm, iterator_skips_empty_submaps) { AtomicHashMap::Config config; @@ -677,6 +822,19 @@ void loadGlobalAhm() { EXPECT_EQ(globalAHM->size(), FLAGS_numBMElements); } +void loadGlobalQPAhm() { + std::cout << "loading global QPAHM with " << FLAGS_numThreads + << " threads...\n"; + uint64_t start = nowInUsec(); + globalQPAHM.reset(new QPAHMapT(maxBMElements, qpConfig)); + numOpsPerThread = FLAGS_numBMElements / FLAGS_numThreads; + runThreads(qpInsertThread); + uint64_t elapsed = nowInUsec() - start; + std::cout << " took " << elapsed / 1000 << " ms (" << + (elapsed * 1000 / FLAGS_numBMElements) << " ns/insert).\n"; + EXPECT_EQ(globalQPAHM->size(), FLAGS_numBMElements); +} + } BENCHMARK(st_aha_find, iters) { @@ -695,6 +853,14 @@ BENCHMARK(st_ahm_find, iters) { } } +BENCHMARK(st_qpahm_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (size_t i = 0; i < iters; i++) { + KeyT key = randomizeKey(i); + folly::doNotOptimizeAway(globalQPAHM->find(key)->second); + } +} + BENCHMARK_DRAW_LINE() BENCHMARK(mt_ahm_miss, iters) { @@ -711,6 +877,20 @@ BENCHMARK(mt_ahm_miss, iters) { }); } +BENCHMARK(mt_qpahm_miss, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = i + j * numOpsPerThread * 100; + folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end()); + } + return nullptr; + }); +} + BENCHMARK(st_ahm_miss, iters) { CHECK_LE(iters, FLAGS_numBMElements); for (size_t i = 0; i < iters; i++) { @@ -719,6 +899,14 @@ BENCHMARK(st_ahm_miss, iters) { } } +BENCHMARK(st_qpahm_miss, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + for (size_t i = 0; i < iters; i++) { + KeyT key = randomizeKey(i + iters * 100); + folly::doNotOptimizeAway(globalQPAHM->find(key) == globalQPAHM->end()); + } +} + BENCHMARK(mt_ahm_find_insert_mix, iters) { CHECK_LE(iters, FLAGS_numBMElements); numOpsPerThread = iters / FLAGS_numThreads; @@ -738,6 +926,26 @@ BENCHMARK(mt_ahm_find_insert_mix, iters) { }); } + +BENCHMARK(mt_qpahm_find_insert_mix, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + if (i % 128) { // ~1% insert mix + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalQPAHM->find(key)->second); + } else { + KeyT key = randomizeKey(i + j * numOpsPerThread * 100); + globalQPAHM->insert(key, genVal(key)); + } + } + return nullptr; + }); +} + BENCHMARK(mt_aha_find, iters) { CHECK_LE(iters, FLAGS_numBMElements); numOpsPerThread = iters / FLAGS_numThreads; @@ -766,6 +974,20 @@ BENCHMARK(mt_ahm_find, iters) { }); } +BENCHMARK(mt_qpahm_find, iters) { + CHECK_LE(iters, FLAGS_numBMElements); + numOpsPerThread = iters / FLAGS_numThreads; + runThreads([](void* jj) -> void* { + int64_t j = (int64_t) jj; + while (!runThreadsCreatedAllThreads.load()); + for (int i = 0; i < numOpsPerThread; ++i) { + KeyT key = randomizeKey(i + j * numOpsPerThread); + folly::doNotOptimizeAway(globalQPAHM->find(key)->second); + } + return nullptr; + }); +} + KeyT k; BENCHMARK(st_baseline_modulus_and_random, iters) { for (size_t i = 0; i < iters; ++i) { @@ -783,6 +1005,14 @@ BENCHMARK(mt_ahm_insert, iters) { runThreads(insertThread); } +BENCHMARK(mt_qpahm_insert, iters) { + BENCHMARK_SUSPEND { + globalQPAHM.reset(new QPAHMapT(int(iters * LF), qpConfig)); + numOpsPerThread = iters / FLAGS_numThreads; + } + runThreads(qpInsertThread); +} + BENCHMARK(st_ahm_insert, iters) { folly::BenchmarkSuspender susp; std::unique_ptr ahm(new AHMapT(int(iters * LF), config)); @@ -794,12 +1024,25 @@ BENCHMARK(st_ahm_insert, iters) { } } +BENCHMARK(st_qpahm_insert, iters) { + folly::BenchmarkSuspender susp; + std::unique_ptr ahm(new QPAHMapT(int(iters * LF), qpConfig)); + susp.dismiss(); + + for (size_t i = 0; i < iters; i++) { + KeyT key = randomizeKey(i); + ahm->insert(key, genVal(key)); + } +} + void benchmarkSetup() { config.maxLoadFactor = FLAGS_maxLoadFactor; + qpConfig.maxLoadFactor = FLAGS_maxLoadFactor; configRace.maxLoadFactor = 0.5; int numCores = sysconf(_SC_NPROCESSORS_ONLN); loadGlobalAha(); loadGlobalAhm(); + loadGlobalQPAhm(); string numIters = folly::to( std::min(1000000, int(FLAGS_numBMElements))); @@ -833,28 +1076,38 @@ int main(int argc, char** argv) { } /* -Benchmarks run on dual Xeon X5650's @ 2.67GHz w/hyperthreading enabled - (12 physical cores, 12 MB cache, 72 GB RAM) +loading global AHA with 8 threads... + took 487 ms (40 ns/insert). +loading global AHM with 8 threads... + took 478 ms (39 ns/insert). +loading global QPAHM with 8 threads... + took 478 ms (39 ns/insert). Running AHM benchmarks on machine with 24 logical cores. num elements per map: 12000000 num threads for mt tests: 24 AHM load factor: 0.75 -Benchmark Iters Total t t/iter iter/sec ------------------------------------------------------------------------------- -Comparing benchmarks: BM_mt_aha_find,BM_mt_ahm_find -* BM_mt_aha_find 1000000 7.767 ms 7.767 ns 122.8 M - +0.81% BM_mt_ahm_find 1000000 7.83 ms 7.83 ns 121.8 M ------------------------------------------------------------------------------- -Comparing benchmarks: BM_st_aha_find,BM_st_ahm_find -* BM_st_aha_find 1000000 57.83 ms 57.83 ns 16.49 M - +77.9% BM_st_ahm_find 1000000 102.9 ms 102.9 ns 9.27 M ------------------------------------------------------------------------------- -BM_mt_ahm_miss 1000000 2.937 ms 2.937 ns 324.7 M -BM_st_ahm_miss 1000000 164.2 ms 164.2 ns 5.807 M -BM_mt_ahm_find_insert_mix 1000000 8.797 ms 8.797 ns 108.4 M -BM_mt_ahm_insert 1000000 17.39 ms 17.39 ns 54.83 M -BM_st_ahm_insert 1000000 106.8 ms 106.8 ns 8.93 M -BM_st_baseline_modulus_and_rando 1000000 6.223 ms 6.223 ns 153.2 M +============================================================================ +folly/test/AtomicHashMapTest.cpp relative time/iter iters/s +============================================================================ +st_aha_find 92.63ns 10.80M +st_ahm_find 107.78ns 9.28M +st_qpahm_find 90.69ns 11.03M +---------------------------------------------------------------------------- +mt_ahm_miss 2.09ns 477.36M +mt_qpahm_miss 1.37ns 728.82M +st_ahm_miss 241.07ns 4.15M +st_qpahm_miss 223.17ns 4.48M +mt_ahm_find_insert_mix 8.05ns 124.24M +mt_qpahm_find_insert_mix 9.10ns 109.85M +mt_aha_find 6.82ns 146.68M +mt_ahm_find 7.95ns 125.77M +mt_qpahm_find 6.81ns 146.83M +st_baseline_modulus_and_random 6.02ns 166.03M +mt_ahm_insert 14.29ns 69.97M +mt_qpahm_insert 11.68ns 85.61M +st_ahm_insert 125.39ns 7.98M +st_qpahm_insert 128.76ns 7.77M +============================================================================ */