/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/AtomicUnorderedMap.h>

#include <memory>
#include <thread>
#include <unordered_map>

#include <folly/Benchmark.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/portability/Semaphore.h>
#include <folly/test/DeterministicSchedule.h>

using namespace folly;
using namespace folly::test;
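
// non_atomic provides the std::atomic<T> interface with none of the
// atomicity or memory-ordering guarantees. Substituting it for the map's
// Atom template parameter lets the single-threaded benchmarks below
// isolate the cost of the atomic operations themselves.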
template <class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) { value += arg; return load(); }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */ operator T() const { return load(); }

  void store(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  // CAS semantics without atomicity: succeed iff the current value equals
  // expected; on failure, copy the current value back into expected.
  bool compare_exchange_weak(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool is_lock_free() const { return true; }
};
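
// UIM pins down the AtomicUnorderedInsertMap parameters these tests don't
// vary (hash, key equality, trivial-destructor detection), leaving the
// key/value types, index width, atom type, and allocator as the knobs.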
template <
    typename Key,
    typename Value,
    typename IndexType,
    template <typename> class Atom = std::atomic,
    typename Allocator = std::allocator<char>>
using UIM = AtomicUnorderedInsertMap<
    Key,
    Value,
    std::hash<Key>,
    std::equal_to<Key>,
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    Atom,
    IndexType,
    Allocator>;

template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};

// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);

TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  UIM<std::string,
      std::string,
      TypeParam,
      std::atomic,
      folly::detail::MMapAlloc>
      m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a != iter);
  ++a;
  EXPECT_TRUE(a == iter);
}

TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in much more than 5000 things because of
  // our load factor request
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}

TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW(
      {
        for (int i = 0; i < 6000; ++i) {
          m.emplace(i, false);
        }
      },
      std::bad_alloc);
}
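
// MutableAtom wraps the mapped value in the map's Atom template so the
// value can still be modified after insertion; MutableData does the same
// with a plain mutable field for the non-atomic variant.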
TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}

TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}

// This test is too expensive to run automatically. On my dev server it
// takes about 10 minutes for a dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  // insert the even numbers below 2 * capacity
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  // sample roughly 3000 keys: even keys in range must hit, all others miss
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}
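
// Measures the cost of a successful find(). Setup inserts every multiple
// of 3 below 3 * capacity (5641 is coprime to capacity, so the
// multiplicative walk covers the whole range); the timed loop then probes
// pseudo-randomly ordered keys from that same set, so each lookup is a hit.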
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<AtomicUnorderedInsertMap<int, size_t>>(capacity);
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}

struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};
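
// Mixed read/write benchmark: numThreads workers each perform
// itersPerThread operations, keeping roughly readsPerWrite reads per write
// and switching to read-only once a thread has used its share of capacity.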
void contendedRW(
    size_t itersPerThread,
    size_t capacity,
    size_t numThreads,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<Map>(capacity);
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        // spin until every worker has been constructed
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >=
                    key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }
}

// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative time/iter
//   iters/s
// ============================================================================
// lookup_int_int_hit                                          20.05ns   49.89M
// contendedRW(small_32thr_99pct)                              70.36ns   14.21M
// contendedRW(large_32thr_99pct)                             164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                          19.67ns   50.84M
// contendedRW(small_32thr_99pct)                              62.46ns   16.01M
// contendedRW(large_32thr_99pct)                             119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
// ============================================================================
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();

// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
//
// Single-threaded benchmarks to test how much better we are than
// std::unordered_map, and what the cost of the atomic operations is in
// the uncontended case.
// ============================================================================
// std_map                                                      1.20ms  832.58
// atomic_fast_map                                            511.35us    1.96K
// fast_map                                                   196.28us    5.09K
// ============================================================================
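
// Each benchmark below inserts 10000 entries and then looks every key back
// up; doNotOptimizeAway keeps the compiler from eliding the finds.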
BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  for (int i = 0; i < 10000; ++i) {
    m[i] = i;
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }

  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}