folly/test/AtomicUnorderedMapTest.cpp

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16 #include <folly/AtomicUnorderedMap.h>
  17 #include <folly/test/DeterministicSchedule.h>
  18 #include <thread>
  19 #include <semaphore.h>
  20 #include <gflags/gflags.h>
  21 #include <gtest/gtest.h>
  22 #include <folly/Benchmark.h>
  23 #include <unordered_map>
  24
  25 using namespace folly;
  26 using namespace folly::test;
  27
  28 template<class T>
  29 struct non_atomic {
  30   T value;
  31
  32   non_atomic() = default;
  33   non_atomic(const non_atomic&) = delete;
  34   constexpr /* implicit */ non_atomic(T desired): value(desired) {}
  35
  36   T operator+=(T arg) { value += arg; return load();}
  37
  38   T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
  39     return value;
  40   }
  41
  42   /* implicit */
  43   operator T() const {return load();}
  44
  45   void store(T desired,
  46              std::memory_order /* order */ = std::memory_order_seq_cst) {
  47     value = desired;
  48   }
  49
  50   T exchange(T desired,
  51              std::memory_order /* order */ = std::memory_order_seq_cst) {
  52     T old = load();
  53     store(desired);
  54     return old;
  55   }
  56
  57   bool compare_exchange_weak(
  58       T& expected,
  59       T desired,
  60       std::memory_order /* success */ = std::memory_order_seq_cst,
  61       std::memory_order /* failure */ = std::memory_order_seq_cst) {
  62     if (value == expected) {
  63       value = desired;
  64       return true;
  65     }
  66
  67     expected = value;
  68     return false;
  69   }
  70
  71   bool compare_exchange_strong(
  72       T& expected,
  73       T desired,
  74       std::memory_order /* success */ = std::memory_order_seq_cst,
  75       std::memory_order /* failure */ = std::memory_order_seq_cst) {
  76     if (value == expected) {
  77       value = desired;
  78       return true;
  79     }
  80
  81     expected = value;
  82     return false;
  83   }
  84
  85   bool is_lock_free() const {return true;}
  86 };
  87
  88 template <typename Key,
  89           typename Value,
  90           typename IndexType,
  91           template <typename> class Atom = std::atomic,
  92           typename Allocator = std::allocator<char>>
  93 using UIM =
  94     AtomicUnorderedInsertMap<Key,
  95                              Value,
  96                              std::hash<Key>,
  97                              std::equal_to<Key>,
  98                              (boost::has_trivial_destructor<Key>::value &&
  99                               boost::has_trivial_destructor<Value>::value),
 100                              Atom,
 101                              IndexType,
 102                              Allocator>;
 103
 104 namespace {
 105 template <typename T>
 106 struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
 107 }
 108
 109 // uint16_t doesn't make sense for most platforms, but we might as well
 110 // test it
 111 using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
 112 TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
 113
 114 TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
 115   UIM<std::string,
 116       std::string,
 117       TypeParam,
 118       std::atomic,
 119       folly::detail::MMapAlloc> m(100);
 120
 121   m.emplace("abc", "ABC");
 122   EXPECT_TRUE(m.find("abc") != m.cend());
 123   EXPECT_EQ(m.find("abc")->first, "abc");
 124   EXPECT_EQ(m.find("abc")->second, "ABC");
 125   EXPECT_TRUE(m.find("def") == m.cend());
 126   auto iter = m.cbegin();
 127   EXPECT_TRUE(iter != m.cend());
 128   EXPECT_TRUE(iter == m.find("abc"));
 129   auto a = iter;
 130   EXPECT_TRUE(a == iter);
 131   auto b = iter;
 132   ++iter;
 133   EXPECT_TRUE(iter == m.cend());
 134   EXPECT_TRUE(a == b);
 135   EXPECT_TRUE(a != iter);
 136   a++;
 137   EXPECT_TRUE(a == iter);
 138   EXPECT_TRUE(a != b);
 139 }
 140
 141 TEST(AtomicUnorderedInsertMap, load_factor) {
 142   AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);
 143
 144   // we should be able to put in much more than 5000 things because of
 145   // our load factor request
 146   for (int i = 0; i < 10000; ++i) {
 147     m.emplace(i, true);
 148   }
 149 }
 150
 151 TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
 152   AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);
 153
 154   EXPECT_THROW({
 155     for (int i = 0; i < 6000; ++i) {
 156       m.emplace(i, false);
 157     }
 158   }, std::bad_alloc);
 159 }
 160
 161 TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
 162   UIM<int, MutableAtom<int>, TypeParam> m(100);
 163
 164   for (int i = 0; i < 50; ++i) {
 165     m.emplace(i, i);
 166   }
 167
 168   m.find(1)->second.data++;
 169 }
 170
 171 TEST(UnorderedInsertMap, value_mutation) {
 172   UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);
 173
 174   for (int i = 0; i < 50; ++i) {
 175     m.emplace(i, i);
 176   }
 177
 178   m.find(1)->second.data++;
 179   EXPECT_EQ(m.find(1)->second.data, 2);
 180 }
 181
 182 // This test is too expensive to run automatically.  On my dev server it
 183 // takes about 10 minutes for dbg build, 2 for opt.
 184 TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
 185   size_t capacity = 2000000000;
 186   AtomicUnorderedInsertMap64<size_t,size_t> big(capacity);
 187   for (size_t i = 0; i < capacity * 2; i += 2) {
 188     big.emplace(i, i * 10);
 189   }
 190   for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
 191     auto iter = big.find(i);
 192     if ((i & 1) == 0 && i < capacity * 2) {
 193       EXPECT_EQ(iter->second, i * 10);
 194     } else {
 195       EXPECT_TRUE(iter == big.cend());
 196     }
 197   }
 198 }
 199
 200 BENCHMARK(lookup_int_int_hit, iters) {
 201   std::unique_ptr<AtomicUnorderedInsertMap<int,size_t>> ptr = {};
 202
 203   size_t capacity = 100000;
 204
 205   BENCHMARK_SUSPEND {
 206     ptr.reset(new AtomicUnorderedInsertMap<int,size_t>(capacity));
 207     for (size_t i = 0; i < capacity; ++i) {
 208       auto k = 3 * ((5641 * i) % capacity);
 209       ptr->emplace(k, k + 1);
 210       EXPECT_EQ(ptr->find(k)->second, k + 1);
 211     }
 212   }
 213
 214   for (size_t i = 0; i < iters; ++i) {
 215     size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
 216     auto iter = ptr->find(k);
 217     if (iter == ptr->cend() ||
 218         iter->second != k + 1) {
 219       auto jter = ptr->find(k);
 220       EXPECT_TRUE(iter == jter);
 221     }
 222     EXPECT_EQ(iter->second, k + 1);
 223   }
 224
 225   BENCHMARK_SUSPEND {
 226     ptr.reset(nullptr);
 227   }
 228 }
 229
 230 struct PairHash {
 231   size_t operator()(const std::pair<uint64_t,uint64_t>& pr) const {
 232     return pr.first ^ pr.second;
 233   }
 234 };
 235
 236 void contendedRW(size_t itersPerThread,
 237                  size_t capacity,
 238                  size_t numThreads,
 239                  size_t readsPerWrite) {
 240   typedef std::pair<uint64_t,uint64_t> Key;
 241   typedef AtomicUnorderedInsertMap<Key,MutableAtom<uint32_t>,PairHash> Map;
 242
 243   std::unique_ptr<Map> ptr = {};
 244   std::atomic<bool> go;
 245   std::vector<std::thread> threads;
 246
 247   BENCHMARK_SUSPEND {
 248     ptr.reset(new Map(capacity));
 249     while (threads.size() < numThreads) {
 250       threads.emplace_back([&](){
 251         while (!go) {
 252           std::this_thread::yield();
 253         }
 254
 255         size_t reads = 0;
 256         size_t writes = 0;
 257         while (reads + writes < itersPerThread) {
 258           auto r = Random::rand32();
 259           Key key(reads + writes, r);
 260           if (reads < writes * readsPerWrite ||
 261               writes >= capacity / numThreads) {
 262             // read needed
 263             ++reads;
 264             auto iter = ptr->find(key);
 265             EXPECT_TRUE(
 266                 iter == ptr->cend() ||
 267                 iter->second.data.load(std::memory_order_acquire) >= key.first);
 268           } else {
 269             ++writes;
 270             try {
 271               auto pr = ptr->emplace(key, key.first);
 272               if (!pr.second) {
 273                 pr.first->second.data++;
 274               }
 275             } catch (std::bad_alloc& x) {
 276               LOG(INFO) << "bad alloc";
 277             }
 278           }
 279         }
 280       });
 281     }
 282   }
 283
 284   go = true;
 285
 286   for (auto& thr : threads) {
 287     thr.join();
 288   }
 289
 290   BENCHMARK_SUSPEND {
 291     ptr.reset(nullptr);
 292   }
 293 }
 294
 295 // sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
 296 //
 297 // without MAP_HUGETLB (default)
 298 //
 299 // ============================================================================
 300 // common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative  time/iter
 301 //   iters/s
 302 // ============================================================================
 303 // lookup_int_int_hit                                          20.05ns   49.89M
 304 // contendedRW(small_32thr_99pct)                              70.36ns   14.21M
 305 // contendedRW(large_32thr_99pct)                             164.23ns    6.09M
 306 // contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
 307 // ============================================================================
 308 //
 309 // with MAP_HUGETLB hacked in
 310 // ============================================================================
 311 // lookup_int_int_hit                                          19.67ns   50.84M
 312 // contendedRW(small_32thr_99pct)                              62.46ns   16.01M
 313 // contendedRW(large_32thr_99pct)                             119.41ns    8.37M
 314 // contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
 315 // ============================================================================
 316 BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
 317 BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
 318 BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)
 319
 320 BENCHMARK_DRAW_LINE();
 321
 322 // sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
 323 // Single threaded benchmarks to test how much better we are than
 324 // std::unordered_map and what is the cost of using atomic operations
 325 // in the uncontended use case
 326 // ============================================================================
 327 // std_map                                                      1.20ms   832.58
 328 // atomic_fast_map                                            511.35us    1.96K
 329 // fast_map                                                   196.28us    5.09K
 330 // ============================================================================
 331
 332 BENCHMARK(std_map) {
 333   std::unordered_map<long, long> m;
 334   m.reserve(10000);
 335   for (int i=0; i<10000; ++i) {
 336     m.emplace(i,i);
 337   }
 338
 339   for (int i=0; i<10000; ++i) {
 340     auto a = m.find(i);
 341     folly::doNotOptimizeAway(&*a);
 342   }
 343 }
 344
 345 BENCHMARK(atomic_fast_map) {
 346   UIM<long, long, uint32_t, std::atomic> m(10000);
 347   for (int i=0; i<10000; ++i) {
 348     m.emplace(i,i);
 349   }
 350
 351   for (int i=0; i<10000; ++i) {
 352     auto a = m.find(i);
 353     folly::doNotOptimizeAway(&*a);
 354   }
 355 }
 356
 357 BENCHMARK(fast_map) {
 358   UIM<long, long, uint32_t, non_atomic> m(10000);
 359   for (int i=0; i<10000; ++i) {
 360     m.emplace(i,i);
 361   }
 362
 363   for (int i=0; i<10000; ++i) {
 364     auto a = m.find(i);
 365     folly::doNotOptimizeAway(&*a);
 366   }
 367 }
 368
 369 BENCHMARK(atomic_fast_map_64) {
 370   UIM<long, long, uint64_t, std::atomic> m(10000);
 371   for (int i=0; i<10000; ++i) {
 372     m.emplace(i,i);
 373   }
 374
 375   for (int i=0; i<10000; ++i) {
 376     auto a = m.find(i);
 377     folly::doNotOptimizeAway(&*a);
 378   }
 379 }
 380
 381 BENCHMARK(fast_map_64) {
 382   UIM<long, long, uint64_t, non_atomic> m(10000);
 383   for (int i=0; i<10000; ++i) {
 384     m.emplace(i,i);
 385   }
 386
 387   for (int i=0; i<10000; ++i) {
 388     auto a = m.find(i);
 389     folly::doNotOptimizeAway(&*a);
 390   }
 391 }
 392
 393
 394 int main(int argc, char ** argv) {
 395   testing::InitGoogleTest(&argc, argv);
 396   google::ParseCommandLineFlags(&argc, &argv, true);
 397   int rv = RUN_ALL_TESTS();
 398   folly::runBenchmarksOnFlag();
 399   return rv;
 400 }