if (id < e->elementsCapacity && e->elements[id].ptr) {
elements.push_back(e->elements[id]);
- // Writing another thread's ThreadEntry from here is fine;
- // the only other potential reader is the owning thread --
- // from onThreadExit (which grabs the lock, so is properly
- // synchronized with us) or from get() -- but using get() on a
- // ThreadLocalPtr object that's being destroyed is a bug, so
- // undefined behavior is fair game.
- e->elements[id].ptr = NULL;
- e->elements[id].deleter = NULL;
+ /*
+ * Writing another thread's ThreadEntry from here is fine;
+ * the only other potential reader is the owning thread --
+ * from onThreadExit (which grabs the lock, so is properly
+ * synchronized with us) or from get(), which also grabs
+ * the lock if it needs to resize the elements vector.
+ *
+ * We can't conflict with reads for a get(id), because
+ * it's illegal to call get on a thread local that's
+ * destructing.
+ */
+ e->elements[id].ptr = nullptr;
+ e->elements[id].deleter = nullptr;
+ e->elements[id].ownsDeleter = false;
}
}
meta.freeIds_.push_back(id);
@@ ... @@
size_t newSize = static_cast<size_t>((id + 5) * 1.7);
auto& meta = instance();
ElementWrapper* ptr = nullptr;
+
// Rely on jemalloc to zero the memory if possible -- maybe it knows
// it's already zeroed and saves us some work.
if (!usingJEMalloc() ||
prevSize < jemallocMinInPlaceExpandable ||
(rallocm(
static_cast<void**>(static_cast<void*>(&threadEntry_.elements)),
- NULL, newSize * sizeof(ElementWrapper), 0,
+ nullptr, newSize * sizeof(ElementWrapper), 0,
ALLOCM_NO_MOVE | ALLOCM_ZERO) != ALLOCM_SUCCESS)) {
    // Sigh, must realloc, but we can't call realloc here, as elements is
    // still linked in meta, so another thread might access invalid memory
    // after realloc succeeds.  We'll allocate a new region and copy the
    // old contents over by hand instead, making sure the whole new region
    // is zeroed.  calloc() is simpler than malloc() followed by memset(),
    // and potentially faster when dealing with a lot of memory, as it can
    // get already-zeroed pages from the kernel.
- if ((ptr = static_cast<ElementWrapper*>(
- calloc(newSize, sizeof(ElementWrapper)))) != nullptr) {
- memcpy(ptr, threadEntry_.elements, sizeof(ElementWrapper) * prevSize);
- } else {
- throw std::bad_alloc();
- }
+ ptr = static_cast<ElementWrapper*>(
+ calloc(newSize, sizeof(ElementWrapper))
+ );
+ if (!ptr) throw std::bad_alloc();
}
// Success, update the entry
{
boost::lock_guard<boost::mutex> g(meta.lock_);
+
if (prevSize == 0) {
meta.push_back(&threadEntry_);
}
+
if (ptr) {
+ /*
+ * Note: we need to hold the meta lock when copying data out of
+ * the old vector, because some other thread might be
+ * destructing a ThreadLocal and writing to the elements vector
+ * of this thread.
+ */
+ memcpy(ptr, threadEntry_.elements, sizeof(ElementWrapper) * prevSize);
using std::swap;
swap(ptr, threadEntry_.elements);
    }
    threadEntry_.elementsCapacity = newSize;
}
free(ptr);
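
The heart of the fix is in the hunk above: the zeroed replacement array is
allocated outside the lock, but the copy out of the old array and the pointer
swap happen while holding meta.lock_, because a concurrent ThreadLocalPtr
destructor may still be writing slots of the old array; the old array is
freed only after it has been unlinked. A minimal sketch of that pattern,
with hypothetical names (Slot, Entry, gMetaLock, grow) rather than folly's
actual types:

#include <cstdlib>
#include <cstring>
#include <mutex>
#include <new>
#include <utility>

struct Slot {
  void* ptr;
  void (*deleter)(void*);
  bool ownsDeleter;
};

struct Entry {
  Slot* slots = nullptr;
  std::size_t capacity = 0;
};

std::mutex gMetaLock;  // stands in for meta.lock_

void grow(Entry& e, std::size_t newSize) {
  // Allocate zeroed storage outside the lock; calloc can hand back
  // already-zeroed pages from the kernel.
  Slot* fresh = static_cast<Slot*>(std::calloc(newSize, sizeof(Slot)));
  if (!fresh) throw std::bad_alloc();
  {
    std::lock_guard<std::mutex> g(gMetaLock);
    // Copy under the lock: another thread's destroy(id) may be zeroing
    // one of e.slots' entries at this very moment.
    if (e.capacity != 0) {
      std::memcpy(fresh, e.slots, sizeof(Slot) * e.capacity);
    }
    std::swap(fresh, e.slots);  // publish the new array
    e.capacity = newSize;
  }
  std::free(fresh);  // 'fresh' now points at the old, unlinked array
}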
--- /dev/null
+++ b/folly/test/AHMIntStressTest.cpp
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <thread>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include "folly/AtomicHashMap.h"
+#include "folly/ScopeGuard.h"
+#include "folly/Memory.h"
+
+namespace {
+
+struct MyObject {
+ explicit MyObject(int i) : i(i) {}
+ int i;
+};
+
+typedef folly::AtomicHashMap<int,std::shared_ptr<MyObject>> MyMap;
+typedef std::lock_guard<std::mutex> Guard;
+
+std::unique_ptr<MyMap> newMap() { return folly::make_unique<MyMap>(100); }
+
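+/*
+ * A two-generation object cache: get() serves from the current map,
+ * falling back to (and promoting from) the previous generation;
+ * archive() retires the current generation, dropping anything that
+ * was not promoted since the last archive().
+ */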
+struct MyObjectDirectory {
+ MyObjectDirectory()
+ : cur_(newMap())
+ , prev_(newMap())
+ {}
+
+ std::shared_ptr<MyObject> get(int key) {
+ auto val = tryGet(key);
+ if (val) {
+ return val;
+ }
+
+ std::shared_ptr<MyMap> cur;
+ {
+ Guard g(lock_);
+ cur = cur_;
+ }
+
+ auto ret = cur->insert(key, std::make_shared<MyObject>(key));
+ return ret.first->second;
+ }
+
+ std::shared_ptr<MyObject> tryGet(int key) {
+ std::shared_ptr<MyMap> cur;
+ std::shared_ptr<MyMap> prev;
+ {
+ Guard g(lock_);
+ cur = cur_;
+ prev = prev_;
+ }
+
+ auto it = cur->find(key);
+ if (it != cur->end()) {
+ return it->second;
+ }
+
+ it = prev->find(key);
+ if (it != prev->end()) {
+ auto ret = cur->insert(key, it->second);
+ return ret.first->second;
+ }
+
+ return nullptr;
+ }
+
+ void archive() {
+ std::shared_ptr<MyMap> cur(newMap());
+
+ Guard g(lock_);
+ prev_ = cur_;
+ cur_ = cur;
+ }
+
+ std::mutex lock_;
+ std::shared_ptr<MyMap> cur_;
+ std::shared_ptr<MyMap> prev_;
+};
+
+}
+
+//////////////////////////////////////////////////////////////////////
+
+/*
+ * This test case stresses ThreadLocal allocation/deallocation heavily
+ * via ThreadCachedInt and AtomicHashMap, and a bunch of other
+ * mallocing.
+ */
+TEST(AHMIntStressTest, Test) {
+ auto const objs = new MyObjectDirectory();
+ SCOPE_EXIT { delete objs; };
+
+ std::vector<std::thread> threads;
+ for (int threadId = 0; threadId < 64; ++threadId) {
+ threads.emplace_back(
+ [objs,threadId] {
+ for (int recycles = 0; recycles < 500; ++recycles) {
+ for (int i = 0; i < 10; i++) {
+ auto val = objs->get(i);
+ }
+
+ objs->archive();
+ }
+ }
+ );
+ }
+
+ for (auto& t : threads) t.join();
+}
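
The 64 threads interleave get() and archive() arbitrarily, so it helps to pin
down the two-generation semantics single-threaded: an entry survives an
archive() only if tryGet() (or get()) promotes it from prev_ back into cur_
in between. A hypothetical illustration, not part of the commit, which would
compile if appended to this file (it uses MyObjectDirectory from the
anonymous namespace above and needs <cassert>):

#include <cassert>

void exampleArchiveSemantics() {
  MyObjectDirectory dir;
  dir.get(1);                 // inserted into cur_
  dir.archive();              // now only reachable via prev_
  auto hit = dir.tryGet(1);   // prev_ hit: promoted back into cur_
  assert(hit != nullptr);
  dir.archive();              // the promoted copy moves to prev_
  dir.archive();              // nothing touched it since: dropped
  assert(dir.tryGet(1) == nullptr);
}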