if (id < e->elementsCapacity && e->elements[id].ptr) {
elements.push_back(e->elements[id]);
- // Writing another thread's ThreadEntry from here is fine;
- // the only other potential reader is the owning thread --
- // from onThreadExit (which grabs the lock, so is properly
- // synchronized with us) or from get() -- but using get() on a
- // ThreadLocalPtr object that's being destroyed is a bug, so
- // undefined behavior is fair game.
- e->elements[id].ptr = NULL;
- e->elements[id].deleter = NULL;
+ /*
+ * Writing another thread's ThreadEntry from here is fine;
+ * the only other potential reader is the owning thread --
+ * from onThreadExit (which grabs the lock, so is properly
+ * synchronized with us) or from get(), which also grabs
+ * the lock if it needs to resize the elements vector.
+ *
+ * We can't conflict with reads for a get(id), because
+ * it's illegal to call get on a thread local that's
+ * destructing.
+ */
+ e->elements[id].ptr = nullptr;
+ e->elements[id].deleter = nullptr;
+ e->elements[id].ownsDeleter = false;
}
}
meta.freeIds_.push_back(id);
@@ ... @@
size_t newSize = static_cast<size_t>((id + 5) * 1.7);
auto& meta = instance();
ElementWrapper* ptr = nullptr;
+
// Rely on jemalloc to zero the memory if possible -- maybe it knows
// it's already zeroed and saves us some work.
if (!usingJEMalloc() ||
prevSize < jemallocMinInPlaceExpandable ||
(rallocm(
static_cast<void**>(static_cast<void*>(&threadEntry_.elements)),
- NULL, newSize * sizeof(ElementWrapper), 0,
+ nullptr, newSize * sizeof(ElementWrapper), 0,
ALLOCM_NO_MOVE | ALLOCM_ZERO) != ALLOCM_SUCCESS)) {
    // Sigh, must realloc, but we can't call realloc here, as elements is
    // still linked in meta, so another thread might access invalid memory
    // after realloc succeeds.  We'll allocate a new region and copy the
    // old contents over by hand instead, making sure the whole new region
    // is zeroed.  calloc() is simpler than malloc() followed by memset(),
    // and potentially faster when dealing with a lot of memory, as it can
    // get already-zeroed pages from the kernel.
- if ((ptr = static_cast<ElementWrapper*>(
- calloc(newSize, sizeof(ElementWrapper)))) != nullptr) {
- memcpy(ptr, threadEntry_.elements, sizeof(ElementWrapper) * prevSize);
- } else {
- throw std::bad_alloc();
- }
+ ptr = static_cast<ElementWrapper*>(
+ calloc(newSize, sizeof(ElementWrapper))
+ );
+ if (!ptr) throw std::bad_alloc();
}
// Success, update the entry
{
boost::lock_guard<boost::mutex> g(meta.lock_);
+
if (prevSize == 0) {
meta.push_back(&threadEntry_);
}
+
if (ptr) {
+ /*
+ * Note: we need to hold the meta lock when copying data out of
+ * the old vector, because some other thread might be
+ * destructing a ThreadLocal and writing to the elements vector
+ * of this thread.
+ */
+ memcpy(ptr, threadEntry_.elements, sizeof(ElementWrapper) * prevSize);
using std::swap;
swap(ptr, threadEntry_.elements);
    }
    threadEntry_.elementsCapacity = newSize;
}
free(ptr);
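
The heart of the fix is in the hunk above: the zeroed replacement array is
allocated outside the lock, but the copy out of the old array and the pointer
swap happen while holding meta.lock_, because a concurrent ThreadLocalPtr
destructor may still be writing slots of the old array; the old array is
freed only after it has been unlinked. A minimal sketch of that pattern,
with hypothetical names (Slot, Entry, gMetaLock, grow) rather than folly's
actual types:

#include <cstdlib>
#include <cstring>
#include <mutex>
#include <new>
#include <utility>

struct Slot {
  void* ptr;
  void (*deleter)(void*);
  bool ownsDeleter;
};

struct Entry {
  Slot* slots = nullptr;
  std::size_t capacity = 0;
};

std::mutex gMetaLock;  // stands in for meta.lock_

void grow(Entry& e, std::size_t newSize) {
  // Allocate zeroed storage outside the lock; calloc can hand back
  // already-zeroed pages from the kernel.
  Slot* fresh = static_cast<Slot*>(std::calloc(newSize, sizeof(Slot)));
  if (!fresh) throw std::bad_alloc();
  {
    std::lock_guard<std::mutex> g(gMetaLock);
    // Copy under the lock: another thread's destroy(id) may be zeroing
    // one of e.slots' entries at this very moment.
    if (e.capacity != 0) {
      std::memcpy(fresh, e.slots, sizeof(Slot) * e.capacity);
    }
    std::swap(fresh, e.slots);  // publish the new array
    e.capacity = newSize;
  }
  std::free(fresh);  // 'fresh' now points at the old, unlinked array
}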
--- /dev/null
+++ b/folly/test/AHMIntStressTest.cpp
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <thread>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+#include "folly/AtomicHashMap.h"
+#include "folly/ScopeGuard.h"
+#include "folly/Memory.h"
+
+namespace {
+
+struct MyObject {
+ explicit MyObject(int i) : i(i) {}
+ int i;
+};
+
+typedef folly::AtomicHashMap<int,std::shared_ptr<MyObject>> MyMap;
+typedef std::lock_guard<std::mutex> Guard;
+
+std::unique_ptr<MyMap> newMap() { return folly::make_unique<MyMap>(100); }
+
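+/*
+ * A two-generation object cache: get() serves from the current map,
+ * falling back to (and promoting from) the previous generation;
+ * archive() retires the current generation, dropping anything that
+ * was not promoted since the last archive().
+ */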
+struct MyObjectDirectory {
+ MyObjectDirectory()
+ : cur_(newMap())
+ , prev_(newMap())
+ {}
+
+ std::shared_ptr<MyObject> get(int key) {
+ auto val = tryGet(key);
+ if (val) {
+ return val;
+ }
+
+ std::shared_ptr<MyMap> cur;
+ {
+ Guard g(lock_);
+ cur = cur_;
+ }
+
+ auto ret = cur->insert(key, std::make_shared<MyObject>(key));
+ return ret.first->second;
+ }
+
+ std::shared_ptr<MyObject> tryGet(int key) {
+ std::shared_ptr<MyMap> cur;
+ std::shared_ptr<MyMap> prev;
+ {
+ Guard g(lock_);
+ cur = cur_;
+ prev = prev_;
+ }
+
+ auto it = cur->find(key);
+ if (it != cur->end()) {
+ return it->second;
+ }
+
+ it = prev->find(key);
+ if (it != prev->end()) {
+ auto ret = cur->insert(key, it->second);
+ return ret.first->second;
+ }
+
+ return nullptr;
+ }
+
+ void archive() {
+ std::shared_ptr<MyMap> cur(newMap());
+
+ Guard g(lock_);
+ prev_ = cur_;
+ cur_ = cur;
+ }
+
+ std::mutex lock_;
+ std::shared_ptr<MyMap> cur_;
+ std::shared_ptr<MyMap> prev_;
+};
+
+}
+
+//////////////////////////////////////////////////////////////////////
+
+/*
+ * This test case stresses ThreadLocal allocation/deallocation heavily
+ * via ThreadCachedInt and AtomicHashMap, and a bunch of other
+ * mallocing.
+ */
+TEST(AHMIntStressTest, Test) {
+ auto const objs = new MyObjectDirectory();
+ SCOPE_EXIT { delete objs; };
+
+ std::vector<std::thread> threads;
+ for (int threadId = 0; threadId < 64; ++threadId) {
+ threads.emplace_back(
+ [objs,threadId] {
+ for (int recycles = 0; recycles < 500; ++recycles) {
+ for (int i = 0; i < 10; i++) {
+ auto val = objs->get(i);
+ }
+
+ objs->archive();
+ }
+ }
+ );
+ }
+
+ for (auto& t : threads) t.join();
+}
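
The 64 threads interleave get() and archive() arbitrarily, so it helps to pin
down the two-generation semantics single-threaded: an entry survives an
archive() only if tryGet() (or get()) promotes it from prev_ back into cur_
in between. A hypothetical illustration, not part of the commit, which would
compile if appended to this file (it uses MyObjectDirectory from the
anonymous namespace above and needs <cassert>):

#include <cassert>

void exampleArchiveSemantics() {
  MyObjectDirectory dir;
  dir.get(1);                 // inserted into cur_
  dir.archive();              // now only reachable via prev_
  auto hit = dir.tryGet(1);   // prev_ hit: promoted back into cur_
  assert(hit != nullptr);
  dir.archive();              // the promoted copy moves to prev_
  dir.archive();              // nothing touched it since: dropped
  assert(dir.tryGet(1) == nullptr);
}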