From: Lucian Grijincu
Date: Wed, 3 Apr 2013 00:56:32 +0000 (-0700)
Subject: folly: speed up fastpath of StaticMeta::get()
X-Git-Tag: v0.22.0~1016
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=05fd99096aadb7fea9104d0f38e2f48daff19410;p=folly.git

folly: speed up fastpath of StaticMeta::get()

Summary:
A smaller function makes it more likely it will be inlined
(it wasn't before, is now).

Test Plan: n/a

Reviewed By: tudorb@fb.com

FB internal diff: D760000
---

diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h
index c751d209..537e6b6b 100644
--- a/folly/detail/ThreadLocalDetail.h
+++ b/folly/detail/ThreadLocalDetail.h
@@ -260,59 +260,66 @@ struct StaticMeta {
     }
   }
 
-  static ElementWrapper& get(int id) {
+  /**
+   * Reserve enough space in the threadEntry_.elements for the item
+   * @id to fit in.
+   */
+  static void reserve(int id) {
     size_t prevSize = threadEntry_.elementsCapacity;
-    if (prevSize <= id) {
-      size_t newSize = static_cast<size_t>((id + 5) * 1.7);
-      auto & meta = instance();
-      ElementWrapper* ptr = NULL;
-      // Rely on jemalloc to zero the memory if possible -- maybe it knows
-      // it's already zeroed and saves us some work.
-      if (!usingJEMalloc() ||
-          prevSize < jemallocMinInPlaceExpandable ||
-          (rallocm(
-              static_cast<void**>(static_cast<void*>(&threadEntry_.elements)),
-              NULL, newSize * sizeof(ElementWrapper), 0,
-              ALLOCM_NO_MOVE | ALLOCM_ZERO) != ALLOCM_SUCCESS)) {
-        // Sigh, must realloc, but we can't call realloc here, as elements is
-        // still linked in meta, so another thread might access invalid memory
-        // after realloc succeeds. We'll copy by hand and update threadEntry_
-        // under the lock.
-        //
-        // Note that we're using calloc instead of malloc in order to zero
-        // the entire region. rallocm (ALLOCM_ZERO) will only zero newly
-        // allocated memory, so if a previous allocation allocated more than
-        // we requested, it's our responsibility to guarantee that the tail
-        // is zeroed. calloc() is simpler than malloc() followed by memset(),
-        // and potentially faster when dealing with a lot of memory, as
-        // it can get already-zeroed pages from the kernel.
-        if ((ptr = static_cast<ElementWrapper*>(
-              calloc(newSize, sizeof(ElementWrapper)))) != NULL) {
-          memcpy(ptr, threadEntry_.elements,
-                 sizeof(ElementWrapper) * prevSize);
-        } else {
-          throw std::bad_alloc();
-        }
+    size_t newSize = static_cast<size_t>((id + 5) * 1.7);
+    auto& meta = instance();
+    ElementWrapper* ptr = nullptr;
+    // Rely on jemalloc to zero the memory if possible -- maybe it knows
+    // it's already zeroed and saves us some work.
+    if (!usingJEMalloc() ||
+        prevSize < jemallocMinInPlaceExpandable ||
+        (rallocm(
+            static_cast<void**>(static_cast<void*>(&threadEntry_.elements)),
+            NULL, newSize * sizeof(ElementWrapper), 0,
+            ALLOCM_NO_MOVE | ALLOCM_ZERO) != ALLOCM_SUCCESS)) {
+      // Sigh, must realloc, but we can't call realloc here, as elements is
+      // still linked in meta, so another thread might access invalid memory
+      // after realloc succeeds. We'll copy by hand and update threadEntry_
+      // under the lock.
+      //
+      // Note that we're using calloc instead of malloc in order to zero
+      // the entire region. rallocm (ALLOCM_ZERO) will only zero newly
+      // allocated memory, so if a previous allocation allocated more than
+      // we requested, it's our responsibility to guarantee that the tail
+      // is zeroed. calloc() is simpler than malloc() followed by memset(),
+      // and potentially faster when dealing with a lot of memory, as
+      // it can get already-zeroed pages from the kernel.
+      if ((ptr = static_cast<ElementWrapper*>(
+            calloc(newSize, sizeof(ElementWrapper)))) != nullptr) {
+        memcpy(ptr, threadEntry_.elements, sizeof(ElementWrapper) * prevSize);
+      } else {
+        throw std::bad_alloc();
       }
+    }
 
-      // Success, update the entry
-      {
-        boost::lock_guard<boost::mutex> g(meta.lock_);
-        if (prevSize == 0) {
-          meta.push_back(&threadEntry_);
-        }
-        if (ptr) {
-          using std::swap;
-          swap(ptr, threadEntry_.elements);
-        }
-        threadEntry_.elementsCapacity = newSize;
+    // Success, update the entry
+    {
+      boost::lock_guard<boost::mutex> g(meta.lock_);
+      if (prevSize == 0) {
+        meta.push_back(&threadEntry_);
+      }
+      if (ptr) {
+        using std::swap;
+        swap(ptr, threadEntry_.elements);
       }
+      threadEntry_.elementsCapacity = newSize;
+    }
 
-      free(ptr);
+    free(ptr);
 
-      if (prevSize == 0) {
-        pthread_setspecific(meta.pthreadKey_, &meta);
-      }
+    if (prevSize == 0) {
+      pthread_setspecific(meta.pthreadKey_, &meta);
+    }
+  }
+
+  static ElementWrapper& get(int id) {
+    if (UNLIKELY(threadEntry_.elementsCapacity <= id)) {
+      reserve(id);
     }
     return threadEntry_.elements[id];
   }