From: Nicholas Ormrod Date: Fri, 14 Oct 2016 18:32:14 +0000 (-0700) Subject: Backed out changeset 09c1712854b3 X-Git-Tag: v2016.10.17.00~1 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=fc838f2b6a5f850b9465dd9aa448bb1039d11d8a;p=folly.git Backed out changeset 09c1712854b3 Summary: Removing COW from fbstring had adverse memory consequences when sync'd with libgcc. Revert this diff to keep folly and libgcc in sync. Reviewed By: yfeldblum, luciang Differential Revision: D4019604 fbshipit-source-id: 80bd31c220098bfab37f0effc90f67876432369d --- diff --git a/folly/FBString.h b/folly/FBString.h index 00d1a959..bb3d339b 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -289,13 +289,12 @@ private: * The storage is selected as follows (assuming we store one-byte * characters on a 64-bit machine): (a) "small" strings between 0 and * 23 chars are stored in-situ without allocation (the rightmost byte - * stores the size); (b) "medium" strings (> 23 chars) are stored in - * malloc-allocated memory that is copied eagerly. - * There exists a third storage category: (c) "large", which has the - * copy-on-write optimization. COW was disallowed in C++11, so large is - * now deprecated in fbstring_core. fbstring_core no longer creates large - * strings, though still works with them. Later, large strings will be - * completely removed. + * stores the size); (b) "medium" strings from 24 through 254 chars + * are stored in malloc-allocated memory that is copied eagerly; (c) + * "large" strings of 255 chars and above are stored in a similar + * structure as medium arrays, except that the string is + * reference-counted and copied lazily. the reference count is + * allocated right before the character array. * * The discriminator between these three strategies sits in two * bits of the rightmost char of the storage. If neither is set, then the @@ -326,10 +325,18 @@ public: fbstring_core(const fbstring_core & rhs) { FBSTRING_ASSERT(&rhs != this); - if (rhs.category() == Category::isSmall) { - makeSmall(rhs); - } else { - makeMedium(rhs); + switch (rhs.category()) { + case Category::isSmall: + copySmall(rhs); + break; + case Category::isMedium: + copyMedium(rhs); + break; + case Category::isLarge: + copyLarge(rhs); + break; + default: + fbstring_detail::assume_unreachable(); } FBSTRING_ASSERT(size() == rhs.size()); FBSTRING_ASSERT(memcmp(data(), rhs.data(), size() * sizeof(Char)) == 0); @@ -347,8 +354,10 @@ public: bool disableSSO = FBSTRING_DISABLE_SSO) { if (!disableSSO && size <= maxSmallSize) { initSmall(data, size); - } else { + } else if (size <= maxMediumSize) { initMedium(data, size); + } else { + initLarge(data, size); } FBSTRING_ASSERT(this->size() == size); FBSTRING_ASSERT( @@ -608,7 +617,6 @@ private: } void setCapacity(size_t cap, Category cat) { - FBSTRING_ASSERT(cat != Category::isLarge); capacity_ = kIsLittleEndian ? cap | (static_cast(cat) << kCategoryShift) : (cap << 2) | static_cast(cat); @@ -652,9 +660,9 @@ private: FBSTRING_ASSERT(category() == Category::isSmall && size() == s); } - void makeSmall(const fbstring_core&); - void makeMedium(const fbstring_core&); - void makeLarge(const fbstring_core&); + void copySmall(const fbstring_core&); + void copyMedium(const fbstring_core&); + void copyLarge(const fbstring_core&); void initSmall(const Char* data, size_t size); void initMedium(const Char* data, size_t size); @@ -673,7 +681,7 @@ private: }; template -inline void fbstring_core::makeSmall(const fbstring_core& rhs) { +inline void fbstring_core::copySmall(const fbstring_core& rhs) { static_assert(offsetof(MediumLarge, data_) == 0, "fbstring layout failure"); static_assert( offsetof(MediumLarge, size_) == sizeof(ml_.data_), @@ -692,7 +700,7 @@ inline void fbstring_core::makeSmall(const fbstring_core& rhs) { } template -FOLLY_MALLOC_NOINLINE inline void fbstring_core::makeMedium( +FOLLY_MALLOC_NOINLINE inline void fbstring_core::copyMedium( const fbstring_core& rhs) { // Medium strings are copied eagerly. Don't forget to allocate // one extra Char for the null terminator. @@ -707,7 +715,7 @@ FOLLY_MALLOC_NOINLINE inline void fbstring_core::makeMedium( } template -FOLLY_MALLOC_NOINLINE inline void fbstring_core::makeLarge( +FOLLY_MALLOC_NOINLINE inline void fbstring_core::copyLarge( const fbstring_core& rhs) { // Large strings are just refcounted ml_ = rhs.ml_; @@ -844,16 +852,29 @@ FOLLY_MALLOC_NOINLINE inline void fbstring_core::reserveMedium( if (minCapacity <= ml_.capacity()) { return; // nothing to do, there's enough room } - // Keep the string at medium size. Don't forget to allocate - // one extra Char for the terminating null. - size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); - // Also copies terminator. - ml_.data_ = static_cast(smartRealloc( - ml_.data_, - (ml_.size_ + 1) * sizeof(Char), - (ml_.capacity() + 1) * sizeof(Char), - capacityBytes)); - ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); + if (minCapacity <= maxMediumSize) { + // Keep the string at medium size. Don't forget to allocate + // one extra Char for the terminating null. + size_t capacityBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); + // Also copies terminator. + ml_.data_ = static_cast(smartRealloc( + ml_.data_, + (ml_.size_ + 1) * sizeof(Char), + (ml_.capacity() + 1) * sizeof(Char), + capacityBytes)); + ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); + } else { + // Conversion from medium to large string + fbstring_core nascent; + // Will recurse to another branch of this function + nascent.reserve(minCapacity); + nascent.ml_.size_ = ml_.size_; + // Also copies terminator. + fbstring_detail::podCopy( + ml_.data_, ml_.data_ + ml_.size_ + 1, nascent.ml_.data_); + nascent.swap(*this); + FBSTRING_ASSERT(capacity() >= minCapacity); + } } template @@ -863,17 +884,29 @@ FOLLY_MALLOC_NOINLINE inline void fbstring_core::reserveSmall( if (!disableSSO && minCapacity <= maxSmallSize) { // small // Nothing to do, everything stays put - return; + } else if (minCapacity <= maxMediumSize) { + // medium + // Don't forget to allocate one extra Char for the terminating null + auto const allocSizeBytes = + goodMallocSize((1 + minCapacity) * sizeof(Char)); + auto const pData = static_cast(checkedMalloc(allocSizeBytes)); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::podCopy(small_, small_ + size + 1, pData); + ml_.data_ = pData; + ml_.size_ = size; + ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); + } else { + // large + auto const newRC = RefCounted::create(&minCapacity); + auto const size = smallSize(); + // Also copies terminator. + fbstring_detail::podCopy(small_, small_ + size + 1, newRC->data_); + ml_.data_ = newRC->data_; + ml_.size_ = size; + ml_.setCapacity(minCapacity, Category::isLarge); + FBSTRING_ASSERT(capacity() >= minCapacity); } - // Don't forget to allocate one extra Char for the terminating null - auto const allocSizeBytes = goodMallocSize((1 + minCapacity) * sizeof(Char)); - auto const pData = static_cast(checkedMalloc(allocSizeBytes)); - auto const size = smallSize(); - // Also copies terminator. - fbstring_detail::podCopy(small_, small_ + size + 1, pData); - ml_.data_ = pData; - ml_.size_ = size; - ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); } template diff --git a/folly/docs/FBString.md b/folly/docs/FBString.md index e6db45fe..b41a63ed 100644 --- a/folly/docs/FBString.md +++ b/folly/docs/FBString.md @@ -4,7 +4,7 @@ `fbstring` is a drop-in replacement for `std::string`. The main benefit of `fbstring` is significantly increased performance on virtually all important primitives. This is achieved by using a -two-tiered storage strategy and by cooperating with the memory +three-tiered storage strategy and by cooperating with the memory allocator. In particular, `fbstring` is designed to detect use of jemalloc and cooperate with it to achieve significant improvements in speed and memory usage. @@ -18,14 +18,20 @@ architectures. * Small strings (<= 23 chars) are stored in-situ without memory allocation. -* Other strings (> 23 chars) are stored in malloc-allocated +* Medium strings (24 - 255 chars) are stored in malloc-allocated memory and copied eagerly. +* Large strings (> 255 chars) are stored in malloc-allocated memory and + copied lazily. + ### Implementation highlights *** * 100% compatible with `std::string`. +* Thread-safe reference counted copy-on-write for strings "large" + strings (> 255 chars). + * Uses `malloc` instead of allocators. * Jemalloc-friendly. `fbstring` automatically detects if application