X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FFBString.h;h=98bf1668e1e85c78601af9646445ba4e674d56bb;hb=bc857f03fc85a34e84ae8fcdf2d910259940f711;hp=23bfeb47b816002e5b80ef5bed7917b5b5e9a45f;hpb=58ddb58be76d43be88709cd45afabe0c785ea9e2;p=folly.git diff --git a/folly/FBString.h b/folly/FBString.h index 23bfeb47..98bf1668 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -101,7 +101,10 @@ namespace folly { // has issues when inlining is used, so disable that as well. #if defined(__clang__) # if __has_feature(address_sanitizer) -# if __has_attribute(__no_address_safety_analysis__) +# if __has_attribute(__no_sanitize__) +# define FBSTRING_DISABLE_ADDRESS_SANITIZER \ + __attribute__((__no_sanitize__("address"), __noinline__)) +# elif __has_attribute(__no_address_safety_analysis__) # define FBSTRING_DISABLE_ADDRESS_SANITIZER \ __attribute__((__no_address_safety_analysis__, __noinline__)) # elif __has_attribute(__no_sanitize_address__) @@ -255,19 +258,10 @@ private: }; */ -/** - * gcc-4.7 throws what appears to be some false positive uninitialized - * warnings for the members of the MediumLarge struct. So, mute them here. - */ -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wuninitialized" -#endif - /** * This is the core of the string. The code should work on 32- and - * 64-bit architectures and with any Char size. Porting to big endian - * architectures would require some changes. + * 64-bit and both big- and little-endian architectures with any + * Char size. * * The storage is selected as follows (assuming we store one-byte * characters on a 64-bit machine): (a) "small" strings between 0 and @@ -279,23 +273,27 @@ private: * reference-counted and copied lazily. the reference count is * allocated right before the character array. * - * The discriminator between these three strategies sits in the two - * most significant bits of the rightmost char of the storage. 
If - * neither is set, then the string is small (and its length sits in - * the lower-order bits of that rightmost character). If the MSb is - * set, the string is medium width. If the second MSb is set, then the - * string is large. + * The discriminator between these three strategies sits in two + * bits of the rightmost char of the storage. If neither is set, then the + * string is small (and its length sits in the lower-order bits on + * little-endian or the high-order bits on big-endian of that + * rightmost character). If the MSb is set, the string is medium width. + * If the second MSb is set, then the string is large. On little-endian, + * these 2 bits are the 2 MSbs of MediumLarge::capacity_, while on + * big-endian, these 2 bits are the 2 LSbs. This keeps both little-endian + * and big-endian fbstring_core equivalent with merely different ops used + * to extract capacity/category. */ template class fbstring_core { +protected: + static constexpr bool kIsLittleEndian = + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + static constexpr bool kIsBigEndian = + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__; + static_assert( + kIsLittleEndian || kIsBigEndian, "unable to identify endianness"); public: - fbstring_core() noexcept { - // Only initialize the tag, will set the MSBs (i.e. the small - // string size) to zero too - ml_.capacity_ = maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char))); - // or: setSmallSize(0); - writeTerminator(); - assert(category() == Category::isSmall && size() == 0); - } + fbstring_core() noexcept { reset(); } fbstring_core(const fbstring_core & rhs) { assert(&rhs != this); @@ -307,18 +305,12 @@ public: "fbstring layout failure"); static_assert(offsetof(MediumLarge, capacity_) == 2 * sizeof(ml_.data_), "fbstring layout failure"); - const size_t size = rhs.smallSize(); - if (size == 0) { - ml_.capacity_ = rhs.ml_.capacity_; - writeTerminator(); - } else { - // Just write the whole thing, don't look at details. 
In - // particular we need to copy capacity anyway because we want - // to set the size (don't forget that the last character, - // which stores a short string's length, is shared with the - // ml_.capacity field). - ml_ = rhs.ml_; - } + // Just write the whole thing, don't look at details. In + // particular we need to copy capacity anyway because we want + // to set the size (don't forget that the last character, + // which stores a short string's length, is shared with the + // ml_.capacity field). + ml_ = rhs.ml_; assert(category() == Category::isSmall && this->size() == rhs.size()); } else if (rhs.category() == Category::isLarge) { // Large strings are just refcounted @@ -338,8 +330,7 @@ public: // No need for writeTerminator() here, we copied one extra // element just above. ml_.size_ = rhs.ml_.size_; - ml_.capacity_ = (allocSize / sizeof(Char) - 1) - | static_cast(Category::isMedium); + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); assert(category() == Category::isMedium); } assert(size() == rhs.size()); @@ -347,14 +338,11 @@ public: } fbstring_core(fbstring_core&& goner) noexcept { - if (goner.category() == Category::isSmall) { - // Just copy, leave the goner in peace - new(this) fbstring_core(goner.small_, goner.smallSize()); - } else { - // Take goner's guts - ml_ = goner.ml_; + // Take goner's guts + ml_ = goner.ml_; + if (goner.category() != Category::isSmall) { // Clean goner's carcass - goner.setSmallSize(0); + goner.reset(); } } @@ -414,16 +402,14 @@ public: ml_.data_ = static_cast(checkedMalloc(allocSize)); fbstring_detail::pod_copy(data, data + size, ml_.data_); ml_.size_ = size; - ml_.capacity_ = (allocSize / sizeof(Char) - 1) - | static_cast(Category::isMedium); + ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium); } else { // Large strings are allocated differently size_t effectiveCapacity = size; auto const newRC = RefCounted::create(data, & effectiveCapacity); ml_.data_ = newRC->data_; ml_.size_ = size; - 
ml_.capacity_ = effectiveCapacity - | static_cast(Category::isLarge); + ml_.setCapacity(effectiveCapacity, Category::isLarge); } writeTerminator(); } @@ -458,12 +444,11 @@ public: ml_.data_ = data; ml_.size_ = size; // Don't forget about null terminator - ml_.capacity_ = (allocatedSize - 1) - | static_cast(Category::isMedium); + ml_.setCapacity(allocatedSize - 1, Category::isMedium); } else { // No need for the memory free(data); - setSmallSize(0); + reset(); } } @@ -556,8 +541,7 @@ public: // we have + 1 above. RefCounted::decrementRefs(ml_.data_); ml_.data_ = newRC->data_; - ml_.capacity_ = minCapacity - | static_cast(Category::isLarge); + ml_.setCapacity(minCapacity, Category::isLarge); // size remains unchanged } else { // String is not shared, so let's try to realloc (if needed) @@ -567,8 +551,7 @@ public: RefCounted::reallocate(ml_.data_, ml_.size_, ml_.capacity(), minCapacity); ml_.data_ = newRC->data_; - ml_.capacity_ = minCapacity - | static_cast(Category::isLarge); + ml_.setCapacity(minCapacity, Category::isLarge); writeTerminator(); } assert(capacity() >= minCapacity); @@ -589,8 +572,7 @@ public: (ml_.capacity() + 1) * sizeof(Char), capacityBytes)); writeTerminator(); - ml_.capacity_ = (capacityBytes / sizeof(Char) - 1) - | static_cast(Category::isMedium); + ml_.setCapacity(capacityBytes / sizeof(Char) - 1, Category::isMedium); } else { // Conversion from medium to large string fbstring_core nascent; @@ -613,8 +595,7 @@ public: // No need for writeTerminator(), we wrote it above with + 1. ml_.data_ = newRC->data_; ml_.size_ = size; - ml_.capacity_ = minCapacity - | static_cast(Category::isLarge); + ml_.setCapacity(minCapacity, Category::isLarge); assert(capacity() >= minCapacity); } else if (minCapacity > maxSmallSize) { // medium @@ -627,8 +608,7 @@ public: // No need for writeTerminator(), we wrote it above with + 1. 
ml_.data_ = data; ml_.size_ = size; - ml_.capacity_ = (allocSizeBytes / sizeof(Char) - 1) - | static_cast(Category::isMedium); + ml_.setCapacity(allocSizeBytes / sizeof(Char) - 1, Category::isMedium); } else { // small // Nothing to do, everything stays put @@ -728,15 +708,18 @@ private: // Disabled fbstring_core & operator=(const fbstring_core & rhs); - struct MediumLarge { - Char * data_; - size_t size_; - size_t capacity_; - - size_t capacity() const { - return capacity_ & capacityExtractMask; - } - }; + // Equivalent to setSmallSize(0), but with specialized + // writeTerminator which doesn't re-check the category after + // capacity_ is overwritten. + void reset() { + // Only initialize the tag, will set the MSBs (i.e. the small + // string size) to zero too. + ml_.capacity_ = kIsLittleEndian + ? maxSmallSize << (8 * (sizeof(size_t) - sizeof(Char))) + : maxSmallSize << 2; + small_[0] = '\0'; + assert(category() == Category::isSmall && size() == 0); + } struct RefCounted { std::atomic refCount_; @@ -805,42 +788,68 @@ private: } }; + typedef std::conditional::type + category_type; + + enum class Category : category_type { + isSmall = 0, + isMedium = kIsLittleEndian + ? sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000 + : 0x2, + isLarge = kIsLittleEndian + ? sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000 + : 0x1, + }; + + Category category() const { + // works for both big-endian and little-endian + return static_cast(ml_.capacity_ & categoryExtractMask); + } + + struct MediumLarge { + Char * data_; + size_t size_; + size_t capacity_; + + size_t capacity() const { + return kIsLittleEndian + ? capacity_ & capacityExtractMask + : capacity_ >> 2; + } + + void setCapacity(size_t cap, Category cat) { + capacity_ = kIsLittleEndian + ? 
cap | static_cast(cat) + : (cap << 2) | static_cast(cat); + } + }; + union { Char small_[sizeof(MediumLarge) / sizeof(Char)]; MediumLarge ml_; }; - enum { + enum : size_t { lastChar = sizeof(MediumLarge) - 1, maxSmallSize = lastChar / sizeof(Char), maxMediumSize = 254 / sizeof(Char), // coincides with the small // bin size in dlmalloc - categoryExtractMask = sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000, - capacityExtractMask = ~categoryExtractMask, + categoryExtractMask = kIsLittleEndian + ? sizeof(size_t) == 4 ? 0xC0000000 : 0xC000000000000000 + : 0x3, + capacityExtractMask = kIsLittleEndian + ? ~categoryExtractMask + : 0x0 /*unused*/, }; static_assert(!(sizeof(MediumLarge) % sizeof(Char)), "Corrupt memory layout for fbstring."); - typedef std::conditional::type - category_type; - - enum class Category : category_type { - isSmall = 0, - isMedium = sizeof(size_t) == 4 ? 0x80000000 : 0x8000000000000000, - isLarge = sizeof(size_t) == 4 ? 0x40000000 : 0x4000000000000000, - }; - - Category category() const { - // Assumes little endian - return static_cast(ml_.capacity_ & categoryExtractMask); - } - size_t smallSize() const { - assert(category() == Category::isSmall && - static_cast(small_[maxSmallSize]) - <= static_cast(maxSmallSize)); - return static_cast(maxSmallSize) - - static_cast(small_[maxSmallSize]); + assert(category() == Category::isSmall); + auto shift = kIsLittleEndian ? 0 : 2; + auto smallShifted = static_cast(small_[maxSmallSize]) >> shift; + assert(static_cast(maxSmallSize) >= smallShifted); + return static_cast(maxSmallSize) - smallShifted; } void setSmallSize(size_t s) { @@ -848,15 +857,13 @@ private: // so don't assume anything about the previous value of // small_[maxSmallSize]. assert(s <= maxSmallSize); - small_[maxSmallSize] = maxSmallSize - s; + small_[maxSmallSize] = kIsLittleEndian + ? 
maxSmallSize - s + : (maxSmallSize - s) << 2; writeTerminator(); } }; -#if defined(__GNUC__) && !defined(__clang__) -# pragma GCC diagnostic pop -#endif - #ifndef _LIBSTDCXX_FBSTRING /** * Dummy fbstring core that uses an actual std::string. This doesn't @@ -1035,7 +1042,7 @@ public: : store_(s, n) { } - basic_fbstring(size_type n, value_type c, const A& a = A()) { + basic_fbstring(size_type n, value_type c, const A& /*a*/ = A()) { auto const data = store_.expand_noinit(n); fbstring_detail::pod_fill(data, data + n, c); store_.writeTerminator(); @@ -1045,7 +1052,7 @@ public: basic_fbstring(InIt begin, InIt end, typename std::enable_if< !std::is_same::type, - value_type*>::value, const A>::type & a = A()) { + value_type*>::value, const A>::type & /*a*/ = A()) { assign(begin, end); } @@ -2370,6 +2377,9 @@ operator<<( os.setstate(std::ios_base::badbit | std::ios_base::failbit); } } +#elif defined(_MSC_VER) + // MSVC doesn't define __ostream_insert + os.write(str.data(), str.size()); #else std::__ostream_insert(os, str.data(), str.size()); #endif