From: Nathan Bronson Date: Tue, 6 Jun 2017 16:08:51 +0000 (-0700) Subject: UninitializedMemoryHacks X-Git-Tag: v2017.06.12.00~36 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=7cf58370d0f79c92b324413797f2239653f5d2c2;p=folly.git UninitializedMemoryHacks Summary: This diff adds helper functions that can resize std::string or std::vector without constructing or initializing new elements. They are designed for retroactively optimizing code where touching every element twice (or touching never-used elements once) shows up in profiling, and where restructuring the code to use fixed-length arrays or IOBuf-s would be difficult. Implementations are provided for 5 string implementations (pre-c++11 libstdc++, libstdc++ with SSO, libc++, std::basic_fbstring, and MSVC) and 3 vector implementations (libstdc++, libc++, and MSVC). On an unsupported platform you will hopefully get a #warn if you include UninitializedMemoryHacks.h followed by a linker error if you actually use it. Reviewed By: yfeldblum Differential Revision: D5102679 fbshipit-source-id: 536c00eabae4cdb8a0affe3e919a372f4dc51ac5 --- diff --git a/folly/Makefile.am b/folly/Makefile.am index 6d46439c..0b1a10d7 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -289,6 +289,7 @@ nobase_follyinclude_HEADERS = \ Math.h \ Memory.h \ MemoryMapping.h \ + memory/UninitializedMemoryHacks.h \ MicroSpinLock.h \ MicroLock.h \ MoveWrapper.h \ @@ -336,8 +337,8 @@ nobase_follyinclude_HEADERS = \ portability/SysUio.h \ portability/Time.h \ portability/TypeTraits.h \ - portability/Windows.h \ portability/Unistd.h \ + portability/Windows.h \ Preprocessor.h \ PriorityMPMCQueue.h \ ProducerConsumerQueue.h \ diff --git a/folly/memory/UninitializedMemoryHacks.h b/folly/memory/UninitializedMemoryHacks.h new file mode 100644 index 00000000..e06bbed1 --- /dev/null +++ b/folly/memory/UninitializedMemoryHacks.h @@ -0,0 +1,329 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace { +// This struct is different in every translation unit. We use template +// instantiations to define inline freestanding methods. Since the +// methods are inline it is fine to define them in multiple translation +// units, but the instantiation itself would be an ODR violation if it is +// present in the program more than once. By tagging the instantiations +// with this struct, we avoid ODR problems for the instantiation while +// allowing the resulting methods to be inline-able. If you think that +// seems hacky keep reading... +struct FollyMemoryDetailTranslationUnitTag {}; +} // anon namespace +namespace folly { +namespace detail { +void unsafeStringSetLargerSize(std::string& s, std::size_t n); +template +void unsafeVectorSetLargerSize(std::vector& v, std::size_t n); +} // namespace detail + +/* + * This file provides helper functions resizeWithoutInitialization() + * that can resize std::string or std::vector without constructing or + * initializing new elements. + * + * IMPORTANT: These functions can be unsafe if used improperly. If you + * don't write to an element with index >= oldSize and < newSize, reading + * the element can expose arbitrary memory contents to the world, including + * the contents of old strings. Use ASAN in your tests, and pay extra + * attention to failure paths. + * + * You should only use this if you have profiling data from production + * that shows that this is not a premature optimization. This code is + * designed for retroactively optimizing code where touching every element + * twice (or touching never-used elements once) shows up in profiling, + * and where restructuring the code to use fixed-length arrays or IOBuf-s + * would be difficult. + * + * NOTE: Just because .resize() shows up in your profile (probably + * via one of the intrinsic memset implementations) doesn't mean that + * these functions will make your program faster. Most of the cost + * of memset comes from cache misses, so avoiding the memset means + * that the cache miss cost just gets pushed to the following code. + * resizeWithoutInitialization can be a win when the contents are bigger + * than a cache level, because the second access isn't free in that case. + * It can also be a win if the final length of the string or vector isn't + * actually known, so the suffix will be chopped off with a second call + * to .resize(). + */ + +/** + * Like calling s.resize(n), but when growing the string does not + * initialize new elements. It is undefined behavior to read from + * any element added to the string by this method unless it has been + * written to by an operation that follows this call. + * + * IMPORTANT: Read the warning at the top of this header file. + */ +inline void resizeWithoutInitialization(std::string& s, std::size_t n) { + if (n <= s.size()) { + s.resize(n); + } else { + // careful not to call reserve unless necessary, as it causes + // shrink_to_fit on many platforms + if (n > s.capacity()) { + s.reserve(n); + } + detail::unsafeStringSetLargerSize(s, n); + } +} + +/** + * Like calling v.resize(n), but when growing the vector does not construct + * or initialize new elements. It is undefined behavior to read from any + * element added to the vector by this method unless it has been written + * to by an operation that follows this call. + * + * Use the FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(T) macro to + * declare (and inline define) the internals required to call + * resizeWithoutInitialization for a std::vector. This must + * be done exactly once in each translation unit that wants to call + * resizeWithoutInitialization(std::vector&,size_t). char and unsigned + * char are provided by default. If you don't do this you will get linker + * errors about folly::detail::unsafeVectorSetLargerSize. Requiring that + * T be trivially_destructible is only an approximation of the property + * required of T. In fact what is required is that any random sequence of + * bytes may be safely reinterpreted as a T and passed to T's destructor. + * + * std::vector has specialized internals and is not supported. + * + * IMPORTANT: Read the warning at the top of this header file. + */ +template < + typename T, + typename = typename std::enable_if< + std::is_trivially_destructible::value && + !std::is_same::value>::type> +void resizeWithoutInitialization(std::vector& v, std::size_t n) { + if (n <= v.size()) { + v.resize(n); + } else { + if (n > v.capacity()) { + v.reserve(n); + } + detail::unsafeVectorSetLargerSize(v, n); + } +} + +namespace detail { + +#if defined(_LIBCPP_STRING) +// libc++ + +} // namespace detail +} // namespace folly +template void std::string::__set_size(std::size_t); +namespace folly { +namespace detail { + +template +struct MakeUnsafeStringSetLargerSize { + friend void unsafeStringSetLargerSize( + std::basic_string& s, + std::size_t n) { + // s.__set_size(n); + (s.*Ptr__set_size)(n); + (&s[0])[n] = '\0'; + } +}; +template struct MakeUnsafeStringSetLargerSize< + FollyMemoryDetailTranslationUnitTag, + char, + void (std::string::*)(std::size_t), + &std::string::__set_size>; + +#elif defined(_GLIBCXX_USE_FB) +// FBString + +template +struct MakeUnsafeStringSetLargerSize { + friend void unsafeStringSetLargerSize( + std::basic_string& s, + std::size_t n) { + // s.store_.expandNoinit(n - s.size(), false); + (s.*Ptrstore_).expandNoinit(n - s.size(), false); + } +}; +template struct MakeUnsafeStringSetLargerSize< + FollyMemoryDetailTranslationUnitTag, + char, + std::fbstring_core(std::string::*), + &std::string::store_>; + +#elif defined(_GLIBCXX_STRING) && _GLIBCXX_USE_CXX11_ABI +// libstdc++ new implementation with SSO + +} // namespace detail +} // namespace folly +template void std::string::_M_set_length(std::size_t); +namespace folly { +namespace detail { + +template +struct MakeUnsafeStringSetLargerSize { + friend void unsafeStringSetLargerSize( + std::basic_string& s, + std::size_t n) { + // s._M_set_length(n); + (s.*Ptr_M_set_length)(n); + } +}; +template struct MakeUnsafeStringSetLargerSize< + FollyMemoryDetailTranslationUnitTag, + char, + void (std::string::*)(std::size_t), + &std::string::_M_set_length>; + +#elif defined(_GLIBCXX_STRING) +// libstdc++ old implementation + +} // namespace detail +} // namespace folly +template std::string::_Rep* std::string::_M_rep() const; +template void std::string::_Rep::_M_set_length_and_sharable(std::size_t); +namespace folly { +namespace detail { + +template < + typename Tag, + typename T, + typename A, + A Ptr_M_rep, + typename B, + B Ptr_M_set_length_and_sharable> +struct MakeUnsafeStringSetLargerSize { + friend void unsafeStringSetLargerSize( + std::basic_string& s, + std::size_t n) { + // s._M_rep()->_M_set_length_and_sharable(n); + auto rep = (s.*Ptr_M_rep)(); + (rep->*Ptr_M_set_length_and_sharable)(n); + } +}; +template struct MakeUnsafeStringSetLargerSize< + FollyMemoryDetailTranslationUnitTag, + char, + std::string::_Rep* (std::string::*)() const, + &std::string::_M_rep, + void (std::string::_Rep::*)(std::size_t), + &std::string::_Rep::_M_set_length_and_sharable>; + +#elif defined(_MSC_VER) +// MSVC + +inline void unsafeStringSetLargerSize(std::string& s, std::size_t n) { + s._Eos(n); +} + +#else +#warn "No implementation for resizeWithoutInitialization of std::string" +#endif + +// This machinery bridges template expansion and macro expansion +#define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE) \ + namespace folly { \ + namespace detail { \ + void unsafeVectorSetLargerSizeImpl(std::vector& v, std::size_t); \ + template <> \ + inline void unsafeVectorSetLargerSize( \ + std::vector & v, \ + std::size_t n) { \ + unsafeVectorSetLargerSizeImpl(v, n); \ + } \ + } \ + } + +#if defined(_LIBCPP_VECTOR) +// libc++ + +template +struct MakeUnsafeVectorSetLargerSize { + friend void unsafeVectorSetLargerSizeImpl(std::vector& v, std::size_t n) { + // v.__end_ += (n - v.size()); + using Base = std::__vector_base>; + static_assert( + std::is_standard_layout>::value && + sizeof(std::vector) == sizeof(Base), + "reinterpret_cast safety conditions not met"); + reinterpret_cast(v).*Ptr__end_ += (n - v.size()); + } +}; + +#define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(TYPE) \ + template struct folly::detail::MakeUnsafeVectorSetLargerSize< \ + FollyMemoryDetailTranslationUnitTag, \ + TYPE, \ + TYPE*(std::__vector_base>::*), \ + &std::vector::__end_>; \ + FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE) + +#elif defined(_GLIBCXX_VECTOR) +// libstdc++ + +template < + typename Tag, + typename T, + typename A, + A Ptr_M_impl, + typename B, + B Ptr_M_finish> +struct MakeUnsafeVectorSetLargerSize : std::vector { + friend void unsafeVectorSetLargerSizeImpl(std::vector& v, std::size_t n) { + // v._M_impl._M_finish += (n - v.size()); + (v.*Ptr_M_impl).*Ptr_M_finish += (n - v.size()); + } +}; + +#define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(TYPE) \ + template struct folly::detail::MakeUnsafeVectorSetLargerSize< \ + FollyMemoryDetailTranslationUnitTag, \ + TYPE, \ + std::vector::_Vector_impl( \ + std::_Vector_base>::*), \ + &std::vector::_M_impl, \ + TYPE*(std::vector::_Vector_impl::*), \ + &std::vector::_Vector_impl::_M_finish>; \ + FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE) + +#elif defined(_MSC_VER) +// MSVC + +#define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(TYPE) \ + extern inline void unsafeVectorSetLargerSizeImpl( \ + std::vector& v, std::size_t n) { \ + v._Mylast() += (n - v.size()); \ + } \ + FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE) + +#else +#warn "No implementation for resizeWithoutInitialization of std::vector" +#endif + +} // namespace detail +} // namespace folly + +#if defined(FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT) +FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(char) +FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(unsigned char) +#endif diff --git a/folly/memory/test/UninitializedMemoryHacksODR.cpp b/folly/memory/test/UninitializedMemoryHacksODR.cpp new file mode 100644 index 00000000..95ebc63c --- /dev/null +++ b/folly/memory/test/UninitializedMemoryHacksODR.cpp @@ -0,0 +1,20 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/memory/UninitializedMemoryHacks.h" + +// Verify that this is okay to put in multiple translation units +FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(int) diff --git a/folly/memory/test/UninitializedMemoryHacksTest.cpp b/folly/memory/test/UninitializedMemoryHacksTest.cpp new file mode 100644 index 00000000..72581502 --- /dev/null +++ b/folly/memory/test/UninitializedMemoryHacksTest.cpp @@ -0,0 +1,316 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/memory/UninitializedMemoryHacks.h" + +#include +#include +#include + +#include +#include +#include +#include + +void describePlatform() { + LOG(INFO) << "sizeof(void*) = " << sizeof(void*); + + LOG(INFO) << "sizeof(std::string) = " << sizeof(std::string); +#if defined(_LIBCPP_STRING) + LOG(INFO) << "std::string from libc++"; +#elif defined(_STLP_STRING) + LOG(INFO) << "std::string from STLport"; +#elif defined(_GLIBCXX_USE_FB) + LOG(INFO) << "std::string from FBString"; +#elif defined(_GLIBCXX_STRING) && _GLIBCXX_USE_CXX11_ABI + LOG(INFO) << "std::string from libstdc++ with SSO"; +#elif defined(_GLIBCXX_STRING) + LOG(INFO) << "std::string from old libstdc++"; +#elif defined(_MSC_VER) + LOG(INFO) << "std::string from MSVC"; +#else + LOG(INFO) << "UNKNOWN std::string implementation"; +#endif + + LOG(INFO) << "sizeof(std::vector) = " << sizeof(std::vector); +#if defined(_LIBCPP_VECTOR) + LOG(INFO) << "std::vector from libc++"; +#elif defined(_STLP_VECTOR) + LOG(INFO) << "std::vector from STLport"; +#elif defined(_GLIBCXX_VECTOR) + LOG(INFO) << "std::vector from libstdc++"; +#elif defined(_MSC_VER) + LOG(INFO) << "std::vector from MSVC"; +#else + LOG(INFO) << "UNKNOWN std::vector implementation"; +#endif +} + +// Returns a concatenation of target[i] for those i where valid[i] +template +T validData(T const& target, std::vector const& valid) { + EXPECT_EQ(target.size(), valid.size()); + T rv; + for (std::size_t i = 0; i < valid.size(); ++i) { + if (valid[i]) { + rv.push_back(target[i]); + } + } + return rv; +} + +template +void doResizeWithoutInit( + T& target, + std::vector& valid, + std::size_t newSize) { + auto oldSize = target.size(); + auto before = validData(target, valid); + folly::resizeWithoutInitialization(target, newSize); + valid.resize(newSize); + auto after = validData(target, valid); + if (oldSize <= newSize) { + EXPECT_EQ(before, after); + } else { + EXPECT_GE(before.size(), after.size()); + EXPECT_TRUE(std::equal(after.begin(), after.end(), before.begin())); + } +} + +template +void doOverwrite( + T& target, + std::vector& valid, + std::size_t b, + std::size_t e) { + for (auto i = b; i < e && i < target.size(); ++i) { + target[i] = '0' + (i % 10); + valid[i] = true; + } +} + +template +void doResize(T& target, std::vector& valid, std::size_t newSize) { + auto oldSize = target.size(); + auto before = validData(target, valid); + target.resize(newSize); + valid.resize(newSize); + for (auto i = oldSize; i < newSize; ++i) { + valid[i] = true; + } + auto after = validData(target, valid); + if (oldSize == newSize) { + EXPECT_EQ(before, after); + } else if (oldSize < newSize) { + EXPECT_LT(before.size(), after.size()); + EXPECT_TRUE(std::equal(before.begin(), before.end(), after.begin())); + } else { + EXPECT_GE(before.size(), after.size()); + EXPECT_TRUE(std::equal(after.begin(), after.end(), before.begin())); + } +} + +template +void doClear(T& target, std::vector& valid) { + target.clear(); + valid.clear(); +} + +template +void doInsert(T& target, std::vector& valid, std::size_t i) { + target.insert(target.begin() + i, 'I'); + valid.insert(valid.begin() + i, true); +} + +template +void doErase(T& target, std::vector& valid, std::size_t i) { + target.erase(target.begin() + i); + valid.erase(valid.begin() + i); +} + +template +void doPushBack(T& target, std::vector& valid) { + target.push_back('P'); + valid.push_back(true); +} + +template +void genericCheck(T& target) { + EXPECT_LE(target.size(), target.capacity()); + EXPECT_EQ(target.size() == 0, target.empty()); + EXPECT_EQ(target.size(), target.end() - target.begin()); + EXPECT_EQ(target.size(), target.cend() - target.cbegin()); + if (!target.empty()) { + EXPECT_EQ(target.data(), &target[0]); + EXPECT_EQ(target.data(), &target.front()); + EXPECT_EQ(target.data() + target.size() - 1, &target.back()); + } +} + +template +void check(T& target) { + genericCheck(target); +} + +template <> +void check(std::string& target) { + genericCheck(target); + EXPECT_EQ(target.c_str(), target.data()); + EXPECT_EQ(target.c_str()[target.size()], '\0'); +} + +template +void testSimple() { + describePlatform(); + + auto sizes = {0, 1, 10, 14, 15, 16, 17, 22, 23, 24, 32, 95, 100, 10000}; + for (auto i : sizes) { + for (auto j : sizes) { + { + T target; + std::vector valid; + doResize(target, valid, i); + doResizeWithoutInit(target, valid, j); + check(target); + } + + { + T target; + std::vector valid; + doResize(target, valid, i); + doResizeWithoutInit(target, valid, j); + doOverwrite(target, valid, i, j); + check(target); + } + + { + T target; + std::vector valid; + doResizeWithoutInit(target, valid, i); + doResize(target, valid, j); + doOverwrite(target, valid, i / 2, i / 2); + check(target); + } + + { + T target; + std::vector valid; + doResizeWithoutInit(target, valid, i); + doResize(target, valid, j); + doOverwrite(target, valid, i, j); + check(target); + } + } + } +} + +template +void testRandom(size_t numSteps = 10000) { + describePlatform(); + + auto target = folly::make_unique(); + std::vector valid; + + for (size_t step = 0; step < numSteps; ++step) { + auto pct = folly::Random::rand32(100); + auto v = folly::Random::rand32(uint32_t{3} << folly::Random::rand32(14)); + + if (pct < 5) { + doClear(*target, valid); + } else if (pct < 30) { + T copy; + folly::resizeWithoutInitialization(copy, target->size()); + for (size_t i = 0; i < copy.size(); ++i) { + if (valid[i]) { + copy[i] = target->at(i); + } + } + if (pct < 10) { + std::swap(copy, *target); + } else if (pct < 15) { + *target = std::move(copy); + } else if (pct < 20) { + *target = copy; + } else if (pct < 25) { + target = folly::make_unique(std::move(copy)); + } else { + target = folly::make_unique(copy); + } + } else if (pct < 35) { + target->reserve(v); + } else if (pct < 40) { + target->shrink_to_fit(); + } else if (pct < 45) { + doResize(*target, valid, v); + } else if (pct < 50) { + doInsert(*target, valid, v % (target->size() + 1)); + } else if (pct < 55) { + if (!target->empty()) { + doErase(*target, valid, v % target->size()); + } + } else if (pct < 60) { + doPushBack(*target, valid); + } else if (pct < 65) { + target = folly::make_unique(); + valid.clear(); + } else if (pct < 80) { + auto v2 = folly::Random::rand32(uint32_t{3} << folly::Random::rand32(14)); + doOverwrite(*target, valid, std::min(v, v2), std::max(v, v2)); + } else { + doResizeWithoutInit(*target, valid, v); + } + + // don't check every time in implementation does lazy work + if (folly::Random::rand32(100) < 50) { + check(*target); + } + } +} + +TEST(UninitializedMemoryHacks, simpleString) { + testSimple(); +} + +TEST(UninitializedMemoryHacks, simpleVectorChar) { + testSimple>(); +} + +TEST(UninitializedMemoryHacks, simpleVectorByte) { + testSimple>(); +} + +TEST(UninitializedMemoryHacks, simpleVectorInt) { + testSimple>(); +} + +TEST(UninitializedMemoryHacks, randomString) { + testRandom(); +} + +TEST(UninitializedMemoryHacks, randomVectorChar) { + testRandom>(); +} + +TEST(UninitializedMemoryHacks, randomVectorByte) { + testRandom>(); +} + +TEST(UninitializedMemoryHacks, randomVectorInt) { + testRandom>(); +} + +// We are deliberately putting this at the bottom to make sure it can follow use +FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(int)