Summary:
1. Eliminate some string -> StringPiece -> string conversions
2. Mcrouter: eliminate unnecessary inlining by moving slow-path logic into its own method.
Using a test setup with shadow sampling enabled and some requests shadowed
(a typical prod setup), this brings the cost down from ~1.4% CPU in standalone mcrouter to ~0.2%:
```
before:
+ 0.70% 3898 mcrouter_orig mcrouter_orig [.] FbAdditionalProxyRequestLogger::logReply
+ 0.13% 864 mcrouter_orig mcrouter_orig [.] EventGroup<ScubaRow>::processExtraSamplers
+ 0.58% 3347 mcrouter_orig mcrouter_orig [.] DynamicScubaSampler::getSampler
~ 1.41% total
after:
+ 0.18% 1223 mcrouter_fix mcrouter_fix [.] FbAdditionalProxyRequestLogger::logReply
+ 0.04% 205 mcrouter_fix mcrouter_fix [.] EventGroup<ScubaRow>::processSampler
~ 0.22% total
```
Fiber local optimization might have more of an effect.
Test Plan:
unit tests
Reviewed By: pavlo@fb.com
Subscribers: trunkagent, fbcode-common-diffs@, alikhtarov, folly-diffs@, yfeldblum, darshan, chalfant
FB internal diff:
D2089133
Tasks:
5414865
Signature: t1:
2089133:
1432338487:
4158dc6b720c04f43820193e73b98d4197afcffa
}
template <typename T>
-T& Fiber::LocalData::get() {
- if (data_) {
- assert(*dataType_ == typeid(T));
- return *reinterpret_cast<T*>(data_);
- }
-
+T& Fiber::LocalData::getSlow() {
dataSize_ = sizeof(T);
dataType_ = &typeid(T);
if (sizeof(T) <= kBufferSize) {
#include <folly/CPortability.h>
#include <folly/IntrusiveList.h>
#include <folly/experimental/fibers/BoostContextCompatibility.h>
+#include <folly/Portability.h>
namespace folly { namespace fibers {
LocalData& operator=(const LocalData& other);
template <typename T>
- T& get();
+ T& get() {
+ if (data_) {
+ assert(*dataType_ == typeid(T));
+ return *reinterpret_cast<T*>(data_);
+ }
+ return getSlow<T>();
+ }
void reset();
//private:
+ template <typename T>
+ FOLLY_NOINLINE T& getSlow();
+
static void* allocateHeapBuffer(size_t size);
static void freeHeapBuffer(void* buffer);