From: Jun LI Date: Wed, 15 Oct 2014 20:53:51 +0000 (-0700) Subject: Defer query string parsing from ctor to getQueryParams() X-Git-Tag: v0.22.0~277 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8ae778e340d1b2e5b627c600a9bb4f0a87735ee6;p=folly.git Defer query string parsing from ctor to getQueryParams() Summary: Query string parsing uses a lot of CPU, it happens in ctor killing CPU even in users that are not interested in query string. Move it to getQueryParams(). Test Plan: fbconfig folly/test fbmake runtests_dbg Reviewed By: ldemailly@fb.com Subscribers: trunkagent, njormrod, zellux FB internal diff: D1604973 Tasks: 5304484 Blame Revision: https://phabricator.fb.com/D1455158 --- diff --git a/folly/Uri.cpp b/folly/Uri.cpp index f7bf80fa..f4618d87 100644 --- a/folly/Uri.cpp +++ b/folly/Uri.cpp @@ -91,26 +91,6 @@ Uri::Uri(StringPiece str) : port_(0) { } query_ = submatch(match, 3); - if (!query_.empty()) { - // Parse query string - static const boost::regex queryParamRegex( - "(^|&)([^=&]*)=?([^=&]*)(?=(&|$))"); - boost::cregex_iterator paramBeginItr( - match[3].first, - match[3].second, - queryParamRegex); - boost::cregex_iterator paramEndItr; - for(auto itr = paramBeginItr; itr != paramEndItr; itr++) { - if (itr->length(2) == 0) { - // key is empty, ignore it - continue; - } - queryParams_.emplace_back( - fbstring((*itr)[2].first, (*itr)[2].second), // parameter name - fbstring((*itr)[3].first, (*itr)[3].second) // parameter value - ); - } - } fragment_ = submatch(match, 4); } @@ -150,4 +130,30 @@ fbstring Uri::hostname() const { return host_; } +const std::vector>& Uri::getQueryParams() { + if (!query_.empty() && queryParams_.empty()) { + // Parse query string + static const boost::regex queryParamRegex( + "(^|&)" /*start of query or start of parameter "&"*/ + "([^=&]*)=?" /*parameter name and "=" if value is expected*/ + "([^=&]*)" /*parameter value*/ + "(?=(&|$))" /*forward reference, next should be end of query or + start of next parameter*/); + boost::cregex_iterator paramBeginItr( + query_.data(), query_.data() + query_.size(), queryParamRegex); + boost::cregex_iterator paramEndItr; + for (auto itr = paramBeginItr; itr != paramEndItr; itr++) { + if (itr->length(2) == 0) { + // key is empty, ignore it + continue; + } + queryParams_.emplace_back( + fbstring((*itr)[2].first, (*itr)[2].second), // parameter name + fbstring((*itr)[3].first, (*itr)[3].second) // parameter value + ); + } + } + return queryParams_; +} + } // namespace folly diff --git a/folly/Uri.h b/folly/Uri.h index 581307d9..3d075d57 100644 --- a/folly/Uri.h +++ b/folly/Uri.h @@ -90,13 +90,15 @@ class Uri { * one equal signs, we don't know which one is the delimiter for key and * value. * + * Note, this method is not thread safe, it might update internal state, but + * only the first call to this method update the state. After the first call + * is finished, subsequent calls to this method are thread safe. + * * @return query parameter key-value pairs in a vector, each element is a * pair of which the first element is parameter name and the second * one is parameter value */ - const std::vector>& getQueryParams() const { - return queryParams_; - }; + const std::vector>& getQueryParams(); private: fbstring scheme_; diff --git a/folly/test/UriTest.cpp b/folly/test/UriTest.cpp index 7f8faee8..d16242d6 100644 --- a/folly/test/UriTest.cpp +++ b/folly/test/UriTest.cpp @@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -389,3 +390,74 @@ TEST(Uri, Simple) { } } } + +/** + * Result of benchmark varies by the complexity of query. + * ============================================================================ + * folly/test/UriTest.cpp relative time/iter iters/s + * ============================================================================ + * init_uri_simple 4.88us 204.80K + * init_uri_simple_with_query_parsing 22.46us 44.52K + * init_uri_complex 5.92us 168.85K + * init_uri_complex_with_query_parsing 48.70us 20.53K + * ============================================================================ + */ +BENCHMARK(init_uri_simple, iters) { + const fbstring s("http://localhost?&key1=foo&key2=&key3&=bar&=bar=&"); + for (int i = 0; i < iters; ++i) { + Uri u(s); + } +} + +BENCHMARK(init_uri_simple_with_query_parsing, iters) { + const fbstring s("http://localhost?&key1=foo&key2=&key3&=bar&=bar=&"); + for (int i = 0; i < iters; ++i) { + Uri u(s); + u.getQueryParams(); + } +} + +BENCHMARK(init_uri_complex, iters) { + const fbstring s( + "https://mock.example.com/farm/track.php?TmOxQUDF=uSmTS_VwhjKnh_JME&DI" + "h=fbbN&GRsoIm=bGshjaUqavZxQai&UMT=36k18N4dn21&3U=CD8o4A4497W152j6m0V%14" + "%57&Hy=t%05mpr.80JUZ7ne_%23zS8DcA%0qc_%291ymamz096%11Zfb3r%09ZqPD%311ZX" + "tqJd600ot&5U96U-Rh-VZ=-D_6-9xKYj%1gW6b43s1B9-j21P0oUW5-t46G4kgt&ezgj=mcW" + "TTQ.c&Oh=%2PblUfuC%7C997048884827569%03xnyJ%2L1pi7irBioQ6D4r7nNHNdo6v7Y%" + "84aurnSJ%2wCFePHMlGZmIHGfCe7392_lImWsSvN&sBeNN=Nf%80yOE%6X10M64F4gG197aX" + "R2B4g2533x235A0i4e%57%58uWB%04Erw.60&VMS4=Ek_%02GC0Pkx%6Ov_%207WICUz007%" + "04nYX8N%46zzpv%999h&KGmBt988y=q4P57C-Dh-Nz-x_7-5oPxz%1gz3N03t6c7-R67N4DT" + "Y6-f98W1&Lts&%02dOty%8eEYEnLz4yexQQLnL4MGU2JFn3OcmXcatBcabZgBdDdy67hdgW" + "tYn4"); + for (int i = 0; i < iters; ++i) { + Uri u(s); + } +} + +BENCHMARK(init_uri_complex_with_query_parsing, iters) { + const fbstring s( + "https://mock.example.com/farm/track.php?TmOxQUDF=uSmTS_VwhjKnh_JME&DI" + "h=fbbN&GRsoIm=bGshjaUqavZxQai&UMT=36k18N4dn21&3U=CD8o4A4497W152j6m0V%14" + "%57&Hy=t%05mpr.80JUZ7ne_%23zS8DcA%0qc_%291ymamz096%11Zfb3r%09ZqPD%311ZX" + "tqJd600ot&5U96U-Rh-VZ=-D_6-9xKYj%1gW6b43s1B9-j21P0oUW5-t46G4kgt&ezgj=mcW" + "TTQ.c&Oh=%2PblUfuC%7C997048884827569%03xnyJ%2L1pi7irBioQ6D4r7nNHNdo6v7Y%" + "84aurnSJ%2wCFePHMlGZmIHGfCe7392_lImWsSvN&sBeNN=Nf%80yOE%6X10M64F4gG197aX" + "R2B4g2533x235A0i4e%57%58uWB%04Erw.60&VMS4=Ek_%02GC0Pkx%6Ov_%207WICUz007%" + "04nYX8N%46zzpv%999h&KGmBt988y=q4P57C-Dh-Nz-x_7-5oPxz%1gz3N03t6c7-R67N4DT" + "Y6-f98W1&Lts&%02dOty%8eEYEnLz4yexQQLnL4MGU2JFn3OcmXcatBcabZgBdDdy67hdgW" + "tYn4"); + for (int i = 0; i < iters; ++i) { + Uri u(s); + u.getQueryParams(); + } +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + auto r = RUN_ALL_TESTS(); + if (r) { + return r; + } + runBenchmarks(); + return 0; +}