From: Henry Filgueiras Date: Wed, 6 Aug 2014 18:39:55 +0000 (-0700) Subject: Improve IPAddress::toFullyQualified() CPU performance X-Git-Tag: v0.22.0~416 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=59bd43f8a546b0fc911b04406ebe7ae8faf7e1f2;p=folly.git Improve IPAddress::toFullyQualified() CPU performance Summary: Currently IPAddress::toFullyQualified() is fairly slow for IPv6. Change here implements more lightweight in_addr/in6_addr to string functions. I also added a benchmark for comparison with inet_ntop. This makes IPAddressV6::toFullyQualified() significantly faster than inet_ntop, and makes IPAddressV4::str() ~20ns faster than previous implementation (previously ~80ns). Previous benchmark: ============================================================================ folly/test/IPAddressBenchmark.cpp relative time/iter iters/s ============================================================================ ipv4_to_string_inet_ntop 238.91ns 4.19M ipv4_to_fully_qualified 289.96% 82.39ns 12.14M ---------------------------------------------------------------------------- ipv6_to_string_inet_ntop 780.72ns 1.28M ipv6_to_fully_qualified 51.11% 1.53us 654.59K ============================================================================ With this change: ============================================================================ folly/test/IPAddressBenchmark.cpp relative time/iter iters/s ============================================================================ ipv4_to_string_inet_ntop 238.06ns 4.20M ipv4_to_fully_qualified 364.76% 65.26ns 15.32M ---------------------------------------------------------------------------- ipv6_to_string_inet_ntop 770.74ns 1.30M ipv6_to_fully_qualified 791.63% 97.36ns 10.27M ============================================================================ Test Plan: fbconfig folly/test:network_address_test folly/test:network_address_benchmark fbmake runtests_opt Reviewed By: simpkins@fb.com Subscribers: ps, bmatheny FB 
internal diff: D1477925 Tasks: 4832974 --- diff --git a/folly/IPAddressV4.cpp b/folly/IPAddressV4.cpp index 625d54fa..ae403eb4 100644 --- a/folly/IPAddressV4.cpp +++ b/folly/IPAddressV4.cpp @@ -185,35 +185,8 @@ IPAddressV4 IPAddressV4::mask(size_t numBits) const { } // public -// Taken from TSocketAddress::getAddressStrIPv4Fast string IPAddressV4::str() const { - char buf[INET_ADDRSTRLEN] = {0}; - const uint8_t* ip = addr_.bytes_.data(); - int pos = 0; - for (int k = 0; k < 4; ++k) { - uint8_t num = ip[k]; - - if (num >= 200) { - buf[pos++] = '2'; - num -= 200; - } else if (num >= 100) { - buf[pos++] = '1'; - num -= 100; - } - - // num < 100 - if (ip[k] >= 10) { - buf[pos++] = '0' + num / 10; - buf[pos++] = '0' + num % 10; - } else { - buf[pos++] = '0' + num; - } - - buf[pos++] = '.'; - } - buf[pos-1] = '\0'; - string ipAddr(buf); - return std::move(ipAddr); + return detail::fastIpv4ToString(addr_.inAddr_); } // public diff --git a/folly/IPAddressV6.cpp b/folly/IPAddressV6.cpp index f07238d3..188a4776 100644 --- a/folly/IPAddressV6.cpp +++ b/folly/IPAddressV6.cpp @@ -319,14 +319,7 @@ string IPAddressV6::str() const { // public string IPAddressV6::toFullyQualified() const { - auto asHex = detail::Bytes::toHex(bytes(), 16); - uint8_t chunks = asHex.size() / 4; - for (int chunk = 1; chunk < chunks; chunk++) { - // position changes as new characters are inserted - int pos = (chunk*4) + (chunk - 1); - asHex.insert(pos, ":"); - } - return asHex; + return detail::fastIpv6ToString(addr_.in6Addr_); } // public diff --git a/folly/detail/IPAddress.h b/folly/detail/IPAddress.h index a9ffe4d9..65b42afa 100644 --- a/folly/detail/IPAddress.h +++ b/folly/detail/IPAddress.h @@ -183,4 +183,113 @@ struct Bytes : private boost::noncopyable { ~Bytes() = delete; }; +// +// Write a maximum amount of base-converted character digits, of a +// given base, from an unsigned integral type into a byte buffer of +// sufficient size. +// +// This function does not append null terminators. 
+// +// Output buffer size must be guaranteed by caller (indirectly +// controlled by DigitCount template parameter). +// +// Having these parameters at compile time allows compiler to +// precompute several of the values, use smaller instructions, and +// better optimize surrounding code. +// +// IntegralType: +// - Something like uint8_t, uint16_t, etc +// +// DigitCount is the maximum number of digits to be printed +// - This is tied to IntegralType and Base. For example: +// - uint8_t in base 10 will print at most 3 digits ("255") +// - uint16_t in base 16 will print at most 4 hex digits ("FFFF") +// +// Base is the desired output base of the string +// - Base 10 will print [0-9], base 16 will print [0-9a-f] +// +// PrintAllDigits: +// - Whether or not leading zeros should be printed +// +template< + class IntegralType, + IntegralType DigitCount, + IntegralType Base = 10, + bool PrintAllDigits = false, + class = typename std::enable_if< + std::is_integral<IntegralType>::value && + std::is_unsigned<IntegralType>::value, + bool>::type> + inline void writeIntegerString( + IntegralType val, + char** buffer) { + char* buf = *buffer; + + if (!PrintAllDigits && val == 0) { + *(buf++) = '0'; + *buffer = buf; + return; + } + + IntegralType powerToPrint = 1; + for (int i = 1; i < DigitCount; ++i) { + powerToPrint *= Base; + } + + bool found = PrintAllDigits; + while (powerToPrint) { + + if (found || powerToPrint <= val) { + IntegralType value = val/powerToPrint; + if (Base == 10 || value < 10) { + value += '0'; + } else { + value += ('a'-10); + } + *(buf++) = value; + val %= powerToPrint; + found = true; + } + + powerToPrint /= Base; + } + + *buffer = buf; +} + +inline std::string fastIpv4ToString( + const in_addr& inAddr) { + const uint8_t* octets = reinterpret_cast<const uint8_t*>(&inAddr.s_addr); + char str[sizeof("255.255.255.255")]; + char* buf = str; + + writeIntegerString<uint8_t, 3>(octets[0], &buf); + *(buf++) = '.'; + writeIntegerString<uint8_t, 3>(octets[1], &buf); + *(buf++) = '.'; + writeIntegerString<uint8_t, 3>(octets[2], &buf); + *(buf++) = '.'; + writeIntegerString<uint8_t, 3>(octets[3], &buf); + + return std::string(str, buf-str); +} + +inline std::string fastIpv6ToString(const in6_addr& in6Addr) { + 
const uint16_t* bytes = reinterpret_cast<const uint16_t*>(&in6Addr.s6_addr16); + char str[sizeof("2001:0db8:0000:0000:0000:ff00:0042:8329")]; + char* buf = str; + + for (int i = 0; i < 8; ++i) { + writeIntegerString<uint16_t, 4, 16, true>(htons(bytes[i]), &buf); + + if(i != 7) { + *(buf++) = ':'; + } + } + + return std::string(str, buf-str); +} + }} // folly::detail diff --git a/folly/test/IPAddressBenchmark.cpp b/folly/test/IPAddressBenchmark.cpp new file mode 100644 index 00000000..fc2713f6 --- /dev/null +++ b/folly/test/IPAddressBenchmark.cpp @@ -0,0 +1,87 @@ +/* + * Copyright 2014 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +#include + +using namespace folly; +using std::string; + +BENCHMARK(ipv4_to_string_inet_ntop, iters) { + folly::IPAddressV4 ipv4Addr("127.0.0.1"); + in_addr ip = ipv4Addr.toAddr(); + char outputString[INET_ADDRSTRLEN] = {0}; + + while (iters--) { + const char* val = inet_ntop( + AF_INET, + &ip, + outputString, + sizeof(outputString)); + } +} + +BENCHMARK_RELATIVE(ipv4_to_fully_qualified, iters) { + IPAddressV4 ip("127.0.0.1"); + while (iters--) { + string outputString = ip.toFullyQualified(); + } +} + +BENCHMARK_DRAW_LINE() + +BENCHMARK(ipv6_to_string_inet_ntop, iters) { + IPAddressV6 ipv6Addr("F1E0:0ACE:FB94:7ADF:22E8:6DE6:9672:3725"); + in6_addr ip = ipv6Addr.toAddr(); + char outputString[INET6_ADDRSTRLEN] = {0}; + bool checkResult = (iters == 1); + + while (iters--) { + const char* val = inet_ntop( + AF_INET6, + &ip, + outputString, + sizeof(outputString)); + } +} + +BENCHMARK_RELATIVE(ipv6_to_fully_qualified, iters) { + IPAddressV6 ip("F1E0:0ACE:FB94:7ADF:22E8:6DE6:9672:3725"); + string outputString; + while (iters--) { + outputString = ip.toFullyQualified(); + } +} + +// Benchmark results on Intel Xeon CPU E5-2660 @ 2.20GHz +// ============================================================================ +// folly/test/IPAddressBenchmark.cpp relative time/iter iters/s +// ============================================================================ +// ipv4_to_string_inet_ntop 237.87ns 4.20M +// ipv4_to_fully_qualified 362.31% 65.65ns 15.23M +// ---------------------------------------------------------------------------- +// ipv6_to_string_inet_ntop 768.60ns 1.30M +// ipv6_to_fully_qualified 821.81% 93.53ns 10.69M +// ============================================================================ + +int main(int argc, char *argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + runBenchmarks(); + return 0; +}