#ifndef FOLLY_BASE_HASH_H_
#define FOLLY_BASE_HASH_H_
-#include <stdint.h>
#include <cstring>
+#include <stdint.h>
#include <string>
+#include <utility>
/*
* Various hashing functions.
namespace folly { namespace hash {
+// This is a general-purpose way to create a single hash from multiple
+// hashable objects. It relies on std::hash<T> being available for all
+// relevant types and combines those hashes in an order-dependent way
+// to yield a new hash.
+
+// Never used, but gcc demands it.
+inline size_t hash_combine() {
+ return 0;
+}
+
+// This is the Hash128to64 function from Google's cityhash (available
+// under the MIT License). We use it to reduce multiple 64 bit hashes
+// into a single hash.
+inline size_t hash_128_to_64(const size_t upper, const size_t lower) {
+ // Murmur-inspired hashing.
+ const size_t kMul = 0x9ddfea08eb382d69ULL;
+ size_t a = (lower ^ upper) * kMul;
+ a ^= (a >> 47);
+ size_t b = (upper ^ a) * kMul;
+ b ^= (b >> 47);
+ b *= kMul;
+ return b;
+}
+
+template <typename T, typename... Ts>
+size_t hash_combine(const T& t, const Ts&... ts) {
+ size_t seed = std::hash<T>()(t);
+ if (sizeof...(ts) == 0) {
+ return seed;
+ }
+ size_t remainder = hash_combine(ts...);
+ return hash_128_to_64(seed, remainder);
+}
+
//////////////////////////////////////////////////////////////////////
/*
} // namespace folly
+// Custom hash functions.
+namespace std {
+ // Hash function for pairs. Requires default hash functions for both
+ // items in the pair.
+ template <typename T1, typename T2>
+ class hash<std::pair<T1, T2> > {
+ public:
+ size_t operator()(const std::pair<T1, T2>& x) const {
+ return folly::hash::hash_combine(x.first, x.second);
+ }
+ };
+
+ // Same as above, but for arbitrary tuples.
+ template <typename... Ts>
+ class hash<std::tuple<Ts...> > {
+ public:
+ size_t operator()(const Ts&... ts) const {
+ return folly::hash::hash_combine(ts...);
+ }
+ };
+} // namespace std
+
#endif
--- /dev/null
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_LOGGING_H_
+#define FOLLY_LOGGING_H_
+
+#include <time.h>
+#include <glog/logging.h>
+
+#ifndef FB_LOG_EVERY_MS
+/**
+ * Issues a LOG(severity) no more often that every
+ * milliseconds. Example:
+ *
+ * FB_LOG_EVERY_MS(INFO, 10000) << "At least ten seconds passed"
+ * " since you last saw this.";
+ *
+ * The implementation uses for statements to introduce variables in
+ * a nice way that doesn't mess surrounding statements.
+ */
+#define FB_LOG_EVERY_MS(severity, milliseconds) \
+ for (bool LOG_EVERY_MS_once = true; LOG_EVERY_MS_once; ) \
+ for (const ::clock_t LOG_EVERY_MS_now = ::clock(); LOG_EVERY_MS_once; ) \
+ for (static ::clock_t LOG_EVERY_MS_last; LOG_EVERY_MS_once; \
+ LOG_EVERY_MS_once = false) \
+ if (1000 * (LOG_EVERY_MS_now - LOG_EVERY_MS_last) < \
+ (milliseconds) * CLOCKS_PER_SEC) {} \
+ else \
+ (LOG_EVERY_MS_last = LOG_EVERY_MS_now, LOG(severity))
+
+#endif
+
+#endif // FOLLY_LOGGING_H_
IntrusiveList.h \
json.h \
Likely.h \
+ Logging.h \
Malloc.h \
MapUtil.h \
PackedSyncPtr.h \
ignoreEmpty);
}
+template <class String1, class String2>
+void backslashify(const String1& input, String2& output, bool hex_style) {
+ static const char hexValues[] = "0123456789abcdef";
+ output.clear();
+ output.reserve(3 * input.size());
+ for (unsigned char c : input) {
+ // less than space or greater than '~' are considered unprintable
+ if (c < 0x20 || c > 0x7e || c == '\\') {
+ bool hex_append = false;
+ output.push_back('\\');
+ if (hex_style) {
+ hex_append = true;
+ } else {
+ if (c == '\r') output += 'r';
+ else if (c == '\n') output += 'n';
+ else if (c == '\t') output += 't';
+ else if (c == '\a') output += 'a';
+ else if (c == '\b') output += 'b';
+ else if (c == '\0') output += '0';
+ else if (c == '\\') output += '\\';
+ else {
+ hex_append = true;
+ }
+ }
+ if (hex_append) {
+ output.push_back('x');
+ output.push_back(hexValues[(c >> 4) & 0xf]);
+ output.push_back(hexValues[c & 0xf]);
+ }
+ } else {
+ output += c;
+ }
+ }
+}
+
+template <class String1, class String2>
+void humanify(const String1& input, String2& output) {
+ int numUnprintable = 0;
+ int numPrintablePrefix = 0;
+ for (unsigned char c : input) {
+ if (c < 0x20 || c > 0x7e || c == '\\') {
+ ++numUnprintable;
+ }
+ if (numUnprintable == 0) {
+ ++numPrintablePrefix;
+ }
+ }
+
+ // hexlify doubles a string's size; backslashify can potentially
+ // explode it by 4x. Now, the printable range of the ascii
+ // "spectrum" is around 95 out of 256 values, so a "random" binary
+ // string should be around 60% unprintable. We use a 50% hueristic
+ // here, so if a string is 60% unprintable, then we just use hex
+ // output. Otherwise we backslash.
+ //
+ // UTF8 is completely ignored; as a result, utf8 characters will
+ // likely be \x escaped (since most common glyphs fit in two bytes).
+ // This is a tradeoff of complexity/speed instead of a convenience
+ // that likely would rarely matter. Moreover, this function is more
+ // about displaying underlying bytes, not about displaying glyphs
+ // from languages.
+ if (numUnprintable == 0) {
+ output = input;
+ } else if (5 * numUnprintable >= 3 * input.size()) {
+ // However! If we have a "meaningful" prefix of printable
+ // characters, say 20% of the string, we backslashify under the
+ // assumption viewing the prefix as ascii is worth blowing the
+ // output size up a bit.
+ if (5 * numPrintablePrefix >= input.size()) {
+ backslashify(input, output);
+ } else {
+ output = "0x";
+ hexlify(input, output, true /* append output */);
+ }
+ } else {
+ backslashify(input, output);
+ }
+}
+
+template<class InputString, class OutputString>
+bool hexlify(const InputString& input, OutputString& output,
+ bool append_output=false) {
+ if (!append_output) output.clear();
+
+ static char hexValues[] = "0123456789abcdef";
+ int j = output.size();
+ output.resize(2 * input.size() + output.size());
+ for (int i = 0; i < input.size(); ++i) {
+ int ch = input[i];
+ output[j++] = hexValues[(ch >> 4) & 0xf];
+ output[j++] = hexValues[ch & 0xf];
+ }
+ return true;
+}
+
+template<class InputString, class OutputString>
+bool unhexlify(const InputString& input, OutputString& output) {
+ if (input.size() % 2 != 0) {
+ return false;
+ }
+ output.resize(input.size() / 2);
+ int j = 0;
+ auto unhex = [](char c) -> int {
+ return c >= '0' && c <= '9' ? c - '0' :
+ c >= 'A' && c <= 'F' ? c - 'A' + 10 :
+ c >= 'a' && c <= 'f' ? c - 'a' + 10 :
+ -1;
+ };
+
+ for (int i = 0; i < input.size(); i += 2) {
+ int highBits = unhex(input[i]);
+ int lowBits = unhex(input[i + 1]);
+ if (highBits < 0 || lowBits < 0) {
+ return false;
+ }
+ output[j++] = (highBits << 4) + lowBits;
+ }
+ return true;
+}
+
namespace detail {
/**
* Hex-dump at most 16 bytes starting at offset from a memory area of size
std::string& stringAppendf(std::string* output, const char* format, ...)
__attribute__ ((format (printf, 2, 3)));
+/**
+ * Backslashify a string, that is, replace non-printable characters
+ * with C-style (but NOT C compliant) "\xHH" encoding. If hex_style
+ * is false, then shorthand notations like "\0" will be used instead
+ * of "\x00" for the most common backslash cases.
+ *
+ * There are two forms, one returning the input string, and one
+ * creating output in the specified output string.
+ *
+ * This is mainly intended for printing to a terminal, so it is not
+ * particularly optimized.
+ *
+ * Do *not* use this in situations where you expect to be able to feed
+ * the string to a C or C++ compiler, as there are nuances with how C
+ * parses such strings that lead to failures. This is for display
+ * purposed only. If you want a string you can embed for use in C or
+ * C++, use cEscape instead. This function is for display purposes
+ * only.
+ */
+template <class String1, class String2>
+void backslashify(const String1& input, String2& output, bool hex_style=false);
+
+template <class String>
+String backslashify(const String& input, bool hex_style=false) {
+ String output;
+ backslashify(input, output, hex_style);
+ return output;
+}
+
+/**
+ * Take a string and "humanify" it -- that is, make it look better.
+ * Since "better" is subjective, caveat emptor. The basic approach is
+ * to count the number of unprintable characters. If there are none,
+ * then the output is the input. If there are relatively few, or if
+ * there is a long "enough" prefix of printable characters, use
+ * backslashify. If it is mostly binary, then simply hex encode.
+ *
+ * This is an attempt to make a computer smart, and so likely is wrong
+ * most of the time.
+ */
+template <class String1, class String2>
+void humanify(const String1& input, String2& output);
+
+template <class String>
+String humanify(const String& input) {
+ String output;
+ humanify(input, output);
+ return output;
+}
+
+/**
+ * Same functionality as Python's binascii.hexlify. Returns true
+ * on successful conversion.
+ *
+ * If append_output is true, append data to the output rather than
+ * replace it.
+ */
+template<class InputString, class OutputString>
+bool hexlify(const InputString& input, OutputString& output,
+ bool append=false);
+
+/**
+ * Same functionality as Python's binascii.unhexlify. Returns true
+ * on successful conversion.
+ */
+template<class InputString, class OutputString>
+bool unhexlify(const InputString& input, OutputString& output);
+
/*
* A pretty-printer for numbers that appends suffixes of units of the
* given type. It prints 4 sig-figs of value with the most
m.insert(std::make_pair(4, 5));
EXPECT_EQ(get_default(m, 4), 5);
}
+
+TEST(Hash, hash_combine) {
+ EXPECT_NE(hash_combine(1, 2), hash_combine(2, 1));
+}
piecesTest<folly::fbvector>();
}
+TEST(String, hexlify) {
+ string input1 = "0123";
+ string output1;
+ EXPECT_TRUE(hexlify(input1, output1));
+ EXPECT_EQ(output1, "30313233");
+
+ fbstring input2 = "abcdefg";
+ input2[1] = 0;
+ input2[3] = 0xff;
+ input2[5] = 0xb6;
+ fbstring output2;
+ EXPECT_TRUE(hexlify(input2, output2));
+ EXPECT_EQ(output2, "610063ff65b667");
+}
+
+TEST(String, unhexlify) {
+ string input1 = "30313233";
+ string output1;
+ EXPECT_TRUE(unhexlify(input1, output1));
+ EXPECT_EQ(output1, "0123");
+
+ fbstring input2 = "610063ff65b667";
+ fbstring output2;
+ EXPECT_TRUE(unhexlify(input2, output2));
+ EXPECT_EQ(output2.size(), 7);
+ EXPECT_EQ(output2[0], 'a');
+ EXPECT_EQ(output2[1], 0);
+ EXPECT_EQ(output2[2], 'c');
+ EXPECT_EQ(output2[3] & 0xff, 0xff);
+ EXPECT_EQ(output2[4], 'e');
+ EXPECT_EQ(output2[5] & 0xff, 0xb6);
+ EXPECT_EQ(output2[6], 'g');
+
+ string input3 = "x";
+ string output3;
+ EXPECT_FALSE(unhexlify(input3, output3));
+
+ string input4 = "xy";
+ string output4;
+ EXPECT_FALSE(unhexlify(input4, output4));
+}
+
+TEST(String, backslashify) {
+ EXPECT_EQ("abc", string("abc"));
+ EXPECT_EQ("abc", backslashify(string("abc")));
+ EXPECT_EQ("abc\\r", backslashify(string("abc\r")));
+ EXPECT_EQ("abc\\x0d", backslashify(string("abc\r"), true));
+ EXPECT_EQ("\\0\\0", backslashify(string(2, '\0')));
+}
+
+TEST(String, humanify) {
+ // Simple cases; output is obvious.
+ EXPECT_EQ("abc", humanify(string("abc")));
+ EXPECT_EQ("abc\\\\r", humanify(string("abc\\r")));
+ EXPECT_EQ("0xff", humanify(string("\xff")));
+ EXPECT_EQ("abc\\xff", humanify(string("abc\xff")));
+ EXPECT_EQ("abc\\b", humanify(string("abc\b")));
+ EXPECT_EQ("0x00", humanify(string(1, '\0')));
+ EXPECT_EQ("0x0000", humanify(string(2, '\0')));
+
+
+ // Mostly printable, so backslash! 80, 60, and 40% printable, respectively
+ EXPECT_EQ("aaaa\\xff", humanify(string("aaaa\xff")));
+ EXPECT_EQ("aaa\\xff\\xff", humanify(string("aaa\xff\xff")));
+ EXPECT_EQ("aa\\xff\\xff\\xff", humanify(string("aa\xff\xff\xff")));
+
+ // 20% printable, and the printable portion isn't the prefix; hexify!
+ EXPECT_EQ("0xff61ffffff", humanify(string("\xff" "a\xff\xff\xff")));
+
+ // Same as previous, except swap first two chars; prefix is
+ // printable and within the threshold, so backslashify.
+ EXPECT_EQ("a\\xff\\xff\\xff\\xff", humanify(string("a\xff\xff\xff\xff")));
+
+ // Just too much unprintable; hex, despite prefix.
+ EXPECT_EQ("0x61ffffffffff", humanify(string("a\xff\xff\xff\xff\xff")));
+}
+
//////////////////////////////////////////////////////////////////////
BENCHMARK(splitOnSingleChar, iters) {