From 987da6d2fb6598111f3e1ee3a0f40f35104e6fce Mon Sep 17 00:00:00 2001 From: Tudor Bosman Date: Tue, 12 Jun 2012 09:20:03 -0700 Subject: [PATCH] Move Rabin fingerprinting code to folly. Summary: Also generate fingerprint tables every time, so the code doesn't rot. TODO(tudorb): move benchmark to folly TODO(tudorb): Include the program used to generate the polynomials (can't build as it requires NTL from http://www.shoup.net/ntl/) Test Plan: folly/test Reviewed By: andrei.alexandrescu@fb.com FB internal diff: D492455 --- folly/Fingerprint.h | 266 ++++++++++++++++++++++ folly/build/GenerateFingerprintTables.cpp | 158 +++++++++++++ folly/detail/FingerprintPolynomial.h | 146 ++++++++++++ folly/detail/SlowFingerprint.h | 93 ++++++++ folly/test/FingerprintTest.cpp | 175 ++++++++++++++ 5 files changed, 838 insertions(+) create mode 100644 folly/Fingerprint.h create mode 100644 folly/build/GenerateFingerprintTables.cpp create mode 100644 folly/detail/FingerprintPolynomial.h create mode 100644 folly/detail/SlowFingerprint.h create mode 100644 folly/test/FingerprintTest.cpp diff --git a/folly/Fingerprint.h b/folly/Fingerprint.h new file mode 100644 index 00000000..e6654083 --- /dev/null +++ b/folly/Fingerprint.h @@ -0,0 +1,266 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Compute 64-, 96-, and 128-bit Rabin fingerprints, as described in + * Michael O. Rabin (1981) + * Fingerprinting by Random Polynomials + * Center for Research in Computing Technology, Harvard University + * Tech Report TR-CSE-03-01 + * + * The implementation follows the optimization described in + * Andrei Z. Broder (1993) + * Some applications of Rabin's fingerprinting method + * + * extended for fingerprints larger than 64 bits, and modified to use + * 64-bit instead of 32-bit integers for computation. + * + * The precomputed tables are in FingerprintTable.cpp, which is automatically + * generated by ComputeFingerprintTable.cpp. + * + * Benchmarked on 10/13/2009 on a 2.5GHz quad-core Xeon L5420, + * - Fingerprint<64>::update64() takes about 12ns + * - Fingerprint<96>::update64() takes about 30ns + * - Fingerprint<128>::update128() takes about 30ns + * (unsurprisingly, Fingerprint<96> and Fingerprint<128> take the + * same amount of time, as they both use 128-bit operations; the least + * significant 32 bits of Fingerprint<96> will always be 0) + * + * @author Tudor Bosman (tudorb@facebook.com) + */ + +#ifndef FOLLY_FINGERPRINT_H_ +#define FOLLY_FINGERPRINT_H_ + +#include + +#include "folly/Range.h" + +namespace folly { + +namespace detail { +template +struct FingerprintTable { + static const uint64_t poly[1 + (BITS-1)/64]; + static const uint64_t table[8][256][1 + (BITS-1)/64]; +}; +} // namespace detail + +/** + * Compute the Rabin fingerprint. + * + * TODO(tudorb): Extend this to allow removing values from the computed + * fingerprint (so we can fingerprint a sliding window, as in the Rabin-Karp + * string matching algorithm) + * + * update* methods return *this, so you can chain them together: + * Fingerprint<96>().update8(x).update(str).update64(val).write(output); + */ +template +class Fingerprint { + public: + Fingerprint() { + // Use a non-zero starting value. We'll use (1 << (BITS-1)) + fp_[0] = 1UL << 63; + for (int i = 1; i < size(); i++) + fp_[i] = 0; + } + + Fingerprint& update8(uint8_t v) { + uint8_t out = shlor8(v); + xortab(detail::FingerprintTable::table[0][out]); + return *this; + } + + // update32 and update64 are convenience functions to update the fingerprint + // with 4 and 8 bytes at a time. They are faster than calling update8 + // in a loop. They process the bytes in big-endian order. + Fingerprint& update32(uint32_t v) { + uint32_t out = shlor32(v); + for (int i = 0; i < 4; i++) { + xortab(detail::FingerprintTable::table[i][out&0xff]); + out >>= 8; + } + return *this; + } + + Fingerprint& update64(uint64_t v) { + uint64_t out = shlor64(v); + for (int i = 0; i < 8; i++) { + xortab(detail::FingerprintTable::table[i][out&0xff]); + out >>= 8; + } + return *this; + } + + Fingerprint& update(StringPiece str) { + // TODO(tudorb): We could be smart and do update64 or update32 if aligned + for (auto c : str) { + update8(uint8_t(c)); + } + return *this; + } + + /** + * Return the number of uint64s needed to hold the fingerprint value. + */ + static int size() { + return 1 + (BITS-1)/64; + } + + /** + * Write the computed fingeprint to an array of size() uint64_t's. + * For Fingerprint<64>, size()==1; we write 64 bits in out[0] + * For Fingerprint<96>, size()==2; we write 64 bits in out[0] and + * the most significant 32 bits of out[1] + * For Fingerprint<128>, size()==2; we write 64 bits in out[0] and + * 64 bits in out[1]. + */ + void write(uint64_t* out) const { + for (int i = 0; i < size(); i++) { + out[i] = fp_[i]; + } + } + + private: + // XOR the fingerprint with a value from one of the tables. + void xortab(const uint64_t* tab) { + for (int i = 0; i < size(); i++) { + fp_[i] ^= tab[i]; + } + } + + // Helper functions: shift the fingerprint value left by 8/32/64 bits, + // return the "out" value (the bits that were shifted out), and add "v" + // in the bits on the right. + uint8_t shlor8(uint8_t v); + uint32_t shlor32(uint32_t v); + uint64_t shlor64(uint64_t v); + + uint64_t fp_[1 + (BITS-1)/64]; +}; + +// Convenience functions + +/** + * Return the 64-bit Rabin fingerprint of a string. + */ +inline uint64_t fingerprint64(StringPiece str) { + uint64_t fp; + Fingerprint<64>().update(str).write(&fp); + return fp; +} + +/** + * Compute the 96-bit Rabin fingerprint of a string. + * Return the 64 most significant bits in *msb, and the 32 least significant + * bits in *lsb. + */ +inline void fingerprint96(StringPiece str, + uint64_t* msb, uint32_t* lsb) { + uint64_t fp[2]; + Fingerprint<96>().update(str).write(fp); + *msb = fp[0]; + *lsb = (uint32_t)(fp[1] >> 32); +} + +/** + * Compute the 128-bit Rabin fingerprint of a string. + * Return the 64 most significant bits in *msb, and the 64 least significant + * bits in *lsb. + */ +inline void fingerprint128(StringPiece str, + uint64_t* msb, uint64_t* lsb) { + uint64_t fp[2]; + Fingerprint<128>().update(str).write(fp); + *msb = fp[0]; + *lsb = fp[1]; +} + + +template <> +inline uint8_t Fingerprint<64>::shlor8(uint8_t v) { + uint8_t out = (uint8_t)(fp_[0] >> 56); + fp_[0] = (fp_[0] << 8) | ((uint64_t)v); + return out; +} + +template <> +inline uint32_t Fingerprint<64>::shlor32(uint32_t v) { + uint32_t out = (uint32_t)(fp_[0] >> 32); + fp_[0] = (fp_[0] << 32) | ((uint64_t)v); + return out; +} + +template <> +inline uint64_t Fingerprint<64>::shlor64(uint64_t v) { + uint64_t out = fp_[0]; + fp_[0] = v; + return out; +} + +template <> +inline uint8_t Fingerprint<96>::shlor8(uint8_t v) { + uint8_t out = (uint8_t)(fp_[0] >> 56); + fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56); + fp_[1] = (fp_[1] << 8) | ((uint64_t)v << 32); + return out; +} + +template <> +inline uint32_t Fingerprint<96>::shlor32(uint32_t v) { + uint32_t out = (uint32_t)(fp_[0] >> 32); + fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32); + fp_[1] = ((uint64_t)v << 32); + return out; +} + +template <> +inline uint64_t Fingerprint<96>::shlor64(uint64_t v) { + uint64_t out = fp_[0]; + fp_[0] = fp_[1] | (v >> 32); + fp_[1] = v << 32; + return out; +} + +template <> +inline uint8_t Fingerprint<128>::shlor8(uint8_t v) { + uint8_t out = (uint8_t)(fp_[0] >> 56); + fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56); + fp_[1] = (fp_[1] << 8) | ((uint64_t)v); + return out; +} + +template <> +inline uint32_t Fingerprint<128>::shlor32(uint32_t v) { + uint32_t out = (uint32_t)(fp_[0] >> 32); + fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32); + fp_[1] = (fp_[1] << 32) | ((uint64_t)v); + return out; +} + +template <> +inline uint64_t Fingerprint<128>::shlor64(uint64_t v) { + uint64_t out = fp_[0]; + fp_[0] = fp_[1]; + fp_[1] = v; + return out; +} + +} // namespace folly + +#endif /* FOLLY_FINGERPRINT_H_ */ + diff --git a/folly/build/GenerateFingerprintTables.cpp b/folly/build/GenerateFingerprintTables.cpp new file mode 100644 index 00000000..b864406d --- /dev/null +++ b/folly/build/GenerateFingerprintTables.cpp @@ -0,0 +1,158 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include + +#include "folly/Format.h" + +#include "folly/detail/FingerprintPolynomial.h" + +using namespace folly; +using namespace folly::detail; + +// The defaults were generated by a separate program that requires the +// NTL (Number Theory Library) from http://www.shoup.net/ntl/ +// +// Briefly: randomly generate a polynomial of degree D, test for +// irreducibility, repeat until you find an irreducible polynomial +// (roughly 1/D of all polynomials of degree D are irreducible, so +// this will succeed in D/2 tries on average; D is small (64..128) so +// this simple method works well) +// +// DO NOT REPLACE THE POLYNOMIALS USED, EVER, as that would change the value +// of every single fingerprint in existence. +DEFINE_int64(poly64, 0xbf3736b51869e9b7, + "Generate 64-bit tables using this polynomial"); +DEFINE_int64(poly96_m, 0x51555cb0aa8d39c3, + "Generate 96-bit tables using this polynomial " + "(most significant 64 bits)"); +DEFINE_int32(poly96_l, 0xb679ec37, + "Generate 96-bit tables using this polynomial " + "(least significant 32 bits)"); +DEFINE_int64(poly128_m, 0xc91bff9b8768b51b, + "Generate 128-bit tables using this polynomial " + "(most significant 64 bits)"); +DEFINE_int64(poly128_l, 0x8c5d5853bd77b0d3, + "Generate 128-bit tables using this polynomial " + "(least significant 64 bits)"); +DEFINE_string(install_dir, ".", + "Direectory to place output files in"); +DEFINE_string(fbcode_dir, "", "fbcode directory (ignored)"); + +namespace { + +template +void computeTables(FILE* file, const FingerprintPolynomial& poly) { + uint64_t table[8][256][FingerprintPolynomial::size()]; + // table[i][q] is Q(X) * X^(k+8*i) mod P(X), + // where k is the number of bits in the fingerprint (and deg(P)) and + // Q(X) = q7*X^7 + q6*X^6 + ... + q1*X + q0 is a degree-7 polyonomial + // whose coefficients are the bits of q. + for (int x = 0; x < 256; x++) { + FingerprintPolynomial t; + t.setHigh8Bits(x); + for (int i = 0; i < 8; i++) { + t.mulXkmod(8, poly); + t.write(&(table[i][x][0])); + } + } + + // Write the actual polynomial used; this isn't needed during fast + // fingerprint calculation, but it's useful for reference and unittesting. + uint64_t poly_val[FingerprintPolynomial::size()]; + poly.write(poly_val); + CHECK_ERR(fprintf(file, + "template <>\n" + "const uint64_t FingerprintTable<%d>::poly[%d] = {", + DEG+1, FingerprintPolynomial::size())); + for (int j = 0; j < FingerprintPolynomial::size(); j++) { + CHECK_ERR(fprintf(file, "%s%luLU", j ? ", " : "", poly_val[j])); + } + CHECK_ERR(fprintf(file, "};\n\n")); + + // Write the tables. + CHECK_ERR(fprintf(file, + "template <>\n" + "const uint64_t FingerprintTable<%d>::table[8][256][%d] = {\n", + DEG+1, FingerprintPolynomial::size())); + for (int i = 0; i < 8; i++) { + CHECK_ERR(fprintf(file, + " // Table %d" + "\n" + " {\n", i)); + for (int x = 0; x < 256; x++) { + CHECK_ERR(fprintf(file, " {")); + for (int j = 0; j < FingerprintPolynomial::size(); j++) { + CHECK_ERR(fprintf(file, "%s%luLU", (j ? ", " : ""), table[i][x][j])); + } + CHECK_ERR(fprintf(file, "},\n")); + } + CHECK_ERR(fprintf(file, " },\n")); + } + CHECK_ERR(fprintf(file, "\n};\n\n")); +} + +} // namespace + +int main(int argc, char *argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + google::InitGoogleLogging(argv[0]); + + std::string name = folly::format("{}/{}", FLAGS_install_dir, + "FingerprintTables.cpp").str(); + FILE* file = fopen(name.c_str(), "w"); + PCHECK(file); + + CHECK_ERR(fprintf(file, + "/**\n" + " * Fingerprint tables for 64-, 96-, and 128-bit Rabin fingerprints.\n" + " *\n" + " * AUTOMATICALLY GENERATED. DO NOT EDIT.\n" + " */\n" + "\n" + "#include \"folly/Fingerprint.h\"\n" + "\n" + "namespace folly {\n" + "namespace detail {\n" + "\n")); + + FingerprintPolynomial<63> poly64((const uint64_t*)&FLAGS_poly64); + computeTables(file, poly64); + + uint64_t poly96_val[2]; + poly96_val[0] = (uint64_t)FLAGS_poly96_m; + poly96_val[1] = (uint64_t)FLAGS_poly96_l << 32; + FingerprintPolynomial<95> poly96(poly96_val); + computeTables(file, poly96); + + uint64_t poly128_val[2]; + poly128_val[0] = (uint64_t)FLAGS_poly128_m; + poly128_val[1] = (uint64_t)FLAGS_poly128_l; + FingerprintPolynomial<127> poly128(poly128_val); + computeTables(file, poly128); + + CHECK_ERR(fprintf(file, + "} // namespace detail\n" + "} // namespace folly\n")); + CHECK_ERR(fclose(file)); + + return 0; +} diff --git a/folly/detail/FingerprintPolynomial.h b/folly/detail/FingerprintPolynomial.h new file mode 100644 index 00000000..2eb0c2ab --- /dev/null +++ b/folly/detail/FingerprintPolynomial.h @@ -0,0 +1,146 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_BUILD_FINGERPRINTPOLYNOMIAL_H_ +#define FOLLY_BUILD_FINGERPRINTPOLYNOMIAL_H_ + +#include + +namespace folly { +namespace detail { + +/** + * Representation of a polynomial of degree DEG over GF(2) (that is, + * with binary coefficients). + * + * Probably of no use outside of Fingerprint code; used by + * GenerateFingerprintTables and the unittest. + */ +template +class FingerprintPolynomial { + public: + FingerprintPolynomial() { + for (int i = 0; i < size(); i++) { + val_[i] = 0; + } + } + + explicit FingerprintPolynomial(const uint64_t* vals) { + for (int i = 0; i < size(); i++) { + val_[i] = vals[i]; + } + } + + void write(uint64_t* out) const { + for (int i = 0; i < size(); i++) { + out[i] = val_[i]; + } + } + + void add(const FingerprintPolynomial& other) { + for (int i = 0; i < size(); i++) { + val_[i] ^= other.val_[i]; + } + } + + // Multiply by X. The actual degree must be < DEG. + void mulX() { + CHECK_EQ(0, val_[0] & (1UL<<63)); + uint64_t b = 0; + for (int i = size()-1; i >= 0; i--) { + uint64_t nb = val_[i] >> 63; + val_[i] = (val_[i] << 1) | b; + b = nb; + } + } + + // Compute (this * X) mod P(X), where P(X) is a monic polynomial of degree + // DEG+1 (represented as a FingerprintPolynomial object, with the + // implicit coefficient of X^(DEG+1)==1) + // + // This is a bit tricky. If k=DEG+1: + // Let P(X) = X^k + p_(k-1) * X^(k-1) + ... + p_1 * X + p_0 + // Let this = A(X) = a_(k-1) * X^(k-1) + ... + a_1 * X + a_0 + // Then: + // A(X) * X + // = a_(k-1) * X^k + (a_(k-2) * X^(k-1) + ... + a_1 * X^2 + a_0 * X) + // = a_(k-1) * X^k + (the binary representation of A, left shift by 1) + // + // if a_(k-1) = 0, we can ignore the first term. + // if a_(k-1) = 1, then: + // X^k mod P(X) + // = X^k - P(X) + // = P(X) - X^k + // = p_(k-1) * X^(k-1) + ... + p_1 * X + p_0 + // = exactly the binary representation passed in as an argument to this + // function! + // + // So A(X) * X mod P(X) is: + // the binary representation of A, left shift by 1, + // XOR p if a_(k-1) == 1 + void mulXmod(const FingerprintPolynomial& p) { + bool needXOR = (val_[0] & (1UL<<63)); + val_[0] &= ~(1UL<<63); + mulX(); + if (needXOR) { + add(p); + } + } + + // Compute (this * X^k) mod P(X) by repeatedly multiplying by X (see above) + void mulXkmod(int k, const FingerprintPolynomial& p) { + for (int i = 0; i < k; i++) { + mulXmod(p); + } + } + + // add X^k, where k <= DEG + void addXk(int k) { + DCHECK_GE(k, 0); + DCHECK_LE(k, DEG); + int word_offset = (DEG - k) / 64; + int bit_offset = 63 - (DEG - k) % 64; + val_[word_offset] ^= (1UL << bit_offset); + } + + // Set the highest 8 bits to val. + // If val is interpreted as polynomial of degree 7, then this sets *this + // to val * X^(DEG-7) + void setHigh8Bits(uint8_t val) { + val_[0] = ((uint64_t)val) << (64-8); + for (int i = 1; i < size(); i++) { + val_[i] = 0; + } + } + + static int size() { + return 1 + DEG/64; + } + private: + // Internal representation: big endian + // val_[0] contains the highest order coefficients, with bit 63 as the + // highest order coefficient + // + // If DEG+1 is not a multiple of 64, val_[size()-1] only uses the highest + // order (DEG+1)%64 bits (the others are always 0) + uint64_t val_[1 + DEG/64]; +}; + +} // namespace detail +} // namespace folly + +#endif /* FOLLY_BUILD_FINGERPRINTPOLYNOMIAL_H_ */ + diff --git a/folly/detail/SlowFingerprint.h b/folly/detail/SlowFingerprint.h new file mode 100644 index 00000000..63a9136f --- /dev/null +++ b/folly/detail/SlowFingerprint.h @@ -0,0 +1,93 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_DETAIL_SLOWFINGERPRINT_H_ +#define FOLLY_DETAIL_SLOWFINGERPRINT_H_ + +#include "folly/Fingerprint.h" +#include "folly/detail/FingerprintPolynomial.h" +#include "folly/Range.h" + +namespace folly { +namespace detail { + +/** + * Slow, one-bit-at-a-time implementation of the Rabin fingerprint. + * + * This is useful as a reference implementation to test the Broder optimization + * for correctness in the unittest; it's probably too slow for any real use. + */ +template +class SlowFingerprint { + public: + SlowFingerprint() + : poly_(FingerprintTable::poly) { + // Use the same starting value as Fingerprint, (1 << (BITS-1)) + fp_.addXk(BITS-1); + } + + SlowFingerprint& update8(uint8_t v) { + updateLSB(v, 8); + return *this; + } + + SlowFingerprint& update32(uint32_t v) { + updateLSB(v, 32); + return *this; + } + + SlowFingerprint& update64(uint64_t v) { + updateLSB(v, 64); + return *this; + } + + SlowFingerprint& update(const folly::StringPiece& str) { + const char* p = str.start(); + for (int i = str.size(); i != 0; p++, i--) { + update8(static_cast(*p)); + } + return *this; + } + + void write(uint64_t* out) const { + fp_.write(out); + } + + private: + void updateBit(bool bit) { + fp_.mulXmod(poly_); + if (bit) { + fp_.addXk(0); + } + } + + void updateLSB(uint64_t val, int bits) { + val <<= (64-bits); + for (; bits != 0; --bits) { + updateBit(val & (1UL << 63)); + val <<= 1; + } + } + + const FingerprintPolynomial poly_; + FingerprintPolynomial fp_; +}; + +} // namespace detail +} // namespace folly + +#endif /* FOLLY_DETAIL_SLOWFINGERPRINT_H_ */ + diff --git a/folly/test/FingerprintTest.cpp b/folly/test/FingerprintTest.cpp new file mode 100644 index 00000000..76e97f4c --- /dev/null +++ b/folly/test/FingerprintTest.cpp @@ -0,0 +1,175 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Fingerprint.h" + +#include +#include + +#include "folly/detail/SlowFingerprint.h" +#include "folly/Benchmark.h" + +using namespace folly; +using namespace folly::detail; + +TEST(Fingerprint, BroderOptimization) { + // Test that the Broder optimization produces the same result as + // the default (slow) implementation that processes one bit at a time. + uint64_t val_a = 0xfaceb00cdeadbeefUL; + uint64_t val_b = 0x1234567890abcdefUL; + + uint64_t slow[2]; + uint64_t fast[2]; + + SlowFingerprint<64>().update64(val_a).update64(val_b).write(slow); + Fingerprint<64>().update64(val_a).update64(val_b).write(fast); + EXPECT_EQ(slow[0], fast[0]); + + SlowFingerprint<96>().update64(val_a).update64(val_b).write(slow); + Fingerprint<96>().update64(val_a).update64(val_b).write(fast); + EXPECT_EQ(slow[0], fast[0]); + EXPECT_EQ(slow[1], fast[1]); + + SlowFingerprint<128>().update64(val_a).update64(val_b).write(slow); + Fingerprint<128>().update64(val_a).update64(val_b).write(fast); + EXPECT_EQ(slow[0], fast[0]); + EXPECT_EQ(slow[1], fast[1]); +} + +TEST(Fingerprint, MultiByteUpdate) { + // Test that the multi-byte update functions (update32, update64, + // update(StringPiece)) produce the same result as calling update8 + // repeatedly. + uint64_t val_a = 0xfaceb00cdeadbeefUL; + uint64_t val_b = 0x1234567890abcdefUL; + uint8_t bytes[16]; + for (int i = 0; i < 8; i++) { + bytes[i] = (val_a >> (8*(7-i))) & 0xff; + } + for (int i = 0; i < 8; i++) { + bytes[i+8] = (val_b >> (8*(7-i))) & 0xff; + } + StringPiece sp((const char*)bytes, 16); + + uint64_t u8[2]; // updating 8 bits at a time + uint64_t u32[2]; // updating 32 bits at a time + uint64_t u64[2]; // updating 64 bits at a time + uint64_t usp[2]; // update(StringPiece) + uint64_t uconv[2]; // convenience function (fingerprint*(StringPiece)) + + { + Fingerprint<64> fp; + for (int i = 0; i < 16; i++) { + fp.update8(bytes[i]); + } + fp.write(u8); + } + Fingerprint<64>().update32(val_a >> 32).update32(val_a & 0xffffffff). + update32(val_b >> 32).update32(val_b & 0xffffffff).write(u32); + Fingerprint<64>().update64(val_a).update64(val_b).write(u64); + Fingerprint<64>().update(sp).write(usp); + uconv[0] = fingerprint64(sp); + + EXPECT_EQ(u8[0], u32[0]); + EXPECT_EQ(u8[0], u64[0]); + EXPECT_EQ(u8[0], usp[0]); + EXPECT_EQ(u8[0], uconv[0]); + + { + Fingerprint<96> fp; + for (int i = 0; i < 16; i++) { + fp.update8(bytes[i]); + } + fp.write(u8); + } + Fingerprint<96>().update32(val_a >> 32).update32(val_a & 0xffffffff). + update32(val_b >> 32).update32(val_b & 0xffffffff).write(u32); + Fingerprint<96>().update64(val_a).update64(val_b).write(u64); + Fingerprint<96>().update(sp).write(usp); + uint32_t uconv_lsb; + fingerprint96(sp, &(uconv[0]), &uconv_lsb); + uconv[1] = (uint64_t)uconv_lsb << 32; + + EXPECT_EQ(u8[0], u32[0]); + EXPECT_EQ(u8[1], u32[1]); + EXPECT_EQ(u8[0], u64[0]); + EXPECT_EQ(u8[1], u64[1]); + EXPECT_EQ(u8[0], usp[0]); + EXPECT_EQ(u8[1], usp[1]); + EXPECT_EQ(u8[0], uconv[0]); + EXPECT_EQ(u8[1], uconv[1]); + + { + Fingerprint<128> fp; + for (int i = 0; i < 16; i++) { + fp.update8(bytes[i]); + } + fp.write(u8); + } + Fingerprint<128>().update32(val_a >> 32).update32(val_a & 0xffffffff). + update32(val_b >> 32).update32(val_b & 0xffffffff).write(u32); + Fingerprint<128>().update64(val_a).update64(val_b).write(u64); + Fingerprint<128>().update(sp).write(usp); + fingerprint128(sp, &(uconv[0]), &(uconv[1])); + + EXPECT_EQ(u8[0], u32[0]); + EXPECT_EQ(u8[1], u32[1]); + EXPECT_EQ(u8[0], u64[0]); + EXPECT_EQ(u8[1], u64[1]); + EXPECT_EQ(u8[0], usp[0]); + EXPECT_EQ(u8[1], usp[1]); + EXPECT_EQ(u8[0], uconv[0]); + EXPECT_EQ(u8[1], uconv[1]); +} + +TEST(Fingerprint, Alignment) { + // Test that update() gives the same result regardless of string alignment + const char test_str[] = "hello world 12345"; + int len = sizeof(test_str)-1; + std::unique_ptr str(new char[len+8]); + uint64_t ref_fp; + SlowFingerprint<64>().update(StringPiece(test_str, len)).write(&ref_fp); + for (int i = 0; i < 8; i++) { + char* p = str.get(); + char* q; + // Fill the string as !!hello?????? + for (int j = 0; j < i; j++) { + *p++ = '!'; + } + q = p; + for (int j = 0; j < len; j++) { + *p++ = test_str[j]; + } + for (int j = i; j < 8; j++) { + *p++ = '?'; + } + + uint64_t fp; + Fingerprint<64>().update(StringPiece(q, len)).write(&fp); + EXPECT_EQ(ref_fp, fp); + } +} + +int main(int argc, char *argv[]) { + testing::InitGoogleTest(&argc, argv); + google::ParseCommandLineFlags(&argc, &argv, true); + auto ret = RUN_ALL_TESTS(); + if (!ret) { + folly::runBenchmarksOnFlag(); + } + return ret; +} + -- 2.34.1