2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Compute 64-, 96-, and 128-bit Rabin fingerprints, as described in
19 * Michael O. Rabin (1981)
20 * Fingerprinting by Random Polynomials
21 * Center for Research in Computing Technology, Harvard University
22 * Tech Report TR-CSE-03-01
24 * The implementation follows the optimization described in
25 * Andrei Z. Broder (1993)
26 * Some applications of Rabin's fingerprinting method
28 * extended for fingerprints larger than 64 bits, and modified to use
29 * 64-bit instead of 32-bit integers for computation.
31 * The precomputed tables are in FingerprintTable.cpp, which is automatically
32 * generated by ComputeFingerprintTable.cpp.
34 * Benchmarked on 10/13/2009 on a 2.5GHz quad-core Xeon L5420,
35 * - Fingerprint<64>::update64() takes about 12ns
36 * - Fingerprint<96>::update64() takes about 30ns
37 * - Fingerprint<128>::update128() takes about 30ns
38 * (unsurprisingly, Fingerprint<96> and Fingerprint<128> take the
39 * same amount of time, as they both use 128-bit operations; the least
40 * significant 32 bits of Fingerprint<96> will always be 0)
42 * @author Tudor Bosman (tudorb@facebook.com)
49 #include <folly/Range.h>
56 struct FingerprintTable {
57 static const uint64_t poly[1 + (BITS - 1) / 64];
58 static const uint64_t table[8][256][1 + (BITS - 1) / 64];
62 const uint64_t FingerprintTable<BITS>::poly[1 + (BITS - 1) / 64] = {};
64 const uint64_t FingerprintTable<BITS>::table[8][256][1 + (BITS - 1) / 64] = {};
66 #define FOLLY_DECLARE_FINGERPRINT_TABLES(BITS) \
68 const uint64_t FingerprintTable<BITS>::poly[1 + (BITS - 1) / 64]; \
70 const uint64_t FingerprintTable<BITS>::table[8][256][1 + (BITS - 1) / 64]
72 FOLLY_DECLARE_FINGERPRINT_TABLES(64);
73 FOLLY_DECLARE_FINGERPRINT_TABLES(96);
74 FOLLY_DECLARE_FINGERPRINT_TABLES(128);
76 #undef FOLLY_DECLARE_FINGERPRINT_TABLES
81 * Compute the Rabin fingerprint.
83 * TODO(tudorb): Extend this to allow removing values from the computed
84 * fingerprint (so we can fingerprint a sliding window, as in the Rabin-Karp
85 * string matching algorithm)
87 * update* methods return *this, so you can chain them together:
88 * Fingerprint<96>().update8(x).update(str).update64(val).write(output);
94 // Use a non-zero starting value. We'll use (1 << (BITS-1))
96 for (int i = 1; i < size(); i++)
100 Fingerprint& update8(uint8_t v) {
101 uint8_t out = shlor8(v);
102 xortab(detail::FingerprintTable<BITS>::table[0][out]);
106 // update32 and update64 are convenience functions to update the fingerprint
107 // with 4 and 8 bytes at a time. They are faster than calling update8
108 // in a loop. They process the bytes in big-endian order.
109 Fingerprint& update32(uint32_t v) {
110 uint32_t out = shlor32(v);
111 for (int i = 0; i < 4; i++) {
112 xortab(detail::FingerprintTable<BITS>::table[i][out&0xff]);
118 Fingerprint& update64(uint64_t v) {
119 uint64_t out = shlor64(v);
120 for (int i = 0; i < 8; i++) {
121 xortab(detail::FingerprintTable<BITS>::table[i][out&0xff]);
127 Fingerprint& update(StringPiece str) {
128 // TODO(tudorb): We could be smart and do update64 or update32 if aligned
136 * Return the number of uint64s needed to hold the fingerprint value.
139 return 1 + (BITS-1)/64;
143 * Write the computed fingeprint to an array of size() uint64_t's.
144 * For Fingerprint<64>, size()==1; we write 64 bits in out[0]
145 * For Fingerprint<96>, size()==2; we write 64 bits in out[0] and
146 * the most significant 32 bits of out[1]
147 * For Fingerprint<128>, size()==2; we write 64 bits in out[0] and
150 void write(uint64_t* out) const {
151 for (int i = 0; i < size(); i++) {
157 // XOR the fingerprint with a value from one of the tables.
158 void xortab(const uint64_t* tab) {
159 for (int i = 0; i < size(); i++) {
164 // Helper functions: shift the fingerprint value left by 8/32/64 bits,
165 // return the "out" value (the bits that were shifted out), and add "v"
166 // in the bits on the right.
167 uint8_t shlor8(uint8_t v);
168 uint32_t shlor32(uint32_t v);
169 uint64_t shlor64(uint64_t v);
171 uint64_t fp_[1 + (BITS-1)/64];
174 // Convenience functions
177 * Return the 64-bit Rabin fingerprint of a string.
179 inline uint64_t fingerprint64(StringPiece str) {
181 Fingerprint<64>().update(str).write(&fp);
186 * Compute the 96-bit Rabin fingerprint of a string.
187 * Return the 64 most significant bits in *msb, and the 32 least significant
190 inline void fingerprint96(StringPiece str,
191 uint64_t* msb, uint32_t* lsb) {
193 Fingerprint<96>().update(str).write(fp);
195 *lsb = (uint32_t)(fp[1] >> 32);
199 * Compute the 128-bit Rabin fingerprint of a string.
200 * Return the 64 most significant bits in *msb, and the 64 least significant
203 inline void fingerprint128(StringPiece str,
204 uint64_t* msb, uint64_t* lsb) {
206 Fingerprint<128>().update(str).write(fp);
213 inline uint8_t Fingerprint<64>::shlor8(uint8_t v) {
214 uint8_t out = (uint8_t)(fp_[0] >> 56);
215 fp_[0] = (fp_[0] << 8) | ((uint64_t)v);
220 inline uint32_t Fingerprint<64>::shlor32(uint32_t v) {
221 uint32_t out = (uint32_t)(fp_[0] >> 32);
222 fp_[0] = (fp_[0] << 32) | ((uint64_t)v);
227 inline uint64_t Fingerprint<64>::shlor64(uint64_t v) {
228 uint64_t out = fp_[0];
234 inline uint8_t Fingerprint<96>::shlor8(uint8_t v) {
235 uint8_t out = (uint8_t)(fp_[0] >> 56);
236 fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56);
237 fp_[1] = (fp_[1] << 8) | ((uint64_t)v << 32);
242 inline uint32_t Fingerprint<96>::shlor32(uint32_t v) {
243 uint32_t out = (uint32_t)(fp_[0] >> 32);
244 fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32);
245 fp_[1] = ((uint64_t)v << 32);
250 inline uint64_t Fingerprint<96>::shlor64(uint64_t v) {
251 uint64_t out = fp_[0];
252 fp_[0] = fp_[1] | (v >> 32);
258 inline uint8_t Fingerprint<128>::shlor8(uint8_t v) {
259 uint8_t out = (uint8_t)(fp_[0] >> 56);
260 fp_[0] = (fp_[0] << 8) | (fp_[1] >> 56);
261 fp_[1] = (fp_[1] << 8) | ((uint64_t)v);
266 inline uint32_t Fingerprint<128>::shlor32(uint32_t v) {
267 uint32_t out = (uint32_t)(fp_[0] >> 32);
268 fp_[0] = (fp_[0] << 32) | (fp_[1] >> 32);
269 fp_[1] = (fp_[1] << 32) | ((uint64_t)v);
274 inline uint64_t Fingerprint<128>::shlor64(uint64_t v) {
275 uint64_t out = fp_[0];