2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/Checksum.h>
19 #include <boost/crc.hpp>
21 #include <folly/Benchmark.h>
22 #include <folly/Hash.h>
23 #include <folly/detail/ChecksumDetail.h>
24 #include <folly/portability/GFlags.h>
25 #include <folly/portability/GTest.h>
// Size in bytes of the input buffer: 512 * 1024 64-bit words.
const unsigned int BUFFER_SIZE = 512 * 1024 * sizeof(uint64_t);

// Input data used by the tests and benchmarks in this file. main() fills it
// by storing through a uint64_t*, so the storage has to be suitably aligned
// for uint64_t; a plain uint8_t array only guarantees byte alignment.
alignas(uint64_t) uint8_t buffer[BUFFER_SIZE];
31 struct ExpectedResult {
37 ExpectedResult expectedResults[] = {
40 // Small aligned inputs to test special cases in SIMD implementations
48 // Small unaligned inputs
51 { 11, 3, 1479707779 },
52 { 12, 4, 2237687071 },
53 { 13, 5, 4063855784 },
54 { 14, 6, 2553454047 },
55 { 15, 7, 1349220140 },
56 // Larger inputs to test leftover chunks at the end of aligned blocks
59 { 8, 10, 2447068514 },
62 { 8, 13, 1411837737 },
63 { 8, 14, 2614515001 },
64 { 8, 15, 3579076296 },
65 { 8, 16, 2897079161 },
68 { 0, BUFFER_SIZE, 2096790750 },
69 { 1, BUFFER_SIZE / 2, 3854797577 },
73 std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
74 for (auto expected : expectedResults) {
75 uint32_t result = impl(buffer + expected.offset, expected.length, ~0U);
76 EXPECT_EQ(expected.crc32c, result);
80 void testCRC32CContinuation(
81 std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
82 for (auto expected : expectedResults) {
83 size_t partialLength = expected.length / 2;
84 uint32_t partialChecksum = impl(
85 buffer + expected.offset, partialLength, ~0U);
86 uint32_t result = impl(
87 buffer + expected.offset + partialLength,
88 expected.length - partialLength, partialChecksum);
89 EXPECT_EQ(expected.crc32c, result);
93 void testMatchesBoost32Type() {
94 for (auto expected : expectedResults) {
95 boost::crc_32_type result;
96 result.process_bytes(buffer + expected.offset, expected.length);
97 const uint32_t boostResult = result.checksum();
98 const uint32_t follyResult =
99 folly::crc32_type(buffer + expected.offset, expected.length);
100 EXPECT_EQ(follyResult, boostResult);
106 TEST(Checksum, crc32c_software) {
107 testCRC32C(folly::detail::crc32c_sw);
110 TEST(Checksum, crc32c_continuation_software) {
111 testCRC32CContinuation(folly::detail::crc32c_sw);
115 TEST(Checksum, crc32c_hardware) {
116 if (folly::detail::crc32c_hw_supported()) {
117 testCRC32C(folly::detail::crc32c_hw);
119 LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
120 " (not supported on this CPU)";
124 TEST(Checksum, crc32c_continuation_hardware) {
125 if (folly::detail::crc32c_hw_supported()) {
126 testCRC32CContinuation(folly::detail::crc32c_hw);
128 LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
129 " (not supported on this CPU)";
133 TEST(Checksum, crc32c_autodetect) {
134 testCRC32C(folly::crc32c);
137 TEST(Checksum, crc32c_continuation_autodetect) {
138 testCRC32CContinuation(folly::crc32c);
141 TEST(Checksum, crc32) {
142 if (folly::detail::crc32c_hw_supported()) {
143 // Just check that sw and hw match
144 for (auto expected : expectedResults) {
146 folly::detail::crc32_sw(buffer + expected.offset, expected.length, 0);
148 folly::detail::crc32_hw(buffer + expected.offset, expected.length, 0);
149 EXPECT_EQ(sw_res, hw_res);
152 LOG(WARNING) << "skipping hardware-accelerated CRC-32 tests"
153 << " (not supported on this CPU)";
157 TEST(Checksum, crc32_continuation) {
158 if (folly::detail::crc32c_hw_supported()) {
159 // Just check that sw and hw match
160 for (auto expected : expectedResults) {
161 auto halflen = expected.length / 2;
163 folly::detail::crc32_sw(buffer + expected.offset, halflen, 0);
164 sw_res = folly::detail::crc32_sw(
165 buffer + expected.offset + halflen, halflen, sw_res);
167 folly::detail::crc32_hw(buffer + expected.offset, halflen, 0);
168 hw_res = folly::detail::crc32_hw(
169 buffer + expected.offset + halflen, halflen, hw_res);
170 EXPECT_EQ(sw_res, hw_res);
172 folly::detail::crc32_sw(buffer + expected.offset, halflen * 2, 0);
173 EXPECT_EQ(sw_res, sw_res2);
175 folly::detail::crc32_hw(buffer + expected.offset, halflen * 2, 0);
176 EXPECT_EQ(hw_res, hw_res2);
179 LOG(WARNING) << "skipping hardware-accelerated CRC-32 tests"
180 << " (not supported on this CPU)";
184 TEST(Checksum, crc32_type) {
185 // Test that crc32_type matches boost::crc_32_type
186 testMatchesBoost32Type();
189 void benchmarkHardwareCRC32C(unsigned long iters, size_t blockSize) {
190 if (folly::detail::crc32c_hw_supported()) {
192 for (unsigned long i = 0; i < iters; i++) {
193 checksum = folly::detail::crc32c_hw(buffer, blockSize);
194 folly::doNotOptimizeAway(checksum);
197 LOG(WARNING) << "skipping hardware-accelerated CRC-32C benchmarks" <<
198 " (not supported on this CPU)";
202 void benchmarkSoftwareCRC32C(unsigned long iters, size_t blockSize) {
204 for (unsigned long i = 0; i < iters; i++) {
205 checksum = folly::detail::crc32c_sw(buffer, blockSize);
206 folly::doNotOptimizeAway(checksum);
210 void benchmarkHardwareCRC32(unsigned long iters, size_t blockSize) {
211 if (folly::detail::crc32_hw_supported()) {
213 for (unsigned long i = 0; i < iters; i++) {
214 checksum = folly::detail::crc32_hw(buffer, blockSize);
215 folly::doNotOptimizeAway(checksum);
218 LOG(WARNING) << "skipping hardware-accelerated CRC-32 benchmarks"
219 << " (not supported on this CPU)";
223 void benchmarkSoftwareCRC32(unsigned long iters, size_t blockSize) {
225 for (unsigned long i = 0; i < iters; i++) {
226 checksum = folly::detail::crc32_sw(buffer, blockSize);
227 folly::doNotOptimizeAway(checksum);
231 // This test fits easily in the L1 cache on modern server processors,
232 // and thus it mainly measures the speed of the checksum computation.
233 BENCHMARK(crc32c_hardware_1KB_block, iters) {
234 benchmarkHardwareCRC32C(iters, 1024);
237 BENCHMARK(crc32c_software_1KB_block, iters) {
238 benchmarkSoftwareCRC32C(iters, 1024);
241 BENCHMARK(crc32_hardware_1KB_block, iters) {
242 benchmarkHardwareCRC32(iters, 1024);
245 BENCHMARK(crc32_software_1KB_block, iters) {
246 benchmarkSoftwareCRC32(iters, 1024);
249 BENCHMARK_DRAW_LINE();
251 // This test is too big for the L1 cache but fits in L2
252 BENCHMARK(crc32c_hardware_64KB_block, iters) {
253 benchmarkHardwareCRC32C(iters, 64 * 1024);
256 BENCHMARK(crc32c_software_64KB_block, iters) {
257 benchmarkSoftwareCRC32C(iters, 64 * 1024);
260 BENCHMARK(crc32_hardware_64KB_block, iters) {
261 benchmarkHardwareCRC32(iters, 64 * 1024);
264 BENCHMARK(crc32_software_64KB_block, iters) {
265 benchmarkSoftwareCRC32(iters, 64 * 1024);
268 BENCHMARK_DRAW_LINE();
270 // This test is too big for the L2 cache but fits in L3
271 BENCHMARK(crc32c_hardware_512KB_block, iters) {
272 benchmarkHardwareCRC32C(iters, 512 * 1024);
275 BENCHMARK(crc32c_software_512KB_block, iters) {
276 benchmarkSoftwareCRC32C(iters, 512 * 1024);
279 BENCHMARK(crc32_hardware_512KB_block, iters) {
280 benchmarkHardwareCRC32(iters, 512 * 1024);
283 BENCHMARK(crc32_software_512KB_block, iters) {
284 benchmarkSoftwareCRC32(iters, 512 * 1024);
287 int main(int argc, char** argv) {
288 testing::InitGoogleTest(&argc, argv);
289 gflags::ParseCommandLineFlags(&argc, &argv, true);
291 // Populate a buffer with a deterministic pattern
292 // on which to compute checksums
293 const uint8_t* src = buffer;
294 uint64_t* dst = (uint64_t*)buffer;
295 const uint64_t* end = (const uint64_t*)(buffer + BUFFER_SIZE);
298 *dst++ = folly::hash::fnv64_buf((const char*)src, sizeof(uint64_t));
299 src += sizeof(uint64_t);
302 auto ret = RUN_ALL_TESTS();
303 if (!ret && FLAGS_benchmark) {
304 folly::runBenchmarks();