2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/Checksum.h>
18 #include <gflags/gflags.h>
19 #include <gtest/gtest.h>
20 #include <folly/Benchmark.h>
21 #include <folly/Hash.h>
22 #include <folly/detail/ChecksumDetail.h>
// Size in bytes of the shared input buffer: 512 * 1024 words of 64 bits each.
const unsigned int BUFFER_SIZE = sizeof(uint64_t) * (512 * 1024);

// Input data shared by every checksum test and benchmark in this file;
// main() fills it before any test runs.
uint8_t buffer[BUFFER_SIZE];
// One CRC-32C test vector. The helpers that consume these records read the
// members `offset`, `length` and `crc32c` from each entry.
// NOTE(review): the member declarations and the closing brace of this struct
// are not visible here -- confirm member types and order against the
// complete file.
28 struct ExpectedResult {
// Table of CRC-32C test vectors consumed by the testCRC32C() and
// testCRC32CContinuation() helpers. Those helpers checksum `length` bytes
// starting at `buffer + offset` with a starting value of ~0U and compare the
// result against `crc32c`.
// Each row appears to be {offset, length, crc32c} -- presumably matching the
// member order of ExpectedResult; TODO confirm, the members are not visible.
// The expected values are tied to the pattern that main() writes into
// `buffer`; changing that pattern invalidates every row.
// NOTE(review): some section comments below have no rows after them, so rows
// seem to be missing from this view -- verify against the complete file.
34 ExpectedResult expectedResults[] = {
37 // Small aligned inputs to test special cases in SIMD implementations
45 // Small unaligned inputs
48 { 11, 3, 1479707779 },
49 { 12, 4, 2237687071 },
50 { 13, 5, 4063855784 },
51 { 14, 6, 2553454047 },
52 { 15, 7, 1349220140 },
53 // Larger inputs to test leftover chunks at the end of aligned blocks
56 { 8, 10, 2447068514 },
59 { 8, 13, 1411837737 },
60 { 8, 14, 2614515001 },
61 { 8, 15, 3579076296 },
62 { 8, 16, 2897079161 },
// Large inputs (assuming {offset, length, crc32c} order): the whole buffer,
// and half of it starting one byte in.
65 { 0, BUFFER_SIZE, 2096790750 },
66 { 1, BUFFER_SIZE / 2, 3854797577 },
70 std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
71 for (auto expected : expectedResults) {
72 uint32_t result = impl(buffer + expected.offset, expected.length, ~0U);
73 EXPECT_EQ(expected.crc32c, result);
77 void testCRC32CContinuation(
78 std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
79 for (auto expected : expectedResults) {
80 size_t partialLength = expected.length / 2;
81 uint32_t partialChecksum = impl(
82 buffer + expected.offset, partialLength, ~0U);
83 uint32_t result = impl(
84 buffer + expected.offset + partialLength,
85 expected.length - partialLength, partialChecksum);
86 EXPECT_EQ(expected.crc32c, result);
92 TEST(Checksum, crc32c_software) {
93 testCRC32C(folly::detail::crc32c_sw);
96 TEST(Checksum, crc32c_continuation_software) {
97 testCRC32CContinuation(folly::detail::crc32c_sw);
101 TEST(Checksum, crc32c_hardware) {
102 if (folly::detail::crc32c_hw_supported()) {
103 testCRC32C(folly::detail::crc32c_hw);
105 LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
106 " (not supported on this CPU)";
110 TEST(Checksum, crc32c_continuation_hardware) {
111 if (folly::detail::crc32c_hw_supported()) {
112 testCRC32CContinuation(folly::detail::crc32c_hw);
114 LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
115 " (not supported on this CPU)";
119 TEST(Checksum, crc32c_autodetect) {
120 testCRC32C(folly::crc32c);
123 TEST(Checksum, crc32c_continuation_autodetect) {
124 testCRC32CContinuation(folly::crc32c);
127 void benchmarkHardwareCRC32C(unsigned long iters, size_t blockSize) {
128 if (folly::detail::crc32c_hw_supported()) {
130 for (unsigned long i = 0; i < iters; i++) {
131 checksum = folly::detail::crc32c_hw(buffer, blockSize);
132 folly::doNotOptimizeAway(checksum);
135 LOG(WARNING) << "skipping hardware-accelerated CRC-32C benchmarks" <<
136 " (not supported on this CPU)";
140 void benchmarkSoftwareCRC32C(unsigned long iters, size_t blockSize) {
142 for (unsigned long i = 0; i < iters; i++) {
143 checksum = folly::detail::crc32c_sw(buffer, blockSize);
144 folly::doNotOptimizeAway(checksum);
148 // This test fits easily in the L1 cache on modern server processors,
149 // and thus it mainly measures the speed of the checksum computation.
150 BENCHMARK(crc32c_hardware_1KB_block, iters) {
151 benchmarkHardwareCRC32C(iters, 1024);
154 BENCHMARK(crc32c_software_1KB_block, iters) {
155 benchmarkSoftwareCRC32C(iters, 1024);
158 BENCHMARK_DRAW_LINE();
160 // This test is too big for the L1 cache but fits in L2
161 BENCHMARK(crc32c_hardware_64KB_block, iters) {
162 benchmarkHardwareCRC32C(iters, 64 * 1024);
165 BENCHMARK(crc32c_software_64KB_block, iters) {
166 benchmarkSoftwareCRC32C(iters, 64 * 1024);
169 BENCHMARK_DRAW_LINE();
171 // This test is too big for the L2 cache but fits in L3
172 BENCHMARK(crc32c_hardware_512KB_block, iters) {
173 benchmarkHardwareCRC32C(iters, 512 * 1024);
176 BENCHMARK(crc32c_software_512KB_block, iters) {
177 benchmarkSoftwareCRC32C(iters, 512 * 1024);
// Entry point for the test/benchmark binary: initializes gtest and gflags,
// fills `buffer` with generated data, runs the unit tests and then
// optionally runs the benchmarks.
181 int main(int argc, char** argv) {
182 testing::InitGoogleTest(&argc, argv);
183 gflags::ParseCommandLineFlags(&argc, &argv, true);
185 // Populate a buffer with a deterministic pattern
186 // on which to compute checksums
// `src` walks the buffer byte-wise as the hash input, `dst` walks the same
// buffer as 64-bit words being written, and `end` is the one-past-the-end
// position of the buffer expressed as a 64-bit word pointer.
187 const uint8_t* src = buffer;
188 uint64_t* dst = (uint64_t*)buffer;
189 const uint64_t* end = (const uint64_t*)(buffer + BUFFER_SIZE);
// NOTE(review): the statement(s) between the pointer setup and the two lines
// below -- presumably a loop that runs until `dst` reaches `end` -- are not
// visible in this view; confirm against the complete file.
// Each stored word is folly::hash::fnv64_buf over the sizeof(uint64_t) bytes
// at `src`, after which `src` advances by one word.
192 *dst++ = folly::hash::fnv64_buf((const char*)src, sizeof(uint64_t));
193 src += sizeof(uint64_t);
// Benchmarks run only when RUN_ALL_TESTS() returned 0 and the `benchmark`
// flag is set.
196 auto ret = RUN_ALL_TESTS();
197 if (!ret && FLAGS_benchmark) {
198 folly::runBenchmarks();