From: Brian Pane
Date: Fri, 27 Dec 2013 02:06:44 +0000 (-0800)
Subject: Add new CRC-32C checksum functions to folly
X-Git-Tag: v0.22.0~745
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=19e5f7ed316251e9c559c42322b97f9a5e9f47d8;p=folly.git

Add new CRC-32C checksum functions to folly

Summary:
* Added a new crc32c() function, with a portable implementation and
  an optimized version for x86 with SSE4.2

Test Plan: New unit test included

Reviewed By: tudorb@fb.com

FB internal diff: D1111515
---

diff --git a/folly/Checksum.cpp b/folly/Checksum.cpp
new file mode 100644
index 00000000..788e0a19
--- /dev/null
+++ b/folly/Checksum.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Checksum.h"
+#include <algorithm>
+#include <stdexcept>
+#include <boost/crc.hpp>
+#include "folly/CpuId.h"
+
+namespace folly {
+
+namespace detail {
+
+#if defined(__x86_64__) && defined (__GNUC__)
+
+// Fast SIMD implementation of CRC-32C for x86 with SSE 4.2
+uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
+    uint32_t startingChecksum) {
+  uint32_t sum = startingChecksum;
+  size_t offset = 0;
+
+  // Process bytes one at a time until we reach an 8-byte boundary and can
+  // start doing aligned 64-bit reads.
+  static const uintptr_t ALIGN_MASK = sizeof(uint64_t) - 1;
+  size_t mask = (size_t)((uintptr_t)data & ALIGN_MASK);
+  if (mask != 0) {
+    size_t limit = std::min(nbytes, sizeof(uint64_t) - mask);
+    while (offset < limit) {
+      sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
+      offset++;
+    }
+  }
+
+  // Process 8 bytes at a time until we have fewer than 8 bytes left.
+  while (offset + sizeof(uint64_t) <= nbytes) {
+    const uint64_t* src = (const uint64_t*)(data + offset);
+    sum = __builtin_ia32_crc32di(sum, *src);
+    offset += sizeof(uint64_t);
+  }
+
+  // Process any bytes remaining after the last aligned 8-byte block.
+  while (offset < nbytes) {
+    sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
+    offset++;
+  }
+  return sum;
+}
+
+bool crc32c_hw_supported() {
+  static folly::CpuId id;
+  return id.sse42();
+}
+
+#else
+
+uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
+    uint32_t startingChecksum) {
+  throw std::runtime_error("crc32c_hw is not implemented on this platform");
+}
+
+bool crc32c_hw_supported() {
+  return false;
+}
+
+#endif
+
+uint32_t crc32c_sw(const uint8_t *data, size_t nbytes,
+    uint32_t startingChecksum) {
+
+  // Reverse the bits in the starting checksum so they'll be in the
+  // right internal format for Boost's CRC engine.
+  //     O(1)-time, branchless bit reversal algorithm from
+  //     http://graphics.stanford.edu/~seander/bithacks.html
+  startingChecksum = ((startingChecksum >> 1) & 0x55555555) |
+      ((startingChecksum & 0x55555555) << 1);
+  startingChecksum = ((startingChecksum >> 2) & 0x33333333) |
+      ((startingChecksum & 0x33333333) << 2);
+  startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) |
+      ((startingChecksum & 0x0f0f0f0f) << 4);
+  startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) |
+      ((startingChecksum & 0x00ff00ff) << 8);
+  startingChecksum = (startingChecksum >> 16) |
+      (startingChecksum << 16);
+
+  static const uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41;
+  boost::crc_optimal<32, CRC32C_POLYNOMIAL, ~0U, 0, true, true> sum(
+      startingChecksum);
+  sum.process_bytes(data, nbytes);
+  return sum.checksum();
+}
+
+} // folly::detail
+
+uint32_t crc32c(const uint8_t *data, size_t nbytes,
+    uint32_t startingChecksum) {
+  if (detail::crc32c_hw_supported()) {
+    return detail::crc32c_hw(data, nbytes, startingChecksum);
+  } else {
+    return detail::crc32c_sw(data, nbytes, startingChecksum);
+  }
+}
+
+} // folly
diff --git a/folly/Checksum.h b/folly/Checksum.h
new file mode 100644
index 00000000..3892311c
--- /dev/null
+++ b/folly/Checksum.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_CHECKSUM_H_
+#define FOLLY_CHECKSUM_H_
+
+#include <stdint.h>
+#include <cstddef>
+
+/*
+ * Checksum functions
+ */
+
+namespace folly {
+
+/**
+ * Compute the CRC-32C checksum of a buffer, using a hardware-accelerated
+ * implementation if available or a portable software implementation as
+ * a default.
+ *
+ * @note CRC-32C is different from CRC-32; CRC-32C starts with a different
+ *       polynomial and thus yields different results for the same input
+ *       than a traditional CRC-32.
+ */
+uint32_t crc32c(const uint8_t* data, size_t nbytes,
+    uint32_t startingChecksum = ~0U);
+
+} // folly
+
+#endif /* FOLLY_CHECKSUM_H_ */
diff --git a/folly/detail/ChecksumDetail.h b/folly/detail/ChecksumDetail.h
new file mode 100644
index 00000000..ba74e216
--- /dev/null
+++ b/folly/detail/ChecksumDetail.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_DETAIL_CHECKSUMDETAIL_H_
+#define FOLLY_DETAIL_CHECKSUMDETAIL_H_
+
+namespace folly { namespace detail {
+
+/**
+ * Compute a CRC-32C checksum of a buffer using a hardware-accelerated
+ * implementation.
+ *
+ * @note This function is exposed to support special cases where the
+ *       calling code is absolutely certain it ought to invoke a hardware-
+ *       accelerated CRC-32C implementation - unit tests, for example.  For
+ *       all other scenarios, please call crc32c() and let it pick an
+ *       implementation based on the capabilities of the underlying CPU.
+ */
+uint32_t crc32c_hw(const uint8_t* data, size_t nbytes,
+    uint32_t startingChecksum = ~0U);
+
+/**
+ * Check whether a hardware-accelerated CRC-32C implementation is
+ * supported on the current CPU.
+ */
+bool crc32c_hw_supported();
+
+/**
+ * Compute a CRC-32C checksum of a buffer using a portable,
+ * software-only implementation.
+ *
+ * @note This function is exposed to support special cases where the
+ *       calling code is absolutely certain it wants to use the software
+ *       implementation instead of the hardware-accelerated code - unit
+ *       tests, for example.  For all other scenarios, please call crc32c()
+ *       and let it pick an implementation based on the capabilities of
+ *       the underlying CPU.
+ */
+uint32_t crc32c_sw(const uint8_t* data, size_t nbytes,
+    uint32_t startingChecksum = ~0U);
+
+
+}} // folly::detail
+
+#endif /* FOLLY_DETAIL_CHECKSUMDETAIL_H_ */
diff --git a/folly/test/ChecksumTest.cpp b/folly/test/ChecksumTest.cpp
new file mode 100644
index 00000000..815af872
--- /dev/null
+++ b/folly/test/ChecksumTest.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "folly/Checksum.h"
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+#include "folly/Benchmark.h"
+#include "folly/Hash.h"
+#include "folly/detail/ChecksumDetail.h"
+
+namespace {
+const unsigned int BUFFER_SIZE = 64 * 1024 * sizeof(uint64_t);
+uint8_t buffer[BUFFER_SIZE];
+
+struct ExpectedResult {
+  size_t offset;
+  size_t length;
+  uint32_t crc32c;
+};
+
+ExpectedResult expectedResults[] = {
+    // Zero-byte input
+    { 0, 0, ~0U },
+    // Small aligned inputs to test special cases in SIMD implementations
+    { 8, 1, 1543413366 },
+    { 8, 2, 523493126 },
+    { 8, 3, 1560427360 },
+    { 8, 4, 3422504776 },
+    { 8, 5, 447841138 },
+    { 8, 6, 3910050499 },
+    { 8, 7, 3346241981 },
+    // Small unaligned inputs
+    { 9, 1, 3855826643 },
+    { 10, 2, 560880875 },
+    { 11, 3, 1479707779 },
+    { 12, 4, 2237687071 },
+    { 13, 5, 4063855784 },
+    { 14, 6, 2553454047 },
+    { 15, 7, 1349220140 },
+    // Larger inputs to test leftover chunks at the end of aligned blocks
+    { 8, 8, 627613930 },
+    { 8, 9, 2105929409 },
+    { 8, 10, 2447068514 },
+    { 8, 11, 863807079 },
+    { 8, 12, 292050879 },
+    { 8, 13, 1411837737 },
+    { 8, 14, 2614515001 },
+    { 8, 15, 3579076296 },
+    { 8, 16, 2897079161 },
+    { 8, 17, 675168386 },
+    // Much larger inputs
+    { 0, BUFFER_SIZE, 2961263300 },
+    { 1, BUFFER_SIZE / 2, 1708529329 },
+};
+
+void testCRC32C(
+    std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
+  for (auto expected : expectedResults) {
+    uint32_t result = impl(buffer + expected.offset, expected.length, ~0U);
+    EXPECT_EQ(result, expected.crc32c);
+  }
+}
+
+void testCRC32CContinuation(
+    std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
+  for (auto expected : expectedResults) {
+    size_t partialLength = expected.length / 2;
+    uint32_t partialChecksum = impl(
+        buffer + expected.offset, partialLength, ~0U);
+    uint32_t result = impl(
+        buffer + expected.offset + partialLength,
+        expected.length - partialLength, partialChecksum);
+    EXPECT_EQ(result, expected.crc32c);
+  }
+}
+
+} // namespace
+
+TEST(Checksum, crc32c_software) {
+  testCRC32C(folly::detail::crc32c_sw);
+}
+
+TEST(Checksum, crc32c_continuation_software) {
+  testCRC32CContinuation(folly::detail::crc32c_sw);
+}
+
+
+TEST(Checksum, crc32c_hardware) {
+  if (folly::detail::crc32c_hw_supported()) {
+    testCRC32C(folly::detail::crc32c_hw);
+  } else {
+    LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
+        " (not supported on this CPU)";
+  }
+}
+
+TEST(Checksum, crc32c_continuation_hardware) {
+  if (folly::detail::crc32c_hw_supported()) {
+    testCRC32CContinuation(folly::detail::crc32c_hw);
+  } else {
+    LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
+        " (not supported on this CPU)";
+  }
+}
+
+TEST(Checksum, crc32c_autodetect) {
+  testCRC32C(folly::crc32c);
+}
+
+TEST(Checksum, crc32c_continuation_autodetect) {
+  testCRC32CContinuation(folly::crc32c);
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  // Populate a buffer with a deterministic pattern
+  // on which to compute checksums
+  const uint8_t* src = buffer;
+  uint64_t* dst = (uint64_t*)buffer;
+  const uint64_t* end = (const uint64_t*)(buffer + BUFFER_SIZE);
+  *dst++ = 0;
+  while (dst < end) {
+    *dst++ = folly::hash::fnv64_buf((const char*)src, sizeof(uint64_t));
+    src += sizeof(uint64_t);
+  }
+
+  auto ret = RUN_ALL_TESTS();
+  if (!ret && FLAGS_benchmark) {
+    folly::runBenchmarks();
+  }
+  return ret;
+}