2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/Checksum.h>
20 #include <boost/crc.hpp>
21 #include <folly/CpuId.h>
27 #if FOLLY_X64 && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
28 (((__GNUC__ * 100) + __GNUC_MINOR__) >= 407)
30 // Fast SIMD implementation of CRC-32C for x86 with SSE 4.2
31 uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
32 uint32_t startingChecksum) {
33 uint32_t sum = startingChecksum;
36 // Process bytes one at a time until we reach an 8-byte boundary and can
37 // start doing aligned 64-bit reads.
38 static uintptr_t ALIGN_MASK = sizeof(uint64_t) - 1;
39 size_t mask = (size_t)((uintptr_t)data & ALIGN_MASK);
41 size_t limit = std::min(nbytes, sizeof(uint64_t) - mask);
42 while (offset < limit) {
43 sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
48 // Process 8 bytes at a time until we have fewer than 8 bytes left.
49 while (offset + sizeof(uint64_t) <= nbytes) {
50 const uint64_t* src = (const uint64_t*)(data + offset);
51 sum = __builtin_ia32_crc32di(sum, *src);
52 offset += sizeof(uint64_t);
55 // Process any bytes remaining after the last aligned 8-byte block.
56 while (offset < nbytes) {
57 sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
63 bool crc32c_hw_supported() {
64 static folly::CpuId id;
70 uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
71 uint32_t startingChecksum) {
72 throw std::runtime_error("crc32_hw is not implemented on this platform");
75 bool crc32c_hw_supported() {
81 uint32_t crc32c_sw(const uint8_t *data, size_t nbytes,
82 uint32_t startingChecksum) {
84 // Reverse the bits in the starting checksum so they'll be in the
85 // right internal format for Boost's CRC engine.
86 // O(1)-time, branchless bit reversal algorithm from
87 // http://graphics.stanford.edu/~seander/bithacks.html
88 startingChecksum = ((startingChecksum >> 1) & 0x55555555) |
89 ((startingChecksum & 0x55555555) << 1);
90 startingChecksum = ((startingChecksum >> 2) & 0x33333333) |
91 ((startingChecksum & 0x33333333) << 2);
92 startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) |
93 ((startingChecksum & 0x0f0f0f0f) << 4);
94 startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) |
95 ((startingChecksum & 0x00ff00ff) << 8);
96 startingChecksum = (startingChecksum >> 16) |
97 (startingChecksum << 16);
99 static const uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41;
100 boost::crc_optimal<32, CRC32C_POLYNOMIAL, ~0U, 0, true, true> sum(
102 sum.process_bytes(data, nbytes);
103 return sum.checksum();
108 uint32_t crc32c(const uint8_t *data, size_t nbytes,
109 uint32_t startingChecksum) {
110 if (detail::crc32c_hw_supported()) {
111 return detail::crc32c_hw(data, nbytes, startingChecksum);
113 return detail::crc32c_sw(data, nbytes, startingChecksum);