2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <glog/logging.h>
21 #include <folly/experimental/Instructions.h>
26 extern const uint8_t kSelectInByte[2048];
30 * Returns the position of the k-th 1 in the 64-bit word x.
31 * k is 0-based, so k=0 returns the position of the first 1.
33 * Uses the broadword selection algorithm by Vigna [1], improved by Gog
34 * and Petri [2] and Vigna [3].
36 * [1] Sebastiano Vigna. Broadword Implementation of Rank/Select
39 * [2] Simon Gog, Matthias Petri. Optimized succinct data structures
40 * for massive data. Softw. Pract. Exper., 2014
42 * [3] Sebastiano Vigna. MG4J 5.2.1. http://mg4j.di.unimi.it/
44 template <class Instructions>
45 inline uint64_t select64(uint64_t x, uint64_t k) {
46 DCHECK_LT(k, Instructions::popcount(x));
48 constexpr uint64_t kOnesStep4 = 0x1111111111111111ULL;
49 constexpr uint64_t kOnesStep8 = 0x0101010101010101ULL;
50 constexpr uint64_t kMSBsStep8 = 0x80ULL * kOnesStep8;
53 s = s - ((s & 0xA * kOnesStep4) >> 1);
54 s = (s & 0x3 * kOnesStep4) + ((s >> 2) & 0x3 * kOnesStep4);
55 s = (s + (s >> 4)) & 0xF * kOnesStep8;
56 uint64_t byteSums = s * kOnesStep8;
58 uint64_t kStep8 = k * kOnesStep8;
59 uint64_t geqKStep8 = (((kStep8 | kMSBsStep8) - byteSums) & kMSBsStep8);
60 uint64_t place = Instructions::popcount(geqKStep8) * 8;
61 uint64_t byteRank = k - (((byteSums << 8) >> place) & uint64_t(0xFF));
62 return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)];
66 FOLLY_TARGET_ATTRIBUTE("bmi,bmi2")
67 inline uint64_t select64<compression::instructions::Haswell>(uint64_t x,
69 #if defined(__GNUC__) && !defined(__clang__) && !__GNUC_PREREQ(4, 9)
70 // GCC 4.8 doesn't support the intrinsics.
71 uint64_t result = uint64_t(1) << k;
73 asm("pdep %1, %0, %0\n\t"
80 return _tzcnt_u64(_pdep_u64(1ULL << k, x));