2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <glog/logging.h>
21 #include <folly/Portability.h>
22 #include <folly/experimental/Instructions.h>
27 extern const uint8_t kSelectInByte[2048];
31 * Returns the position of the k-th 1 in the 64-bit word x.
32 * k is 0-based, so k=0 returns the position of the first 1.
34 * Uses the broadword selection algorithm by Vigna [1], improved by Gog
35 * and Petri [2] and Vigna [3].
37 * [1] Sebastiano Vigna. Broadword Implementation of Rank/Select
40 * [2] Simon Gog, Matthias Petri. Optimized succinct data structures
41 * for massive data. Softw. Pract. Exper., 2014
43 * [3] Sebastiano Vigna. MG4J 5.2.1. http://mg4j.di.unimi.it/
45 template <class Instructions>
46 inline uint64_t select64(uint64_t x, uint64_t k) {
47 DCHECK_LT(k, Instructions::popcount(x));
49 constexpr uint64_t kOnesStep4 = 0x1111111111111111ULL;
50 constexpr uint64_t kOnesStep8 = 0x0101010101010101ULL;
51 constexpr uint64_t kMSBsStep8 = 0x80ULL * kOnesStep8;
54 s = s - ((s & 0xA * kOnesStep4) >> 1);
55 s = (s & 0x3 * kOnesStep4) + ((s >> 2) & 0x3 * kOnesStep4);
56 s = (s + (s >> 4)) & 0xF * kOnesStep8;
57 uint64_t byteSums = s * kOnesStep8;
59 uint64_t kStep8 = k * kOnesStep8;
60 uint64_t geqKStep8 = (((kStep8 | kMSBsStep8) - byteSums) & kMSBsStep8);
61 uint64_t place = Instructions::popcount(geqKStep8) * 8;
62 uint64_t byteRank = k - (((byteSums << 8) >> place) & uint64_t(0xFF));
63 return place + detail::kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)];
67 FOLLY_ALWAYS_INLINE uint64_t
68 select64<compression::instructions::Haswell>(uint64_t x, uint64_t k) {
69 #if defined(__GNUC__) || defined(__clang__)
70 // GCC and Clang won't inline the intrinsics.
71 uint64_t result = uint64_t(1) << k;
73 asm("pdep %1, %0, %0\n\t"
80 return _tzcnt_u64(_pdep_u64(1ULL << k, x));