2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include <unordered_set>
27 #include <glog/logging.h>
29 #include <folly/portability/GTest.h>
31 namespace folly { namespace compression {
34 std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) {
35 CHECK_LT(n, 2 * maxId);
36 std::uniform_int_distribution<> uid(1, maxId);
37 std::unordered_set<uint32_t> dataset;
38 while (dataset.size() < n) {
39 uint32_t value = uid(g);
40 if (dataset.count(value) == 0) {
41 dataset.insert(value);
45 std::vector<uint32_t> ids(dataset.begin(), dataset.end());
46 std::sort(ids.begin(), ids.end());
50 inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
52 return generateRandomList(n, maxId, gen);
55 inline std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId,
57 CHECK_LE(minId, maxId);
59 std::vector<uint32_t> ids;
60 ids.reserve((maxId - minId) / step + 1);
61 for (uint32_t i = minId; i <= maxId; i += step) {
67 inline std::vector<uint32_t> loadList(const std::string& filename) {
68 std::ifstream fin(filename);
69 std::vector<uint32_t> result;
77 // Test previousValue only if Reader has it.
78 template <class... Args>
79 void maybeTestPreviousValue(Args&&...) { }
81 // Make all the arguments template because if the types are not exact,
82 // the above overload will be picked (for example i could be size_t or
84 template <class Vector, class Reader, class Index>
85 auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i)
86 -> decltype(reader.previousValue(), void()) {
88 EXPECT_EQ(reader.previousValue(), data[i - 1]);
92 template <class Reader, class List>
93 void testNext(const std::vector<uint32_t>& data, const List& list) {
95 EXPECT_FALSE(reader.valid());
97 for (size_t i = 0; i < data.size(); ++i) {
98 EXPECT_TRUE(reader.next());
99 EXPECT_TRUE(reader.valid());
100 EXPECT_EQ(reader.value(), data[i]);
101 EXPECT_EQ(reader.position(), i);
102 maybeTestPreviousValue(data, reader, i);
104 EXPECT_FALSE(reader.next());
105 EXPECT_FALSE(reader.valid());
106 EXPECT_EQ(reader.position(), reader.size());
109 template <class Reader, class List>
110 void testSkip(const std::vector<uint32_t>& data, const List& list,
112 CHECK_GT(skipStep, 0);
115 for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
116 EXPECT_TRUE(reader.skip(skipStep));
117 EXPECT_TRUE(reader.valid());
118 EXPECT_EQ(reader.value(), data[i]);
119 EXPECT_EQ(reader.position(), i);
120 maybeTestPreviousValue(data, reader, i);
122 EXPECT_FALSE(reader.skip(skipStep));
123 EXPECT_FALSE(reader.valid());
124 EXPECT_EQ(reader.position(), reader.size());
125 EXPECT_FALSE(reader.next());
128 template <class Reader, class List>
129 void testSkip(const std::vector<uint32_t>& data, const List& list) {
130 for (size_t skipStep = 1; skipStep < 25; ++skipStep) {
131 testSkip<Reader, List>(data, list, skipStep);
133 for (size_t skipStep = 25; skipStep <= 500; skipStep += 25) {
134 testSkip<Reader, List>(data, list, skipStep);
138 template <class Reader, class List>
139 void testSkipTo(const std::vector<uint32_t>& data, const List& list,
141 CHECK_GT(skipToStep, 0);
144 const uint32_t delta = std::max<uint32_t>(1, data.back() / skipToStep);
145 uint32_t value = delta;
146 auto it = data.begin();
148 it = std::lower_bound(it, data.end(), value);
149 if (it == data.end()) {
150 EXPECT_FALSE(reader.skipTo(value));
153 EXPECT_TRUE(reader.skipTo(value));
154 EXPECT_TRUE(reader.valid());
155 EXPECT_EQ(reader.value(), *it);
156 EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
157 value = reader.value() + delta;
158 maybeTestPreviousValue(data, reader, std::distance(data.begin(), it));
160 EXPECT_FALSE(reader.valid());
161 EXPECT_EQ(reader.position(), reader.size());
162 EXPECT_FALSE(reader.next());
165 template <class Reader, class List>
166 void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
167 for (size_t steps = 10; steps < 100; steps += 10) {
168 testSkipTo<Reader, List>(data, list, steps);
170 for (size_t steps = 100; steps <= 1000; steps += 100) {
171 testSkipTo<Reader, List>(data, list, steps);
173 testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max());
175 // Skip to the first element.
177 EXPECT_TRUE(reader.skipTo(data[0]));
178 EXPECT_EQ(reader.value(), data[0]);
179 EXPECT_EQ(reader.position(), 0);
182 // Skip past the last element.
184 EXPECT_FALSE(reader.skipTo(data.back() + 1));
185 EXPECT_FALSE(reader.valid());
186 EXPECT_EQ(reader.position(), reader.size());
187 EXPECT_FALSE(reader.next());
190 // Skip to maximum integer.
192 using ValueType = typename Reader::ValueType;
193 EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
194 EXPECT_FALSE(reader.valid());
195 EXPECT_EQ(reader.position(), reader.size());
196 EXPECT_FALSE(reader.next());
200 template <class Reader, class List>
201 void testJump(const std::vector<uint32_t>& data, const List& list) {
203 std::vector<size_t> is(data.size());
204 for (size_t i = 0; i < data.size(); ++i) {
207 std::shuffle(is.begin(), is.end(), gen);
208 if (Reader::EncoderType::forwardQuantum == 0) {
209 is.resize(std::min<size_t>(is.size(), 100));
214 EXPECT_TRUE(reader.jump(i));
215 EXPECT_EQ(reader.value(), data[i]);
216 EXPECT_EQ(reader.position(), i);
217 maybeTestPreviousValue(data, reader, i);
219 EXPECT_FALSE(reader.jump(data.size()));
220 EXPECT_FALSE(reader.valid());
221 EXPECT_EQ(reader.position(), reader.size());
224 template <class Reader, class List>
225 void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
226 CHECK(!data.empty());
231 std::uniform_int_distribution<> values(0, data.back());
232 const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000;
233 for (size_t i = 0; i < iters; ++i) {
234 const uint32_t value = values(gen);
235 auto it = std::lower_bound(data.begin(), data.end(), value);
236 CHECK(it != data.end());
237 EXPECT_TRUE(reader.jumpTo(value));
238 EXPECT_EQ(reader.value(), *it);
239 EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
242 EXPECT_TRUE(reader.jumpTo(0));
243 EXPECT_EQ(reader.value(), data[0]);
244 EXPECT_EQ(reader.position(), 0);
246 EXPECT_TRUE(reader.jumpTo(data.back()));
247 EXPECT_EQ(reader.value(), data.back());
248 EXPECT_EQ(reader.position(), reader.size() - 1);
250 EXPECT_FALSE(reader.jumpTo(data.back() + 1));
251 EXPECT_FALSE(reader.valid());
252 EXPECT_EQ(reader.position(), reader.size());
255 template <class Reader, class Encoder>
257 const typename Encoder::ValueType* const data = nullptr;
258 auto list = Encoder::encode(data, data);
261 EXPECT_FALSE(reader.next());
262 EXPECT_EQ(reader.size(), 0);
266 EXPECT_FALSE(reader.skip(1));
267 EXPECT_FALSE(reader.skip(10));
268 EXPECT_FALSE(reader.jump(0));
269 EXPECT_FALSE(reader.jump(10));
273 EXPECT_FALSE(reader.skipTo(1));
274 EXPECT_FALSE(reader.jumpTo(1));
278 template <class Reader, class Encoder>
279 void testAll(const std::vector<uint32_t>& data) {
280 auto list = Encoder::encode(data.begin(), data.end());
281 testNext<Reader>(data, list);
282 testSkip<Reader>(data, list);
283 testSkipTo<Reader>(data, list);
284 testJump<Reader>(data, list);
285 testJumpTo<Reader>(data, list);
289 template <class Reader, class List>
290 void bmNext(const List& list, const std::vector<uint32_t>& data, size_t iters) {
296 for (size_t i = 0; i < iters; ++i) {
297 if (LIKELY(reader.next())) {
298 folly::doNotOptimizeAway(reader.value());
305 template <class Reader, class List>
306 void bmSkip(const List& list,
307 const std::vector<uint32_t>& /* data */,
310 size_t avg = (size_t(1) << logAvgSkip);
311 size_t base = avg - (avg >> 2);
312 size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
315 for (size_t i = 0; i < iters; ++i) {
316 size_t skip = base + (i & mask);
317 if (LIKELY(reader.skip(skip))) {
318 folly::doNotOptimizeAway(reader.value());
325 template <class Reader, class List>
326 void bmSkipTo(const List& list, const std::vector<uint32_t>& data,
327 size_t logAvgSkip, size_t iters) {
328 size_t avg = (size_t(1) << logAvgSkip);
329 size_t base = avg - (avg >> 2);
330 size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
333 for (size_t i = 0, j = -1; i < iters; ++i) {
334 size_t skip = base + (i & mask);
336 if (j >= data.size()) {
341 reader.skipTo(data[j]);
342 folly::doNotOptimizeAway(reader.value());
346 template <class Reader, class List>
347 void bmJump(const List& list, const std::vector<uint32_t>& data,
348 const std::vector<size_t>& order, size_t iters) {
349 CHECK(!data.empty());
350 CHECK_EQ(data.size(), order.size());
353 for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
354 if (j == order.size()) {
357 reader.jump(order[j]);
358 folly::doNotOptimizeAway(reader.value());
362 template <class Reader, class List>
363 void bmJumpTo(const List& list, const std::vector<uint32_t>& data,
364 const std::vector<size_t>& order, size_t iters) {
365 CHECK(!data.empty());
366 CHECK_EQ(data.size(), order.size());
369 for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
370 if (j == order.size()) {
373 reader.jumpTo(data[order[j]]);
374 folly::doNotOptimizeAway(reader.value());