2 * Eddie Kohler, Yandong Mao, Robert Morris
3 * Copyright (c) 2012-2014 President and Fellows of Harvard College
4 * Copyright (c) 2012-2014 Massachusetts Institute of Technology
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, subject to the conditions
9 * listed in the Masstree LICENSE file. These conditions include: you must
10 * preserve this copyright notice, and you cannot mention the copyright
11 * holders in advertising related to the Software without their permission.
12 * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
13 * notice is a summary of the Masstree LICENSE file; the license in that file
// Include guard + portability macros (chunked view: several #else/#endif
// lines of these conditionals fall outside this excerpt).
16 #ifndef MASSTREE_COMPILER_HH
17 #define MASSTREE_COMPILER_HH 1
// Ask <inttypes.h> for the PRI* format macros when compiled as C++.
19 #define __STDC_FORMAT_MACROS
21 #include <arpa/inet.h>
23 #include <type_traits>
// Number of elements in a C array (invalid on pointers).
26 #define arraysize(a) (sizeof(a) / sizeof((a)[0]))
// Branch-prediction hints for hot paths.
28 #define likely(x) __builtin_expect(!!(x), 1)
29 #define unlikely(x) __builtin_expect(!!(x), 0)
// Pre-C++11 fallback: duplicate switch cases (case 0 vs. case !!(x)) force a
// compile error exactly when the asserted condition is false.
31 #if !HAVE_CXX_STATIC_ASSERT
32 #define static_assert(x, msg) switch (x) case 0: case !!(x):
// Pre-C++11 fallback: degrade constexpr to const.
35 #if !HAVE_CXX_CONSTEXPR
36 #define constexpr const
// printf-style format strings for off_t/size_t/ssize_t, selected by configure.
39 #if HAVE_OFF_T_IS_LONG_LONG
40 #define PRIdOFF_T "lld"
42 #define PRIdOFF_T "ld"
45 #if HAVE_SIZE_T_IS_UNSIGNED_LONG_LONG
46 #define PRIdSIZE_T "llu"
47 #define PRIdSSIZE_T "lld"
48 #elif HAVE_SIZE_T_IS_UNSIGNED_LONG
49 #define PRIdSIZE_T "lu"
50 #define PRIdSSIZE_T "ld"
52 #define PRIdSIZE_T "u"
53 #define PRIdSSIZE_T "d"
// Collapse i386/x86_64 detection into one __x86__ flag, and allow use of the
// GCC __sync_* builtins on these targets.
56 #if (__i386__ || __x86_64__) && !defined(__x86__)
60 #define ALLOW___SYNC_BUILTINS 1
// Platforms known to tolerate unaligned word loads/stores (used below by the
// write_in_host_order/read_in_host_order helpers).
// Fix: the guard previously tested the misspelled macro
// "HAVE_INDIFFERENT_ALIGMENT", so a configure-provided definition of
// HAVE_INDIFFERENT_ALIGNMENT was ignored and the macro was redefined here;
// the spelling now matches the macro actually defined and tested later.
62 #if !defined(HAVE_INDIFFERENT_ALIGNMENT) && (__i386__ || __x86_64__ || __arch_um__)
63 # define HAVE_INDIFFERENT_ALIGNMENT 1
67 /** @brief Return the index of the most significant bit set in @a x.
68 * @return 0 if @a x = 0; otherwise the index of first bit set, where the
69 * most significant bit is numbered 1.
// Counts from the MSB: returns 1 for the top bit, bit-width for the lowest.
71 inline int ffs_msb(unsigned x) {
72 return (x ? __builtin_clz(x) + 1 : 0);
75 inline int ffs_msb(unsigned long x) {
76 return (x ? __builtin_clzl(x) + 1 : 0);
79 inline int ffs_msb(unsigned long long x) {
80 return (x ? __builtin_clzll(x) + 1 : 0);
84 /** @brief Compiler fence.
86 * Prevents reordering of loads and stores by the compiler. Not intended to
87 * synchronize the processor's caches. */
// Body of the compiler fence (its signature line lies outside this excerpt):
// an empty asm with a "memory" clobber blocks compiler reordering only.
89 asm volatile("" : : : "memory");
92 /** @brief Acquire fence. */
// On x86 (TSO) a compiler barrier is sufficient for acquire ordering.
93 inline void acquire_fence() {
94 asm volatile("" : : : "memory");
97 /** @brief Release fence. */
// Likewise, release ordering needs no hardware fence on x86.
98 inline void release_fence() {
99 asm volatile("" : : : "memory");
102 /** @brief Compiler fence that relaxes the processor.
104 Use this in spinloops, for example. */
105 inline void relax_fence() {
106 asm volatile("pause" : : : "memory"); // equivalent to "rep; nop"
109 /** @brief Full memory fence. */
// mfence orders all prior loads/stores before all subsequent ones.
110 inline void memory_fence() {
111 asm volatile("mfence" : : : "memory");
114 /** @brief Do-nothing function object. */
// Callable with zero, one, or two arguments; every overload is a no-op.
// Used as the BARRIER parameter of sized_compiler_operations when no
// post-operation fence is wanted (e.g. test_and_set_acquire below).
116 void operator()() const {
118 template <typename T>
119 void operator()(const T&) const {
121 template <typename T, typename U>
122 void operator()(const T&, const U&) const {
126 /** @brief Function object that calls fence(). */
127 struct fence_function {
128 void operator()() const {
133 /** @brief Function object that calls relax_fence(). */
134 struct relax_fence_function {
135 void operator()() const {
140 /** @brief Function object that calls relax_fence() with backoff. */
// Spins relax_fence() count_+1 times per call, then grows count_
// exponentially; the mask caps the backoff at 15 iterations.
141 struct backoff_fence_function {
142 backoff_fence_function()
146 for (int i = count_; i >= 0; --i)
148 count_ = ((count_ << 1) | 1) & 15;
// Size-dispatched atomic primitives. B (BARRIER) is a function object run
// after each inline-asm operation; selected at the call sites below via
// sized_compiler_operations<sizeof(T), fence_function>.
155 template <int SIZE, typename BARRIER> struct sized_compiler_operations;
// 1-byte specialization: byte forms xchgb/cmpxchgb/xaddb/orb.
157 template <typename B> struct sized_compiler_operations<1, B> {
// Atomic exchange; x86 xchg carries an implicit lock prefix.
// "+q" restricts to byte-addressable registers (a/b/c/d on i386).
159 static inline type xchg(type* object, type new_value) {
160 asm volatile("xchgb %0,%1"
161 : "+q" (new_value), "+m" (*object));
// Compare-and-swap returning the value actually found in *object
// (cmpxchg leaves it in %al, i.e. in `expected`).
165 static inline type val_cmpxchg(type* object, type expected, type desired) {
166 #if __x86__ && (PREFER_X86 || !HAVE___SYNC_VAL_COMPARE_AND_SWAP)
167 asm volatile("lock; cmpxchgb %2,%1"
168 : "+a" (expected), "+m" (*object)
169 : "r" (desired) : "cc");
173 return __sync_val_compare_and_swap(object, expected, desired);
// Compare-and-swap returning success as bool (sete captures ZF).
176 static inline bool bool_cmpxchg(type* object, type expected, type desired) {
177 #if HAVE___SYNC_BOOL_COMPARE_AND_SWAP && ALLOW___SYNC_BUILTINS
178 return __sync_bool_compare_and_swap(object, expected, desired);
181 asm volatile("lock; cmpxchgb %3,%1; sete %b2"
182 : "+a" (expected), "+m" (*object), "=q" (result)
183 : "q" (desired) : "cc");
// Atomic fetch-and-add; returns the value before the addition.
188 static inline type fetch_and_add(type *object, type addend) {
189 #if __x86__ && (PREFER_X86 || !HAVE___SYNC_FETCH_AND_ADD)
190 asm volatile("lock; xaddb %0,%1"
191 : "+q" (addend), "+m" (*object) : : "cc");
195 return __sync_fetch_and_add(object, addend);
// Atomic OR into *object; no value is returned.
// NOTE(review): `addend` is written as an output operand ("=r") here,
// matching the other atomic_or specializations — verify against upstream
// that an input constraint was not intended.
198 static inline void atomic_or(type* object, type addend) {
200 asm volatile("lock; orb %0,%1"
201 : "=r" (addend), "+m" (*object) : : "cc");
204 __sync_fetch_and_or(object, addend);
// 2-byte specialization: word forms xchgw/cmpxchgw/xaddw/orw; same contract
// as the 1-byte specialization above.
209 template <typename B> struct sized_compiler_operations<2, B> {
210 #if SIZEOF_SHORT == 2
213 typedef int16_t type;
215 static inline type xchg(type* object, type new_value) {
216 asm volatile("xchgw %0,%1"
217 : "+r" (new_value), "+m" (*object));
221 static inline type val_cmpxchg(type* object, type expected, type desired) {
222 #if __x86__ && (PREFER_X86 || !HAVE___SYNC_VAL_COMPARE_AND_SWAP)
223 asm volatile("lock; cmpxchgw %2,%1"
224 : "+a" (expected), "+m" (*object)
225 : "r" (desired) : "cc");
229 return __sync_val_compare_and_swap(object, expected, desired);
232 static inline bool bool_cmpxchg(type* object, type expected, type desired) {
233 #if HAVE___SYNC_BOOL_COMPARE_AND_SWAP && ALLOW___SYNC_BUILTINS
234 return __sync_bool_compare_and_swap(object, expected, desired);
237 asm volatile("lock; cmpxchgw %3,%1; sete %b2"
238 : "+a" (expected), "+m" (*object), "=q" (result)
239 : "r" (desired) : "cc");
244 static inline type fetch_and_add(type* object, type addend) {
245 #if __x86__ && (PREFER_X86 || !HAVE___SYNC_FETCH_AND_ADD)
246 asm volatile("lock; xaddw %0,%1"
247 : "+r" (addend), "+m" (*object) : : "cc");
251 return __sync_fetch_and_add(object, addend);
254 static inline void atomic_or(type* object, type addend) {
256 asm volatile("lock; orw %0,%1"
257 : "=r" (addend), "+m" (*object) : : "cc");
260 __sync_fetch_and_or(object, addend);
// 4-byte specialization: long forms xchgl/cmpxchgl/xaddl/orl.
265 template <typename B> struct sized_compiler_operations<4, B> {
269 typedef int32_t type;
271 static inline type xchg(type* object, type new_value) {
272 asm volatile("xchgl %0,%1"
273 : "+r" (new_value), "+m" (*object));
277 static inline type val_cmpxchg(type* object, type expected, type desired) {
278 #if __x86__ && (PREFER_X86 || !HAVE___SYNC_VAL_COMPARE_AND_SWAP)
279 asm volatile("lock; cmpxchgl %2,%1"
280 : "+a" (expected), "+m" (*object)
281 : "r" (desired) : "cc");
285 return __sync_val_compare_and_swap(object, expected, desired);
288 static inline bool bool_cmpxchg(type* object, type expected, type desired) {
289 #if HAVE___SYNC_BOOL_COMPARE_AND_SWAP && ALLOW___SYNC_BUILTINS
290 return __sync_bool_compare_and_swap(object, expected, desired);
293 asm volatile("lock; cmpxchgl %3,%1; sete %b2"
294 : "+a" (expected), "+m" (*object), "=q" (result)
295 : "r" (desired) : "cc");
300 static inline type fetch_and_add(type *object, type addend) {
301 #if __x86__ && (PREFER_X86 || !HAVE___SYNC_FETCH_AND_ADD)
302 asm volatile("lock; xaddl %0,%1"
303 : "+r" (addend), "+m" (*object) : : "cc");
307 return __sync_fetch_and_add(object, addend);
310 static inline void atomic_or(type* object, type addend) {
312 asm volatile("lock; orl %0,%1"
313 : "=r" (addend), "+m" (*object) : : "cc");
316 __sync_fetch_and_or(object, addend);
// 8-byte specialization. On x86-64 the quad forms (xchgq/cmpxchgq/xaddq/orq)
// are used directly; on i386 the 64-bit CAS is built from cmpxchg8b with the
// operand split into 32-bit halves in edx:eax / ecx:ebx.
321 template <typename B> struct sized_compiler_operations<8, B> {
322 #if SIZEOF_LONG_LONG == 8
323 typedef long long type;
324 #elif SIZEOF_LONG == 8
327 typedef int64_t type;
330 static inline type xchg(type* object, type new_value) {
331 asm volatile("xchgq %0,%1"
332 : "+r" (new_value), "+m" (*object));
337 static inline type val_cmpxchg(type* object, type expected, type desired) {
338 #if __x86_64__ && (PREFER_X86 || !HAVE___SYNC_VAL_COMPARE_AND_SWAP_8)
339 asm volatile("lock; cmpxchgq %2,%1"
340 : "+a" (expected), "+m" (*object)
341 : "r" (desired) : "cc");
// i386 path: cmpxchg8b compares edx:eax with *object and, on match,
// stores ecx:ebx; the old value comes back in edx:eax.
344 #elif __i386__ && (PREFER_X86 || !HAVE___SYNC_VAL_COMPARE_AND_SWAP_8)
345 uint32_t expected_low(expected), expected_high(expected >> 32),
346 desired_low(desired), desired_high(desired >> 32);
347 asm volatile("lock; cmpxchg8b %2"
348 : "+a" (expected_low), "+d" (expected_high), "+m" (*object)
349 : "b" (desired_low), "c" (desired_high) : "cc");
// Reassemble the 64-bit old value from its 32-bit halves.
351 return ((uint64_t) expected_high << 32) | expected_low;
352 #elif HAVE___SYNC_VAL_COMPARE_AND_SWAP_8
353 return __sync_val_compare_and_swap(object, expected, desired);
356 static inline bool bool_cmpxchg(type* object, type expected, type desired) {
357 #if HAVE___SYNC_BOOL_COMPARE_AND_SWAP_8 && ALLOW___SYNC_BUILTINS
358 return __sync_bool_compare_and_swap(object, expected, desired);
361 asm volatile("lock; cmpxchgq %3,%1; sete %b2"
362 : "+a" (expected), "+m" (*object), "=q" (result)
363 : "r" (desired) : "cc");
367 uint32_t expected_low(expected), expected_high(expected >> 32),
368 desired_low(desired), desired_high(desired >> 32);
370 asm volatile("lock; cmpxchg8b %2; sete %b4"
371 : "+a" (expected_low), "+d" (expected_high),
372 "+m" (*object), "=q" (result)
373 : "b" (desired_low), "c" (desired_high) : "cc");
// 64-bit fetch_and_add / atomic_or are only provided where a 64-bit
// primitive exists (x86-64 or the 8-byte __sync builtins).
378 #if __x86_64__ || HAVE___SYNC_FETCH_AND_ADD_8
379 static inline type fetch_and_add(type* object, type addend) {
380 # if __x86_64__ && (PREFER_X86 || !HAVE___SYNC_FETCH_AND_ADD_8)
381 asm volatile("lock; xaddq %0,%1"
382 : "+r" (addend), "+m" (*object) : : "cc");
386 return __sync_fetch_and_add(object, addend);
390 #if __x86_64__ || HAVE___SYNC_FETCH_AND_OR_8
391 static inline void atomic_or(type* object, type addend) {
393 asm volatile("lock; orq %0,%1"
394 : "=r" (addend), "+m" (*object) : : "cc");
397 __sync_fetch_and_or(object, addend);
// Typed front end: dispatch by sizeof(T) to the matching specialization,
// with a full fence after the operation (fence_function).
// (The `template <typename T>` header for this function lies outside
// this excerpt.)
404 inline T xchg(T* object, T new_value) {
405 typedef sized_compiler_operations<sizeof(T), fence_function> sco_t;
406 typedef typename sco_t::type type;
407 return (T) sco_t::xchg((type*) object, (type) new_value);
// Convenience overloads: accept an int new_value for narrow integer types
// so callers can write xchg(&b, 1) without explicit casts.
410 inline int8_t xchg(int8_t* object, int new_value) {
411 return xchg(object, (int8_t) new_value);
413 inline uint8_t xchg(uint8_t* object, int new_value) {
414 return xchg(object, (uint8_t) new_value);
416 inline int16_t xchg(int16_t* object, int new_value) {
417 return xchg(object, (int16_t) new_value);
419 inline uint16_t xchg(uint16_t* object, int new_value) {
420 return xchg(object, (uint16_t) new_value);
422 inline unsigned xchg(unsigned* object, int new_value) {
423 return xchg(object, (unsigned) new_value);
426 /** @brief Atomic compare and exchange. Return actual old value.
427 * @param object pointer to memory value
428 * @param expected old value
429 * @param desired new value
430 * @return actual old value
432 * Acts like an atomic version of:
435 * if (actual == expected)
439 template <typename T>
440 inline T cmpxchg(T* object, T expected, T desired) {
441 typedef sized_compiler_operations<sizeof(T), fence_function> sco_t;
442 typedef typename sco_t::type type;
443 return (T) sco_t::val_cmpxchg((type*) object, (type) expected, (type) desired);
446 inline unsigned cmpxchg(unsigned *object, int expected, int desired) {
447 return cmpxchg(object, unsigned(expected), unsigned(desired));
450 /** @brief Atomic compare and exchange. Return true iff swap succeeds.
451 * @param object pointer to memory value
452 * @param expected old value
453 * @param desired new value
454 * @return true if swap succeeded, false otherwise
456 * Acts like an atomic version of:
459 * if (actual == expected) {
465 template <typename T>
466 inline bool bool_cmpxchg(T* object, T expected, T desired) {
467 typedef sized_compiler_operations<sizeof(T), fence_function> sco_t;
468 typedef typename sco_t::type type;
469 return sco_t::bool_cmpxchg((type*) object, (type) expected, (type) desired);
472 inline bool bool_cmpxchg(uint8_t* object, int expected, int desired) {
473 return bool_cmpxchg(object, uint8_t(expected), uint8_t(desired));
475 inline bool bool_cmpxchg(unsigned *object, int expected, int desired) {
476 return bool_cmpxchg(object, unsigned(expected), unsigned(desired));
479 /** @brief Atomic fetch-and-add. Return the old value.
480 * @param object pointer to integer
481 * @param addend value to add
482 * @return old value */
483 template <typename T>
484 inline T fetch_and_add(T* object, T addend) {
485 typedef sized_compiler_operations<sizeof(T), fence_function> sco_t;
486 typedef typename sco_t::type type;
487 return (T) sco_t::fetch_and_add((type*) object, (type) addend);
// Pointer overload: scales addend by sizeof(T), mimicking pointer
// arithmetic (object += addend) performed atomically.
490 template <typename T>
491 inline T* fetch_and_add(T** object, int addend) {
492 typedef sized_compiler_operations<sizeof(T*), fence_function> sco_t;
493 typedef typename sco_t::type type;
494 return (T*) sco_t::fetch_and_add((type*) object, (type) (addend * sizeof(T)));
// Convenience overloads taking an int addend for narrow integer types.
497 inline int8_t fetch_and_add(int8_t* object, int addend) {
498 return fetch_and_add(object, int8_t(addend));
500 inline uint8_t fetch_and_add(uint8_t* object, int addend) {
501 return fetch_and_add(object, uint8_t(addend));
503 inline int16_t fetch_and_add(int16_t* object, int addend) {
504 return fetch_and_add(object, int16_t(addend));
506 inline uint16_t fetch_and_add(uint16_t* object, int addend) {
507 return fetch_and_add(object, uint16_t(addend));
509 inline unsigned fetch_and_add(unsigned* object, int addend) {
510 return fetch_and_add(object, unsigned(addend));
512 inline unsigned long fetch_and_add(unsigned long* object, int addend) {
513 return fetch_and_add(object, (unsigned long)(addend));
517 /** @brief Test-and-set lock acquire. */
// Spin until the previous value was 0; the do_nothing barrier means the
// xchg's own implicit lock supplies the acquire ordering.
518 template <typename T>
519 inline void test_and_set_acquire(T* object) {
520 typedef sized_compiler_operations<sizeof(T), do_nothing> sco_t;
521 typedef typename sco_t::type type;
522 while (sco_t::xchg((type*) object, (type) 1))
527 /** @brief Test-and-set lock release. */
528 template <typename T>
529 inline void test_and_set_release(T* object) {
535 /** @brief Atomic fetch-and-or. Returns nothing.
536 * @param object pointer to integer
537 * @param addend value to or */
538 template <typename T>
539 inline void atomic_or(T* object, T addend) {
540 typedef sized_compiler_operations<sizeof(T), fence_function> sco_t;
541 typedef typename sco_t::type type;
542 sco_t::atomic_or((type*) object, (type) addend);
545 inline void atomic_or(int8_t* object, int addend) {
546 atomic_or(object, int8_t(addend));
548 inline void atomic_or(uint8_t* object, int addend) {
549 atomic_or(object, uint8_t(addend));
551 inline void atomic_or(int16_t* object, int addend) {
552 atomic_or(object, int16_t(addend));
554 inline void atomic_or(uint16_t* object, int addend) {
555 atomic_or(object, uint16_t(addend));
557 inline void atomic_or(unsigned* object, int addend) {
558 atomic_or(object, unsigned(addend));
560 inline void atomic_or(unsigned long* object, int addend) {
561 atomic_or(object, (unsigned long)(addend));
565 // prefetch instruction
566 #if !PREFETCH_DEFINED
// Hint the target cache line into all cache levels (temporal data).
// The cacheline_t cast tells the compiler a full line is read.
567 inline void prefetch(const void *ptr) {
571 typedef struct { char x[CACHE_LINE_SIZE]; } cacheline_t;
572 asm volatile("prefetcht0 %0" : : "m" (*(const cacheline_t *)ptr));
// Non-temporal prefetch: fetch without polluting the cache hierarchy.
577 inline void prefetchnta(const void *ptr) {
581 typedef struct { char x[CACHE_LINE_SIZE]; } cacheline_t;
582 asm volatile("prefetchnta %0" : : "m" (*(const cacheline_t *)ptr));
// Prefetch helper: a no-op in general, but prefetches through pointers.
587 template <typename T>
588 struct value_prefetcher {
593 template <typename T>
594 struct value_prefetcher<T *> {
595 void operator()(T *p) {
596 prefetch((const void *) p);
// 64-bit byte swap. On i386 (no bswapq), swap each 32-bit half and
// exchange them; on x86-64 a single bswapq suffices.
602 inline uint64_t ntohq(uint64_t val) {
612 asm("bswapl %0; bswapl %1; xchgl %0,%1"
613 : "+r" (v.s.a), "+r" (v.s.b));
616 asm("bswapq %0" : "+r" (val));
// htonq is the same permutation as ntohq (byte swap is an involution).
621 inline uint64_t htonq(uint64_t val) {
628 /** @brief Return the number of leading 0 bits in @a x.
631 * "Leading" means "most significant." */
// Overload set mirrors the __builtin_clz* family; behavior is undefined
// for x == 0 (the builtins' precondition).
632 #if HAVE___BUILTIN_CLZ
633 inline int clz(int x) {
634 return __builtin_clz(x);
636 inline int clz(unsigned x) {
637 return __builtin_clz(x);
641 #if HAVE___BUILTIN_CLZL
642 inline int clz(long x) {
643 return __builtin_clzl(x);
645 inline int clz(unsigned long x) {
646 return __builtin_clzl(x);
650 #if HAVE___BUILTIN_CLZLL
651 inline int clz(long long x) {
652 return __builtin_clzll(x);
654 inline int clz(unsigned long long x) {
655 return __builtin_clzll(x);
659 /** @brief Return the number of trailing 0 bits in @a x.
662 * "Trailing" means "least significant." */
663 #if HAVE___BUILTIN_CTZ
664 inline int ctz(int x) {
665 return __builtin_ctz(x);
667 inline int ctz(unsigned x) {
668 return __builtin_ctz(x);
672 #if HAVE___BUILTIN_CTZL
673 inline int ctz(long x) {
674 return __builtin_ctzl(x);
676 inline int ctz(unsigned long x) {
677 return __builtin_ctzl(x);
681 #if HAVE___BUILTIN_CTZLL
682 inline int ctz(long long x) {
683 return __builtin_ctzll(x);
685 inline int ctz(unsigned long long x) {
686 return __builtin_ctzll(x);
// Round x up to the next multiple of y (the `mod` computation is on a
// line outside this excerpt).
690 template <typename T, typename U>
691 inline T iceil(T x, U y) {
693 return x + (mod ? y - mod : 0);
696 /** @brief Return the smallest power of 2 greater than or equal to @a x.
698 @pre the result is representable in type T (that is, @a x can't be
699 larger than the largest power of 2 representable in type T) */
// !(x & (x - 1)) is 1 exactly when x is already a power of two, which
// cancels the rounding-up so such x is returned unchanged.
700 template <typename T>
701 inline T iceil_log2(T x) {
702 return T(1) << (sizeof(T) * 8 - clz(x) - !(x & (x - 1)));
705 /** @brief Return the largest power of 2 less than or equal to @a x.
707 template <typename T>
708 inline T ifloor_log2(T x) {
709 return T(1) << (sizeof(T) * 8 - 1 - clz(x));
712 /** @brief Return the index of the lowest 0 nibble in @a x.
714 * 0 is the lowest-order nibble. Returns -1 if no nibbles are 0. */
// SWAR trick: h/l mark the top/bottom bit of every nibble; a borrow out
// of a zero nibble sets its top bit in t, located with ctz.
715 template <typename T>
716 inline int find_lowest_zero_nibble(T x) {
717 static_assert(sizeof(T) <= sizeof(unsigned long long), "T is too big");
718 #if SIZEOF_LONG_LONG == 16
719 T h = T(0x88888888888888888888888888888888ULL), l = T(0x11111111111111111111111111111111ULL);
721 T h = T(0x8888888888888888ULL), l = T(0x1111111111111111ULL);
723 T t = h & (x - l) & ~x;
724 return t ? ctz(t) >> 2 : -1;
727 /** @brief Translate @a x to network byte order.
729 * Compare htons/htonl/htonq. host_to_net_order is particularly useful in
730 * template functions, where the type to be translated to network byte order
// Overloads for every integral width; bodies fall on lines outside this
// excerpt (presumably hton[slq] per width — verify against full source).
732 inline unsigned char host_to_net_order(unsigned char x) {
736 inline signed char host_to_net_order(signed char x) {
740 inline char host_to_net_order(char x) {
744 inline short host_to_net_order(short x) {
748 inline unsigned short host_to_net_order(unsigned short x) {
752 inline int host_to_net_order(int x) {
756 inline unsigned host_to_net_order(unsigned x) {
761 inline long host_to_net_order(long x) {
765 inline unsigned long host_to_net_order(unsigned long x) {
768 #elif SIZEOF_LONG == 8
770 inline long host_to_net_order(long x) {
774 inline unsigned long host_to_net_order(unsigned long x) {
778 #if SIZEOF_LONG_LONG == 8
780 inline long long host_to_net_order(long long x) {
784 inline unsigned long long host_to_net_order(unsigned long long x) {
788 #if !HAVE_INT64_T_IS_LONG && !HAVE_INT64_T_IS_LONG_LONG
790 inline int64_t host_to_net_order(int64_t x) {
794 inline uint64_t host_to_net_order(uint64_t x) {
// Floating-point variants byte-swap through a same-sized integer via a
// union. NOTE(review): the float overload is declared to return double —
// confirm against upstream whether this widening is intentional.
799 inline double host_to_net_order(float x) {
800 union { float f; uint32_t i; } v;
802 v.i = host_to_net_order(v.i);
806 inline double host_to_net_order(double x) {
807 union { double d; uint64_t i; } v;
809 v.i = host_to_net_order(v.i);
813 /** @brief Translate @a x to host byte order.
815 * Compare ntohs/ntohl/ntohq. net_to_host_order is particularly useful in
816 * template functions, where the type to be translated to network byte order
818 inline unsigned char net_to_host_order(unsigned char x) {
822 inline signed char net_to_host_order(signed char x) {
826 inline char net_to_host_order(char x) {
830 inline short net_to_host_order(short x) {
834 inline unsigned short net_to_host_order(unsigned short x) {
838 inline int net_to_host_order(int x) {
842 inline unsigned net_to_host_order(unsigned x) {
847 inline long net_to_host_order(long x) {
851 inline unsigned long net_to_host_order(unsigned long x) {
854 #elif SIZEOF_LONG == 8
856 inline long net_to_host_order(long x) {
860 inline unsigned long net_to_host_order(unsigned long x) {
864 #if SIZEOF_LONG_LONG == 8
866 inline long long net_to_host_order(long long x) {
870 inline unsigned long long net_to_host_order(unsigned long long x) {
874 #if !HAVE_INT64_T_IS_LONG && !HAVE_INT64_T_IS_LONG_LONG
876 inline int64_t net_to_host_order(int64_t x) {
880 inline uint64_t net_to_host_order(uint64_t x) {
// Byte swap is an involution, so net->host delegates to host->net.
885 inline double net_to_host_order(float x) {
886 return host_to_net_order(x);
889 inline double net_to_host_order(double x) {
890 return host_to_net_order(x);
// make_aliasable<T>::type is T tagged __may_alias__, letting the
// reinterpret_cast loads/stores below bypass strict-aliasing rules.
893 template <typename T> struct make_aliasable {};
894 #define MAKE_ALIASABLE(T) template <> struct make_aliasable<T> { typedef T type __attribute__((__may_alias__)); }
895 MAKE_ALIASABLE(unsigned char);
896 MAKE_ALIASABLE(signed char);
897 MAKE_ALIASABLE(char);
898 MAKE_ALIASABLE(unsigned short);
899 MAKE_ALIASABLE(short);
901 MAKE_ALIASABLE(unsigned);
902 MAKE_ALIASABLE(long);
903 MAKE_ALIASABLE(unsigned long);
904 MAKE_ALIASABLE(long long);
905 MAKE_ALIASABLE(unsigned long long);
906 MAKE_ALIASABLE(float);
907 MAKE_ALIASABLE(double);
908 #undef MAKE_ALIASABLE
// Store x at s (possibly unaligned); returns the position just past the
// write. On alignment-indifferent targets this is a direct (may_alias)
// store, otherwise memcpy.
910 template <typename T>
911 inline char* write_in_host_order(char* s, T x) {
912 #if HAVE_INDIFFERENT_ALIGNMENT
913 *reinterpret_cast<typename make_aliasable<T>::type*>(s) = x;
915 memcpy(s, &x, sizeof(x));
917 return s + sizeof(x);
920 template <typename T>
921 inline uint8_t* write_in_host_order(uint8_t* s, T x) {
922 return reinterpret_cast<uint8_t*>
923 (write_in_host_order(reinterpret_cast<char*>(s), x));
// Load a T from s (possibly unaligned); mirror of write_in_host_order.
926 template <typename T>
927 inline T read_in_host_order(const char* s) {
928 #if HAVE_INDIFFERENT_ALIGNMENT
929 return *reinterpret_cast<const typename make_aliasable<T>::type*>(s);
932 memcpy(&x, s, sizeof(x));
937 template <typename T>
938 inline T read_in_host_order(const uint8_t* s) {
939 return read_in_host_order<T>(reinterpret_cast<const char*>(s));
// Network-order variants: byte-swap around the host-order helpers.
942 template <typename T>
943 inline char* write_in_net_order(char* s, T x) {
944 return write_in_host_order<T>(s, host_to_net_order(x));
947 template <typename T>
948 inline uint8_t* write_in_net_order(uint8_t* s, T x) {
949 return reinterpret_cast<uint8_t*>
950 (write_in_net_order(reinterpret_cast<char*>(s), x));
953 template <typename T>
954 inline T read_in_net_order(const char* s) {
955 return net_to_host_order(read_in_host_order<T>(s));
958 template <typename T>
959 inline T read_in_net_order(const uint8_t* s) {
960 return read_in_net_order<T>(reinterpret_cast<const char*>(s));
// Read performance-monitoring counter `ecx`; rdpmc returns edx:eax.
964 inline uint64_t read_pmc(uint32_t ecx) {
966 __asm __volatile("rdpmc" : "=a"(a), "=d"(d) : "c"(ecx));
967 return ((uint64_t)a) | (((uint64_t)d) << 32);
// Read the timestamp counter; rdtsc returns edx:eax.
970 inline uint64_t read_tsc(void)
973 asm volatile("rdtsc" : "=a" (low), "=d" (high));
974 return ((uint64_t)low) | (((uint64_t)high) << 32);
// Three-way comparison: negative/zero/positive like strcmp.
977 template <typename T>
978 inline int compare(T a, T b) {
982 return a < b ? -1 : 1;
// Minimal type-trait layer: aliases to <type_traits> when the compiler
// supports template aliases, hand-rolled pre-C++11 equivalents otherwise.
989 template <typename T> struct type_synonym {
994 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS
995 template <typename T, T V>
996 using integral_constant = std::integral_constant<T, V>;
997 typedef std::true_type true_type;
998 typedef std::false_type false_type;
1000 template <typename T, T V>
1001 struct integral_constant {
1002 typedef integral_constant<T, V> type;
1003 typedef T value_type;
1004 static constexpr T value = V;
// Out-of-class definition so `value` can be odr-used pre-C++17.
1006 template <typename T, T V> constexpr T integral_constant<T, V>::value;
1007 typedef integral_constant<bool, true> true_type;
1008 typedef integral_constant<bool, false> false_type;
1011 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS
1012 template <bool B, typename T, typename F>
1013 using conditional = std::conditional<B, T, F>;
// Fallback conditional: select T or F by the bool parameter.
1015 template <bool B, typename T, typename F> struct conditional {};
1016 template <typename T, typename F> struct conditional<true, T, F> {
1019 template <typename T, typename F> struct conditional<false, T, F> {
1024 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS
1025 template <typename T> using remove_const = std::remove_const<T>;
1026 template <typename T> using remove_volatile = std::remove_volatile<T>;
1027 template <typename T> using remove_cv = std::remove_cv<T>;
1029 template <typename T> struct remove_const : public type_synonym<T> {};
1030 template <typename T> struct remove_const<const T> : public type_synonym<T> {};
1031 template <typename T> struct remove_volatile : public type_synonym<T> {};
1032 template <typename T> struct remove_volatile<volatile T> : public type_synonym<T> {};
1033 template <typename T> struct remove_cv {
1034 typedef typename remove_const<typename remove_volatile<T>::type>::type type;
1038 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS
1039 template <typename T> using is_pointer = std::is_pointer<T>;
// Fallback is_pointer: strip cv-qualifiers, then match T*.
1041 template <typename T> struct is_pointer_helper : public false_type {};
1042 template <typename T> struct is_pointer_helper<T*> : public true_type {};
1043 template <typename T> struct is_pointer
1044 : public integral_constant<bool, is_pointer_helper<typename remove_cv<T>::type>::value> {};
1047 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS
1048 template <typename T> using is_reference = std::is_reference<T>;
1050 template <typename T> struct is_reference_helper : public false_type {};
1051 template <typename T> struct is_reference_helper<T&> : public true_type {};
1052 #if HAVE_CXX_RVALUE_REFERENCES
1053 template <typename T> struct is_reference_helper<T&&> : public true_type {};
1055 template <typename T> struct is_reference
1056 : public integral_constant<bool, is_reference_helper<typename remove_cv<T>::type>::value> {};
1059 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS
1060 template <typename T> using make_unsigned = std::make_unsigned<T>;
1061 template <typename T> using make_signed = std::make_signed<T>;
// Fallback signedness maps, one explicit specialization per integer type.
1063 template <typename T> struct make_unsigned {};
1064 template <> struct make_unsigned<char> : public type_synonym<unsigned char> {};
1065 template <> struct make_unsigned<signed char> : public type_synonym<unsigned char> {};
1066 template <> struct make_unsigned<unsigned char> : public type_synonym<unsigned char> {};
1067 template <> struct make_unsigned<short> : public type_synonym<unsigned short> {};
1068 template <> struct make_unsigned<unsigned short> : public type_synonym<unsigned short> {};
1069 template <> struct make_unsigned<int> : public type_synonym<unsigned> {};
1070 template <> struct make_unsigned<unsigned> : public type_synonym<unsigned> {};
1071 template <> struct make_unsigned<long> : public type_synonym<unsigned long> {};
1072 template <> struct make_unsigned<unsigned long> : public type_synonym<unsigned long> {};
1073 template <> struct make_unsigned<long long> : public type_synonym<unsigned long long> {};
1074 template <> struct make_unsigned<unsigned long long> : public type_synonym<unsigned long long> {};
1076 template <typename T> struct make_signed {};
1077 template <> struct make_signed<char> : public type_synonym<signed char> {};
1078 template <> struct make_signed<signed char> : public type_synonym<signed char> {};
1079 template <> struct make_signed<unsigned char> : public type_synonym<signed char> {};
1080 template <> struct make_signed<short> : public type_synonym<short> {};
1081 template <> struct make_signed<unsigned short> : public type_synonym<short> {};
1082 template <> struct make_signed<int> : public type_synonym<int> {};
1083 template <> struct make_signed<unsigned> : public type_synonym<int> {};
1084 template <> struct make_signed<long> : public type_synonym<long> {};
1085 template <> struct make_signed<unsigned long> : public type_synonym<long> {};
1086 template <> struct make_signed<long long> : public type_synonym<long long> {};
1087 template <> struct make_signed<unsigned long long> : public type_synonym<long long> {};
1090 /** @class is_trivially_copyable
1091 @brief Template determining whether T may be copied by memcpy.
1093 is_trivially_copyable<T> is equivalent to true_type if T has a trivial
1094 copy constructor, false_type if it does not. */
// Three tiers: std trait, compiler intrinsic, or a conservative whitelist
// of builtin scalar types (everything else defaults to false_type).
1096 #if HAVE_CXX_TEMPLATE_ALIAS && HAVE_TYPE_TRAITS && HAVE_STD_IS_TRIVIALLY_COPYABLE
1097 template <typename T> using is_trivially_copyable = std::is_trivially_copyable<T>;
1098 #elif HAVE___HAS_TRIVIAL_COPY
1099 template <typename T> struct is_trivially_copyable : public integral_constant<bool, __has_trivial_copy(T)> {};
1101 template <typename T> struct is_trivially_copyable : public false_type {};
1102 template <> struct is_trivially_copyable<unsigned char> : public true_type {};
1103 template <> struct is_trivially_copyable<signed char> : public true_type {};
1104 template <> struct is_trivially_copyable<char> : public true_type {};
1105 template <> struct is_trivially_copyable<unsigned short> : public true_type {};
1106 template <> struct is_trivially_copyable<short> : public true_type {};
1107 template <> struct is_trivially_copyable<unsigned> : public true_type {};
1108 template <> struct is_trivially_copyable<int> : public true_type {};
1109 template <> struct is_trivially_copyable<unsigned long> : public true_type {};
1110 template <> struct is_trivially_copyable<long> : public true_type {};
1111 template <> struct is_trivially_copyable<unsigned long long> : public true_type {};
1112 template <> struct is_trivially_copyable<long long> : public true_type {};
1113 template <typename T> struct is_trivially_copyable<T *> : public true_type {};
1117 /** @class fast_argument
1118 @brief Template defining a fast argument type for objects of type T.
1120 fast_argument<T>::type equals either "const T &" or "T".
1121 fast_argument<T>::is_reference is true iff fast_argument<T>::type is
1122 a reference. If fast_argument<T>::is_reference is true, then
1123 fast_argument<T>::enable_rvalue_reference is a typedef to void; otherwise
1124 it is not defined. */
// Pass by const& when T is not already a reference and is either
// non-trivially-copyable or wider than a pointer; otherwise by value.
1125 template <typename T, bool use_reference = (!is_reference<T>::value
1126 && (!is_trivially_copyable<T>::value
1127 || sizeof(T) > sizeof(void *)))>
1128 struct fast_argument;
1130 template <typename T> struct fast_argument<T, true> {
1131 static constexpr bool is_reference = true;
1132 typedef const T &type;
1133 #if HAVE_CXX_RVALUE_REFERENCES
1134 typedef void enable_rvalue_reference;
1137 template <typename T> struct fast_argument<T, false> {
1138 static constexpr bool is_reference = false;
// Out-of-class definitions so is_reference can be odr-used pre-C++17.
1141 template <typename T> constexpr bool fast_argument<T, true>::is_reference;
1142 template <typename T> constexpr bool fast_argument<T, false>::is_reference;
// Trait: does this platform have a fast widening multiply for T?
// Defaults to false; specialized to true alongside each int_multiply.
1147 template <typename T>
1148 struct has_fast_int_multiply : public mass::false_type {
1149 // enum { check_t_integral = mass::integer_traits<T>::is_signed };
// Widening multiply via one-operand x86 `mul`: full product in edx:eax
// (rdx:rax on 64-bit), returned as high/low halves.
1152 #if defined(__i386__) || defined(__x86_64__)
1153 inline void int_multiply(unsigned a, unsigned b, unsigned &xlow, unsigned &xhigh)
1155 __asm__("mul %2" : "=a" (xlow), "=d" (xhigh) : "r" (a), "a" (b) : "cc");
1157 template <> struct has_fast_int_multiply<unsigned> : public mass::true_type {};
1159 # if SIZEOF_LONG == 4 || (defined(__x86_64__) && SIZEOF_LONG == 8)
1160 inline void int_multiply(unsigned long a, unsigned long b, unsigned long &xlow, unsigned long &xhigh)
1162 __asm__("mul %2" : "=a" (xlow), "=d" (xhigh) : "r" (a), "a" (b) : "cc");
1164 template <> struct has_fast_int_multiply<unsigned long> : public mass::true_type {};
1167 # if defined(__x86_64__) && SIZEOF_LONG_LONG == 8
1168 inline void int_multiply(unsigned long long a, unsigned long long b, unsigned long long &xlow, unsigned long long &xhigh)
1170 __asm__("mul %2" : "=a" (xlow), "=d" (xhigh) : "r" (a), "a" (b) : "cc");
1172 template <> struct has_fast_int_multiply<unsigned long long> : public mass::true_type {};
// Marker type for "leave storage uninitialized" constructors.
1176 struct uninitialized_type {};