From: Christopher Dykes Date: Tue, 24 May 2016 20:57:25 +0000 (-0700) Subject: Update generate_varint_tables to support MSVC X-Git-Tag: 2016.07.26~206 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=42c850c04cc6a72087860ebd2651ab1b04a3f001;p=folly.git Update generate_varint_tables to support MSVC Summary: MSVC is a massive pain in this respect: every workaround tested still ended with it emitting a dynamic initializer for the mask table, so it's easier to generate the table as a `uint64_t` array and load it explicitly. Reviewed By: yfeldblum Differential Revision: D3270226 fbshipit-source-id: 77bc84e58d393373de05a28a30dcb80b66c09c9f --- diff --git a/folly/GroupVarint.h b/folly/GroupVarint.h index b17d15f7..2dfb09b6 100644 --- a/folly/GroupVarint.h +++ b/folly/GroupVarint.h @@ -37,7 +37,7 @@ #include namespace folly { namespace detail { -extern const __m128i groupVarintSSEMasks[]; +alignas(16) extern const uint64_t groupVarintSSEMasks[]; } // namespace detail } // namespace folly #endif @@ -196,7 +196,8 @@ class GroupVarint : public detail::GroupVarintBase { static const char* decode(const char* p, uint32_t* dest) { uint8_t key = p[0]; __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); - __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i mask = + _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]); __m128i r = _mm_shuffle_epi8(val, mask); _mm_storeu_si128((__m128i*)dest, r); return p + detail::groupVarintLengths[key]; @@ -210,7 +211,8 @@ class GroupVarint : public detail::GroupVarintBase { uint32_t* c, uint32_t* d) { uint8_t key = p[0]; __m128i val = _mm_loadu_si128((const __m128i*)(p+1)); - __m128i mask = detail::groupVarintSSEMasks[key]; + __m128i mask = + _mm_load_si128((const __m128i*)&detail::groupVarintSSEMasks[key * 2]); __m128i r = _mm_shuffle_epi8(val, mask); // Extracting 32 bits at a time out of an XMM register is a SSE4 feature diff --git a/folly/build/generate_varint_tables.py 
b/folly/build/generate_varint_tables.py index 96bfd1d1..bf082d99 100755 --- a/folly/build/generate_varint_tables.py +++ b/folly/build/generate_varint_tables.py @@ -56,15 +56,11 @@ def generate(f): #include -#if (FOLLY_X64 || defined(__i386__)) && (FOLLY_SSE >= 2) -#include -#endif - namespace folly { namespace detail { #if (FOLLY_X64 || defined(__i386__)) && (FOLLY_SSE >= 2) -extern const __m128i groupVarintSSEMasks[] = { +alignas(16) extern const uint64_t groupVarintSSEMasks[512] = { """) # Compute SSE masks @@ -81,8 +77,8 @@ extern const __m128i groupVarintSSEMasks[] = { # 0xff: set corresponding byte in result to 0 for k in range(d, 4): vals[j] |= 0xff << (8 * k) - f.write(" {{static_cast(0x{1:08x}{0:08x}), " - "static_cast(0x{3:08x}{2:08x})}},\n".format(*vals)) + f.write(" 0x{1:08x}{0:08x}ULL, " + "0x{3:08x}{2:08x}ULL,\n".format(*vals)) f.write("};\n" "#endif /*#if (FOLLY_X64 || defined(__i386__)) && (FOLLY_SSE >= 2)*/\n"