X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FConv.cpp;h=8e2f575293475d07485302ee28a2a8b12ba7c952;hb=71d0dd7fa8165a80fd9b93d99c541d550fcdfe70;hp=7c6afcff1ffee1c7c2211ba0a2fdfec146d1c2e3;hpb=54f2a4c869770c4e800f654bd4f916cdcf898864;p=folly.git diff --git a/folly/Conv.cpp b/folly/Conv.cpp index 7c6afcff..8e2f5752 100644 --- a/folly/Conv.cpp +++ b/folly/Conv.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2016 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,20 +13,43 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#define FOLLY_CONV_INTERNAL #include +#include namespace folly { namespace detail { -extern const char digit1[101] = - "00000000001111111111222222222233333333334444444444" - "55555555556666666666777777777788888888889999999999"; -extern const char digit2[101] = - "01234567890123456789012345678901234567890123456789" - "01234567890123456789012345678901234567890123456789"; +namespace { + +/** + * Finds the first non-digit in a string. The number of digits + * searched depends on the precision of the Tgt integral. Assumes the + * string starts with NO whitespace and NO sign. + * + * The semantics of the routine is: + * for (;; ++b) { + * if (b >= e || !isdigit(*b)) return b; + * } + * + * Complete unrolling marks bottom-line (i.e. entire conversion) + * improvements of 20%. + */ +inline const char* findFirstNonDigit(const char* b, const char* e) { + for (; b < e; ++b) { + auto const c = static_cast(*b) - '0'; + if (c >= 10) { + break; + } + } + return b; +} + +// Maximum value of number when represented as a string +template +struct MaxString { + static const char* const value; +}; -template <> const char *const MaxString::value = "true"; template <> const char *const MaxString::value = "255"; template <> const char *const MaxString::value = "65535"; template <> const char *const MaxString::value = "4294967295"; @@ -46,12 +69,11 @@ static_assert(sizeof(unsigned long long) >= 8, "Wrong value for MaxString::value" ", please update."); -#ifdef FOLLY_HAVE_INT128_T +#if FOLLY_HAVE_INT128_T template <> const char *const MaxString<__uint128_t>::value = "340282366920938463463374607431768211455"; #endif -namespace { /* * Lookup tables that converts from a decimal character value to an integral * binary value, shifted by a decimal "shift" multiplier. @@ -179,6 +201,41 @@ FOLLY_ALIGNED(16) constexpr uint16_t shift1000[] = { OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240 OOR, OOR, OOR, OOR, OOR, OOR // 250 }; + +struct ErrorString { + const char* string; + bool quote; +}; + +// Keep this in sync with ConversionCode in Conv.h +constexpr const std::array< + ErrorString, + static_cast(ConversionCode::NUM_ERROR_CODES)> + kErrorStrings{{ + {"Success", true}, + {"Empty input string", true}, + {"No digits found in input string", true}, + {"Integer overflow when parsing bool (must be 0 or 1)", true}, + {"Invalid value for bool", true}, + {"Non-digit character found", true}, + {"Invalid leading character", true}, + {"Overflow during conversion", true}, + {"Negative overflow during conversion", true}, + {"Unable to convert string to floating point value", true}, + {"Non-whitespace character found after end of conversion", true}, + {"Overflow during arithmetic conversion", false}, + {"Negative overflow during arithmetic conversion", false}, + {"Loss of precision during arithmetic conversion", false}, + }}; + +// Check if ASCII is really ASCII +using IsAscii = std:: + integral_constant; + +// The code in this file that uses tolower() really only cares about +// 7-bit ASCII characters, so we can take a nice shortcut here. +inline char tolower_ascii(char in) { + return IsAscii::value ? in | 0x20 : char(std::tolower(in)); } inline bool bool_str_cmp(const char** b, size_t len, const char* value) { @@ -187,7 +244,7 @@ inline bool bool_str_cmp(const char** b, size_t len, const char* value) { const char* e = *b + len; const char* v = value; while (*v != '\0') { - if (p == e || tolower(*p) != *v) { // value is already lowercase + if (p == e || tolower_ascii(*p) != *v) { // value is already lowercase return false; } ++p; @@ -198,24 +255,29 @@ inline bool bool_str_cmp(const char** b, size_t len, const char* value) { return true; } -bool str_to_bool(StringPiece* src) { +} // namespace + +Expected str_to_bool(StringPiece* src) noexcept { auto b = src->begin(), e = src->end(); for (;; ++b) { - FOLLY_RANGE_CHECK_STRINGPIECE( - b < e, "No non-whitespace characters found in input string", *src); - if (!isspace(*b)) break; + if (b >= e) { + return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); + } + if (!std::isspace(*b)) { + break; + } } bool result; - size_t len = e - b; + size_t len = size_t(e - b); switch (*b) { case '0': case '1': { result = false; for (; b < e && isdigit(*b); ++b) { - FOLLY_RANGE_CHECK_STRINGPIECE( - !result && (*b == '0' || *b == '1'), - "Integer overflow when parsing bool: must be 0 or 1", *src); + if (result || (*b != '0' && *b != '1')) { + return makeUnexpected(ConversionCode::BOOL_OVERFLOW); + } result = (*b == '1'); } break; @@ -255,24 +317,24 @@ bool str_to_bool(StringPiece* src) { } else if (bool_str_cmp(&b, len, "off")) { result = false; } else { - FOLLY_RANGE_CHECK_STRINGPIECE(false, "Invalid value for bool", *src); + return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); } break; default: - FOLLY_RANGE_CHECK_STRINGPIECE(false, "Invalid value for bool", *src); + return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); } src->assign(b, e); + return result; } -namespace { /** * StringPiece to double, with progress information. Alters the * StringPiece parameter to munch the already-parsed characters. */ template -Tgt str_to_floating(StringPiece* src) { +Expected str_to_floating(StringPiece* src) noexcept { using namespace double_conversion; static StringToDoubleConverter conv(StringToDoubleConverter::ALLOW_TRAILING_JUNK @@ -282,8 +344,9 @@ Tgt str_to_floating(StringPiece* src) { std::numeric_limits::quiet_NaN(), nullptr, nullptr); - FOLLY_RANGE_CHECK_STRINGPIECE(!src->empty(), - "No digits found in input string", *src); + if (src->empty()) { + return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); + } int length; auto result = conv.StringToDouble(src->data(), @@ -291,135 +354,212 @@ Tgt str_to_floating(StringPiece* src) { &length); // processed char count if (!std::isnan(result)) { - src->advance(length); - return result; + // If we get here with length = 0, the input string is empty. + // If we get here with result = 0.0, it's either because the string + // contained only whitespace, or because we had an actual zero value + // (with potential trailing junk). If it was only whitespace, we + // want to raise an error; length will point past the last character + // that was processed, so we need to check if that character was + // whitespace or not. + if (length == 0 || + (result == 0.0 && std::isspace((*src)[size_t(length) - 1]))) { + return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); + } + src->advance(size_t(length)); + return Tgt(result); } - for (;; src->advance(1)) { - if (src->empty()) { - throw std::range_error("Unable to convert an empty string" - " to a floating point value."); - } - if (!isspace(src->front())) { + auto* e = src->end(); + auto* b = + std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); }); + + // There must be non-whitespace, otherwise we would have caught this above + assert(b < e); + size_t size = size_t(e - b); + + bool negative = false; + if (*b == '-') { + negative = true; + ++b; + --size; + } + + result = 0.0; + + switch (tolower_ascii(*b)) { + case 'i': + if (size >= 3 && tolower_ascii(b[1]) == 'n' && + tolower_ascii(b[2]) == 'f') { + if (size >= 8 && tolower_ascii(b[3]) == 'i' && + tolower_ascii(b[4]) == 'n' && tolower_ascii(b[5]) == 'i' && + tolower_ascii(b[6]) == 't' && tolower_ascii(b[7]) == 'y') { + b += 8; + } else { + b += 3; + } + result = std::numeric_limits::infinity(); + } + break; + + case 'n': + if (size >= 3 && tolower_ascii(b[1]) == 'a' && + tolower_ascii(b[2]) == 'n') { + b += 3; + result = std::numeric_limits::quiet_NaN(); + } + break; + + default: break; - } } - // Was that "inf[inity]"? - if (src->size() >= 3 && toupper((*src)[0]) == 'I' - && toupper((*src)[1]) == 'N' && toupper((*src)[2]) == 'F') { - if (src->size() >= 8 && - toupper((*src)[3]) == 'I' && - toupper((*src)[4]) == 'N' && - toupper((*src)[5]) == 'I' && - toupper((*src)[6]) == 'T' && - toupper((*src)[7]) == 'Y') { - src->advance(8); - } else { - src->advance(3); - } - return std::numeric_limits::infinity(); - } - - // Was that "-inf[inity]"? - if (src->size() >= 4 && toupper((*src)[0]) == '-' - && toupper((*src)[1]) == 'I' && toupper((*src)[2]) == 'N' - && toupper((*src)[3]) == 'F') { - if (src->size() >= 9 && - toupper((*src)[4]) == 'I' && - toupper((*src)[5]) == 'N' && - toupper((*src)[6]) == 'I' && - toupper((*src)[7]) == 'T' && - toupper((*src)[8]) == 'Y') { - src->advance(9); - } else { - src->advance(4); + if (result == 0.0) { + // All bets are off + return makeUnexpected(ConversionCode::STRING_TO_FLOAT_ERROR); + } + + if (negative) { + result = -result; + } + + src->assign(b, e); + + return Tgt(result); +} + +template Expected str_to_floating( + StringPiece* src) noexcept; +template Expected str_to_floating( + StringPiece* src) noexcept; + +/** + * This class takes care of additional processing needed for signed values, + * like leading sign character and overflow checks. + */ +template ::value> +class SignedValueHandler; + +template +class SignedValueHandler { + public: + ConversionCode init(const char*& b) { + negative_ = false; + if (!std::isdigit(*b)) { + if (*b == '-') { + negative_ = true; + } else if (UNLIKELY(*b != '+')) { + return ConversionCode::INVALID_LEADING_CHAR; + } + ++b; } - return -std::numeric_limits::infinity(); + return ConversionCode::SUCCESS; } - // "nan"? - if (src->size() >= 3 && toupper((*src)[0]) == 'N' - && toupper((*src)[1]) == 'A' && toupper((*src)[2]) == 'N') { - src->advance(3); - return std::numeric_limits::quiet_NaN(); + ConversionCode overflow() { + return negative_ ? ConversionCode::NEGATIVE_OVERFLOW + : ConversionCode::POSITIVE_OVERFLOW; } - // "-nan"? - if (src->size() >= 4 && - toupper((*src)[0]) == '-' && - toupper((*src)[1]) == 'N' && - toupper((*src)[2]) == 'A' && - toupper((*src)[3]) == 'N') { - src->advance(4); - return -std::numeric_limits::quiet_NaN(); + template + Expected finalize(U value) { + T rv; + if (negative_) { + rv = T(-value); + if (UNLIKELY(rv > 0)) { + return makeUnexpected(ConversionCode::NEGATIVE_OVERFLOW); + } + } else { + rv = T(value); + if (UNLIKELY(rv < 0)) { + return makeUnexpected(ConversionCode::POSITIVE_OVERFLOW); + } + } + return rv; } - // All bets are off - throw std::range_error("Unable to convert \"" + src->toString() - + "\" to a floating point value."); -} + private: + bool negative_; +}; -} +// For unsigned types, we don't need any extra processing +template +class SignedValueHandler { + public: + ConversionCode init(const char*&) { + return ConversionCode::SUCCESS; + } -float str_to_float(StringPiece* src) { - return str_to_floating(src); -} + ConversionCode overflow() { + return ConversionCode::POSITIVE_OVERFLOW; + } -double str_to_double(StringPiece* src) { - return str_to_floating(src); -} + Expected finalize(T value) { + return value; + } +}; /** - * String represented as a pair of pointers to char to unsigned + * String represented as a pair of pointers to char to signed/unsigned * integrals. Assumes NO whitespace before or after, and also that the - * string is composed entirely of digits. Tgt must be unsigned, and no - * sign is allowed in the string (even it's '+'). String may be empty, - * in which case digits_to throws. + * string is composed entirely of digits (and an optional sign only for + * signed types). String may be empty, in which case digits_to returns + * an appropriate error. */ template -Tgt digits_to(const char* b, const char* e) { - - static_assert(!std::is_signed::value, "Unsigned type expected"); +inline Expected digits_to( + const char* b, + const char* const e) noexcept { + using UT = typename std::make_unsigned::type; assert(b <= e); - const size_t size = e - b; + SignedValueHandler sgn; + + auto err = sgn.init(b); + if (UNLIKELY(err != ConversionCode::SUCCESS)) { + return makeUnexpected(err); + } + + size_t size = size_t(e - b); /* Although the string is entirely made of digits, we still need to * check for overflow. */ - if (size >= std::numeric_limits::digits10 + 1) { - // Leading zeros? If so, recurse to keep things simple + if (size > std::numeric_limits::digits10) { + // Leading zeros? if (b < e && *b == '0') { for (++b;; ++b) { - if (b == e) - return 0; // just zeros, e.g. "0000" - if (*b != '0') - return digits_to(b, e); + if (b == e) { + return Tgt(0); // just zeros, e.g. "0000" + } + if (*b != '0') { + size = size_t(e - b); + break; + } } } - FOLLY_RANGE_CHECK_BEGIN_END( - size == std::numeric_limits::digits10 + 1 && - strncmp(b, detail::MaxString::value, size) <= 0, - "Numeric overflow upon conversion", - b, - e); + if (size > std::numeric_limits::digits10 && + (size != std::numeric_limits::digits10 + 1 || + strncmp(b, MaxString::value, size) > 0)) { + return makeUnexpected(sgn.overflow()); + } } // Here we know that the number won't overflow when // converted. Proceed without checks. - Tgt result = 0; + UT result = 0; for (; e - b >= 4; b += 4) { - result *= 10000; + result *= static_cast(10000); const int32_t r0 = shift1000[static_cast(b[0])]; const int32_t r1 = shift100[static_cast(b[1])]; const int32_t r2 = shift10[static_cast(b[2])]; const int32_t r3 = shift1[static_cast(b[3])]; const auto sum = r0 + r1 + r2 + r3; - assert(sum < OOR && "Assumption: string only has digits"); - result += sum; + if (sum >= OOR) { + goto outOfRange; + } + result += UT(sum); } switch (e - b) { @@ -428,39 +568,198 @@ Tgt digits_to(const char* b, const char* e) { const int32_t r1 = shift10[static_cast(b[1])]; const int32_t r2 = shift1[static_cast(b[2])]; const auto sum = r0 + r1 + r2; - assert(sum < OOR && "Assumption: string only has digits"); - return result * 1000 + sum; + if (sum >= OOR) { + goto outOfRange; + } + result = UT(1000 * result + sum); + break; } case 2: { const int32_t r0 = shift10[static_cast(b[0])]; const int32_t r1 = shift1[static_cast(b[1])]; const auto sum = r0 + r1; - assert(sum < OOR && "Assumption: string only has digits"); - return result * 100 + sum; + if (sum >= OOR) { + goto outOfRange; + } + result = UT(100 * result + sum); + break; } case 1: { const int32_t sum = shift1[static_cast(b[0])]; - assert(sum < OOR && "Assumption: string only has digits"); - return result * 10 + sum; + if (sum >= OOR) { + goto outOfRange; + } + result = UT(10 * result + sum); + break; } + default: + assert(b == e); + if (size == 0) { + return makeUnexpected(ConversionCode::NO_DIGITS); + } + break; } - assert(b == e); - FOLLY_RANGE_CHECK_BEGIN_END( - size > 0, "Found no digits to convert in input", b, e); - return result; + return sgn.finalize(result); + +outOfRange: + return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); +} + +template Expected digits_to( + const char*, + const char*) noexcept; +template Expected digits_to( + const char*, + const char*) noexcept; +template Expected digits_to( + const char*, + const char*) noexcept; + +template Expected digits_to( + const char*, + const char*) noexcept; +template Expected digits_to( + const char*, + const char*) noexcept; + +template Expected digits_to( + const char*, + const char*) noexcept; +template Expected digits_to( + const char*, + const char*) noexcept; + +template Expected digits_to( + const char*, + const char*) noexcept; +template Expected digits_to( + const char*, + const char*) noexcept; + +template Expected digits_to( + const char*, + const char*) noexcept; +template Expected +digits_to(const char*, const char*) noexcept; + +#if FOLLY_HAVE_INT128_T +template Expected<__int128, ConversionCode> digits_to<__int128>( + const char*, + const char*) noexcept; +template Expected +digits_to(const char*, const char*) noexcept; +#endif + +/** + * StringPiece to integrals, with progress information. Alters the + * StringPiece parameter to munch the already-parsed characters. + */ +template +Expected str_to_integral(StringPiece* src) noexcept { + using UT = typename std::make_unsigned::type; + + auto b = src->data(), past = src->data() + src->size(); + + for (;; ++b) { + if (UNLIKELY(b >= past)) { + return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); + } + if (!std::isspace(*b)) { + break; + } + } + + SignedValueHandler sgn; + auto err = sgn.init(b); + + if (UNLIKELY(err != ConversionCode::SUCCESS)) { + return makeUnexpected(err); + } + if (std::is_signed::value && UNLIKELY(b >= past)) { + return makeUnexpected(ConversionCode::NO_DIGITS); + } + if (UNLIKELY(!isdigit(*b))) { + return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); + } + + auto m = findFirstNonDigit(b + 1, past); + + auto tmp = digits_to(b, m); + + if (UNLIKELY(!tmp.hasValue())) { + return makeUnexpected( + tmp.error() == ConversionCode::POSITIVE_OVERFLOW ? sgn.overflow() + : tmp.error()); + } + + auto res = sgn.finalize(tmp.value()); + + if (res.hasValue()) { + src->advance(size_t(m - src->data())); + } + + return res; } -template unsigned char digits_to(const char* b, const char* e); -template unsigned short digits_to(const char* b, const char* e); -template unsigned int digits_to(const char* b, const char* e); -template unsigned long digits_to(const char* b, const char* e); -template unsigned long long digits_to(const char* b, - const char* e); +template Expected str_to_integral( + StringPiece* src) noexcept; +template Expected str_to_integral( + StringPiece* src) noexcept; +template Expected str_to_integral( + StringPiece* src) noexcept; + +template Expected str_to_integral( + StringPiece* src) noexcept; +template Expected +str_to_integral(StringPiece* src) noexcept; + +template Expected str_to_integral( + StringPiece* src) noexcept; +template Expected str_to_integral( + StringPiece* src) noexcept; + +template Expected str_to_integral( + StringPiece* src) noexcept; +template Expected str_to_integral( + StringPiece* src) noexcept; + +template Expected str_to_integral( + StringPiece* src) noexcept; +template Expected +str_to_integral(StringPiece* src) noexcept; + #if FOLLY_HAVE_INT128_T -template unsigned __int128 digits_to(const char* b, - const char* e); +template Expected<__int128, ConversionCode> str_to_integral<__int128>( + StringPiece* src) noexcept; +template Expected +str_to_integral(StringPiece* src) noexcept; #endif } // namespace detail + +ConversionError makeConversionError(ConversionCode code, StringPiece input) { + using namespace detail; + static_assert( + std::is_unsigned::type>::value, + "ConversionCode should be unsigned"); + assert((std::size_t)code < kErrorStrings.size()); + const ErrorString& err = kErrorStrings[(std::size_t)code]; + if (code == ConversionCode::EMPTY_INPUT_STRING && input.empty()) { + return {err.string, code}; + } + std::string tmp(err.string); + tmp.append(": "); + if (err.quote) { + tmp.append(1, '"'); + } + if (input.size() > 0) { + tmp.append(input.data(), input.size()); + } + if (err.quote) { + tmp.append(1, '"'); + } + return {tmp, code}; +} + } // namespace folly