From: Tudor Bosman Date: Wed, 10 Apr 2013 00:34:52 +0000 (-0700) Subject: URI parsing in folly X-Git-Tag: v0.22.0~1004 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9ff69037a4faab0ffb5e781b993428cceef62c9b;p=folly.git URI parsing in folly Summary: Cleaned up from common/strings/URL.h, and it should be URI, not URL. Test Plan: tests added Reviewed By: chip@fb.com FB internal diff: D768880 --- diff --git a/folly/String-inl.h b/folly/String-inl.h index b9f7c237..8ae2d20a 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -149,6 +149,95 @@ void cUnescape(StringPiece str, String& out, bool strict) { out.append(&*last, p - last); } +namespace detail { +// Map from character code to escape mode: +// 0 = pass through +// 1 = unused +// 2 = pass through in PATH mode +// 3 = space, replace with '+' in QUERY mode +// 4 = percent-encode +extern const unsigned char uriEscapeTable[]; +} // namespace detail + +template +void uriEscape(StringPiece str, String& out, UriEscapeMode mode) { + static const char hexValues[] = "0123456789abcdef"; + char esc[3]; + esc[0] = '%'; + // Preallocate assuming that 25% of the input string will be escaped + out.reserve(out.size() + str.size() + 3 * (str.size() / 4)); + auto p = str.begin(); + auto last = p; // last regular character + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. 
+ unsigned char minEncode = static_cast(mode); + while (p != str.end()) { + char c = *p; + unsigned char v = static_cast(c); + unsigned char discriminator = detail::uriEscapeTable[v]; + if (LIKELY(discriminator <= minEncode)) { + ++p; + } else if (mode == UriEscapeMode::QUERY && discriminator == 3) { + out.append(&*last, p - last); + out.push_back('+'); + ++p; + last = p; + } else { + out.append(&*last, p - last); + esc[1] = hexValues[v >> 4]; + esc[2] = hexValues[v & 0x0f]; + out.append(esc, 3); + ++p; + last = p; + } + } + out.append(&*last, p - last); +} + +template +void uriUnescape(StringPiece str, String& out, UriEscapeMode mode) { + out.reserve(out.size() + str.size()); + auto p = str.begin(); + auto last = p; + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + while (p != str.end()) { + char c = *p; + // '%' starts a two-hex-digit escape; '+' decodes to ' ' only in QUERY mode. + switch (c) { + case '%': + { + if (UNLIKELY(std::distance(p, str.end()) < 3)) { + throw std::invalid_argument("incomplete percent encode sequence"); + } + auto h1 = detail::hexTable[static_cast(p[1])]; + auto h2 = detail::hexTable[static_cast(p[2])]; + if (UNLIKELY(h1 == 16 || h2 == 16)) { + throw std::invalid_argument("invalid percent encode sequence"); + } + out.append(&*last, p - last); + out.push_back((h1 << 4) | h2); + p += 3; + last = p; + break; + } + case '+': + if (mode == UriEscapeMode::QUERY) { + out.append(&*last, p - last); + out.push_back(' '); + ++p; + last = p; + break; + } + // else fallthrough + default: + ++p; + break; + } + } + out.append(&*last, p - last); +} + namespace detail { /* diff --git a/folly/String.h b/folly/String.h index c843eed8..2edd0da9 100644 --- a/folly/String.h +++ b/folly/String.h @@ -112,6 +112,56 @@ String cUnescape(StringPiece str, bool strict = true) { return out; } +/** + * URI-escape a string. Appends the result to the output string. 
+ * + * Alphanumeric characters and other characters marked as "unreserved" in RFC + * 3986 ( -_.~ ) are left unchanged. In PATH mode, the forward slash (/) is + * also left unchanged. In QUERY mode, spaces are replaced by '+'. All other + * characters are percent-encoded. + */ +enum class UriEscapeMode : unsigned char { + // The values are meaningful, see generate_escape_tables.py + ALL = 0, + QUERY = 1, + PATH = 2 +}; +template +void uriEscape(StringPiece str, + String& out, + UriEscapeMode mode = UriEscapeMode::ALL); + +/** + * Similar to uriEscape above, but returns the escaped string. + */ +template +String uriEscape(StringPiece str, UriEscapeMode mode = UriEscapeMode::ALL) { + String out; + uriEscape(str, out, mode); + return out; +} + +/** + * URI-unescape a string. Appends the result to the output string. + * + * In QUERY mode, '+' are replaced by space. %XX sequences are decoded if + * XX is a valid hex sequence, otherwise we throw invalid_argument. + */ +template +void uriUnescape(StringPiece str, + String& out, + UriEscapeMode mode = UriEscapeMode::ALL); + +/** + * Similar to uriUnescape above, but returns the unescaped string. + */ +template +String uriUnescape(StringPiece str, UriEscapeMode mode = UriEscapeMode::ALL) { + String out; + uriUnescape(str, out, mode); + return out; +} + /** * stringPrintf is much like printf but deposits its result into a * string. Two signatures are supported: the first simply returns the diff --git a/folly/Uri-inl.h b/folly/Uri-inl.h new file mode 100644 index 00000000..71a23cbb --- /dev/null +++ b/folly/Uri-inl.h @@ -0,0 +1,49 @@ +/* + * Copyright 2013 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_URI_H_ +#error This file may only be included from folly/Uri.h +#endif + +#include "folly/Conv.h" + +namespace folly { + +template +String Uri::toString() const { + String str; + toAppend(scheme_, "://", &str); + if (!password_.empty()) { + toAppend(username_, ":", password_, "@", &str); + } else if (!username_.empty()) { + toAppend(username_, "@", &str); + } + toAppend(host_, &str); + if (port_ != 0) { + toAppend(":", port_, &str); + } + toAppend(path_, &str); + if (!query_.empty()) { + toAppend("?", query_, &str); + } + if (!fragment_.empty()) { + toAppend("#", fragment_, &str); + } + return str; +} + +} // namespace folly + diff --git a/folly/Uri.cpp b/folly/Uri.cpp new file mode 100644 index 00000000..9ae5c713 --- /dev/null +++ b/folly/Uri.cpp @@ -0,0 +1,95 @@ +/* + * Copyright 2013 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "folly/Uri.h" + +#include +#include + +namespace folly { + +namespace { + +fbstring submatch(const boost::cmatch& m, size_t idx) { + auto& sub = m[idx]; + return fbstring(sub.first, sub.second); +} + +template +void toLower(String& s) { + for (auto& c : s) { + c = tolower(c); + } +} + +} // namespace + +Uri::Uri(StringPiece str) : port_(0) { + static const boost::regex uriRegex( + "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme: + "([^?#]*)" // authority and path + "(?:\\?([^#]*))?" // ?query + "(?:#(.*))?"); // #fragment + static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?"); + + boost::cmatch match; + if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) { + throw std::invalid_argument("invalid URI"); + } + + scheme_ = submatch(match, 1); + toLower(scheme_); + + StringPiece authorityAndPath(match[2].first, match[2].second); + boost::cmatch authorityAndPathMatch; + if (!boost::regex_match(authorityAndPath.begin(), + authorityAndPath.end(), + authorityAndPathMatch, + authorityAndPathRegex)) { + // Does not start with //, doesn't have authority + path_ = authorityAndPath.fbstr(); + } else { + static const boost::regex authorityRegex( + "(?:([^@:]*)(?::([^@]*))?@)?" 
// username, password + "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal, dotted-IPv4, or + // named host) + "(?::(\\d*))?"); // port + + auto authority = authorityAndPathMatch[1]; + boost::cmatch authorityMatch; + if (!boost::regex_match(authority.first, + authority.second, + authorityMatch, + authorityRegex)) { + throw std::invalid_argument("invalid URI authority"); + } + + StringPiece port(authorityMatch[4].first, authorityMatch[4].second); + if (!port.empty()) { + port_ = to(port); + } + + username_ = submatch(authorityMatch, 1); + password_ = submatch(authorityMatch, 2); + host_ = submatch(authorityMatch, 3); + path_ = submatch(authorityAndPathMatch, 2); + } + + query_ = submatch(match, 3); + fragment_ = submatch(match, 4); +} + +} // namespace folly diff --git a/folly/Uri.h b/folly/Uri.h new file mode 100644 index 00000000..8885bb9f --- /dev/null +++ b/folly/Uri.h @@ -0,0 +1,77 @@ +/* + * Copyright 2013 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_URI_H_ +#define FOLLY_URI_H_ + +#include "folly/String.h" + +namespace folly { + +/** + * Class representing a URI. + * + * Consider http://www.facebook.com/foo/bar?key=foo#anchor + * + * The URI is broken down into its parts: scheme ("http"), authority + * (ie. host and port, in most cases: "www.facebook.com"), path + * ("/foo/bar"), query ("key=foo") and fragment ("anchor"). The scheme is + * lower-cased. 
+ * + * If this Uri represents a URL, note that, to prevent ambiguity, the component + * parts are NOT percent-decoded; you should do this yourself with + * uriUnescape() (for the authority and path) and uriUnescape(..., + * UriEscapeMode::QUERY) (for the query, but probably only after splitting at + * '&' to identify the individual parameters). + */ +class Uri { + public: + /** + * Parse a Uri from a string. Throws std::invalid_argument on parse error. + */ + explicit Uri(StringPiece str); + + const fbstring& scheme() const { return scheme_; } + const fbstring& username() const { return username_; } + const fbstring& password() const { return password_; } + const fbstring& host() const { return host_; } + uint32_t port() const { return port_; } + const fbstring& path() const { return path_; } + const fbstring& query() const { return query_; } + const fbstring& fragment() const { return fragment_; } + + template + String toString() const; + + std::string str() const { return toString(); } + fbstring fbstr() const { return toString(); } + + private: + fbstring scheme_; + fbstring username_; + fbstring password_; + fbstring host_; + uint32_t port_; + fbstring path_; + fbstring query_; + fbstring fragment_; +}; + +} // namespace folly + +#include "folly/Uri-inl.h" + +#endif /* FOLLY_URI_H_ */ diff --git a/folly/build/generate_escape_tables.py b/folly/build/generate_escape_tables.py index e84179e7..0d18978e 100755 --- a/folly/build/generate_escape_tables.py +++ b/folly/build/generate_escape_tables.py @@ -14,6 +14,7 @@ def generate(f): f.write("namespace folly {\n" "namespace detail {\n" "\n") + f.write("extern const char cEscapeTable[] =\n") escapes = dict(( ('"', '\\"'), @@ -36,8 +37,7 @@ def generate(f): else: c = 'P' # printable f.write(c) - f.write("\";\n" - "\n") + f.write("\";\n\n") f.write("extern const char cUnescapeTable[] =\n") for i in range(0, 256): @@ -56,10 +56,9 @@ def generate(f): f.write("X") # hex else: f.write("I") # invalid - f.write("\";\n" - "\n" 
- "extern const unsigned char hexTable[] = {") + f.write("\";\n\n") + f.write("extern const unsigned char hexTable[] = {") for i in range(0, 256): if i % 16 == 0: f.write("\n ") @@ -71,8 +70,31 @@ def generate(f): f.write("{0:2d}, ".format(i - ord('A') + 10)) else: f.write("16, ") - f.write("\n};\n" - "\n") + f.write("\n};\n\n") + + # 0 = passthrough + # 1 = unused + # 2 = safe in path (/) + # 3 = space (replace with '+' in query) + # 4 = always percent-encode + f.write("extern const unsigned char uriEscapeTable[] = {") + passthrough = ( + range(ord('0'), ord('9') + 1) + + range(ord('A'), ord('Z') + 1) + + range(ord('a'), ord('z') + 1) + + map(ord, '-_.~')) + for i in range(0, 256): + if i % 16 == 0: + f.write("\n ") + if i in passthrough: + f.write("0, ") + elif i == ord('/'): + f.write("2, ") + elif i == ord(' '): + f.write("3, ") + else: + f.write("4, ") + f.write("\n};\n\n") f.write("} // namespace detail\n" "} // namespace folly\n") diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp index 43eed510..8c6d6d61 100644 --- a/folly/test/StringTest.cpp +++ b/folly/test/StringTest.cpp @@ -131,25 +131,117 @@ TEST(Escape, cUnescape) { std::invalid_argument); } +TEST(Escape, uriEscape) { + EXPECT_EQ("hello%2c%20%2fworld", uriEscape("hello, /world")); + EXPECT_EQ("hello%2c%20/world", uriEscape("hello, /world", + UriEscapeMode::PATH)); + EXPECT_EQ("hello%2c+%2fworld", uriEscape("hello, /world", + UriEscapeMode::QUERY)); +} + +TEST(Escape, uriUnescape) { + EXPECT_EQ("hello, /world", uriUnescape("hello, /world")); + EXPECT_EQ("hello, /world", uriUnescape("hello%2c%20%2fworld")); + EXPECT_EQ("hello,+/world", uriUnescape("hello%2c+%2fworld")); + EXPECT_EQ("hello, /world", uriUnescape("hello%2c+%2fworld", + UriEscapeMode::QUERY)); + EXPECT_EQ("hello/", uriUnescape("hello%2f")); + EXPECT_EQ("hello/", uriUnescape("hello%2F")); + EXPECT_THROW({uriUnescape("hello%");}, + std::invalid_argument); + EXPECT_THROW({uriUnescape("hello%2");}, + std::invalid_argument); + 
EXPECT_THROW({uriUnescape("hello%2g");}, + std::invalid_argument); +} + namespace { -fbstring bmString; -fbstring bmEscapedString; -fbstring escapedString; -fbstring unescapedString; -const size_t kBmStringLength = 64 << 10; -const uint32_t kPrintablePercentage = 90; +void expectPrintable(StringPiece s) { + for (char c : s) { + EXPECT_LE(32, c); + EXPECT_GE(127, c); + } +} +} // namespace -void initBenchmark() { - bmString.reserve(kBmStringLength); +TEST(Escape, uriEscapeAllCombinations) { + char c[3]; + c[2] = '\0'; + StringPiece in(c, 2); + fbstring tmp; + fbstring out; + for (int i = 0; i < 256; ++i) { + c[0] = i; + for (int j = 0; j < 256; ++j) { + c[1] = j; + tmp.clear(); + out.clear(); + uriEscape(in, tmp); + expectPrintable(tmp); + uriUnescape(tmp, out); + EXPECT_EQ(in, out); + } + } +} + +namespace { +bool isHex(int v) { + return ((v >= '0' && v <= '9') || + (v >= 'A' && v <= 'F') || + (v >= 'a' && v <= 'f')); +} +} // namespace + +TEST(Escape, uriUnescapePercentDecoding) { + char c[4] = {'%', '\0', '\0', '\0'}; + StringPiece in(c, 3); + fbstring out; + unsigned int expected = 0; + for (int i = 0; i < 256; ++i) { + c[1] = i; + for (int j = 0; j < 256; ++j) { + c[2] = j; + if (isHex(i) && isHex(j)) { + out.clear(); + uriUnescape(in, out); + EXPECT_EQ(1, out.size()); + EXPECT_EQ(1, sscanf(c + 1, "%x", &expected)); + unsigned char v = out[0]; + EXPECT_EQ(expected, v); + } else { + EXPECT_THROW({uriUnescape(in, out);}, std::invalid_argument); + } + } + } +} + +namespace { +fbstring cbmString; +fbstring cbmEscapedString; +fbstring cEscapedString; +fbstring cUnescapedString; +const size_t kCBmStringLength = 64 << 10; +const uint32_t kCPrintablePercentage = 90; + +fbstring uribmString; +fbstring uribmEscapedString; +fbstring uriEscapedString; +fbstring uriUnescapedString; +const size_t kURIBmStringLength = 256; +const uint32_t kURIPassThroughPercentage = 50; +void initBenchmark() { std::mt19937 rnd; + + // C escape std::uniform_int_distribution printable(32, 126); 
std::uniform_int_distribution nonPrintable(0, 160); std::uniform_int_distribution percentage(0, 99); - for (size_t i = 0; i < kBmStringLength; ++i) { + cbmString.reserve(kCBmStringLength); + for (size_t i = 0; i < kCBmStringLength; ++i) { unsigned char c; - if (percentage(rnd) < kPrintablePercentage) { + if (percentage(rnd) < kCPrintablePercentage) { c = printable(rnd); } else { c = nonPrintable(rnd); @@ -159,23 +251,55 @@ void initBenchmark() { c += (126 - 32) + 1; } } - bmString.push_back(c); + cbmString.push_back(c); + } + + cbmEscapedString = cEscape(cbmString); + + // URI escape + std::uniform_int_distribution passthrough('a', 'z'); + std::string encodeChars = " ?!\"',+[]"; + std::uniform_int_distribution encode(0, encodeChars.size() - 1); + + uribmString.reserve(kURIBmStringLength); + for (size_t i = 0; i < kURIBmStringLength; ++i) { + unsigned char c; + if (percentage(rnd) < kURIPassThroughPercentage) { + c = passthrough(rnd); + } else { + c = encodeChars[encode(rnd)]; + } + uribmString.push_back(c); } - bmEscapedString = cEscape(bmString); + uribmEscapedString = uriEscape(uribmString); } BENCHMARK(BM_cEscape, iters) { while (iters--) { - escapedString = cEscape(bmString); - doNotOptimizeAway(escapedString.size()); + cEscapedString = cEscape(cbmString); + doNotOptimizeAway(cEscapedString.size()); } } BENCHMARK(BM_cUnescape, iters) { while (iters--) { - unescapedString = cUnescape(bmEscapedString); - doNotOptimizeAway(unescapedString.size()); + cUnescapedString = cUnescape(cbmEscapedString); + doNotOptimizeAway(cUnescapedString.size()); + } +} + +BENCHMARK(BM_uriEscape, iters) { + while (iters--) { + uriEscapedString = uriEscape(uribmString); + doNotOptimizeAway(uriEscapedString.size()); + } +} + +BENCHMARK(BM_uriUnescape, iters) { + while (iters--) { + uriUnescapedString = uriUnescape(uribmEscapedString); + doNotOptimizeAway(uriUnescapedString.size()); } } diff --git a/folly/test/UriTest.cpp b/folly/test/UriTest.cpp new file mode 100644 index 
00000000..97b14637 --- /dev/null +++ b/folly/test/UriTest.cpp @@ -0,0 +1,213 @@ +/* + * Copyright 2013 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Uri.h" + +#include +#include + +using namespace folly; + +namespace { + +} // namespace + +TEST(Uri, Simple) { + { + fbstring s("http://www.facebook.com/hello/world?query#fragment"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("www.facebook.com", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/hello/world", u.path()); + EXPECT_EQ("query", u.query()); + EXPECT_EQ("fragment", u.fragment()); + EXPECT_EQ(s, u.fbstr()); // canonical + } + + { + fbstring s("http://www.facebook.com:8080/hello/world?query#fragment"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("www.facebook.com", u.host()); + EXPECT_EQ(8080, u.port()); + EXPECT_EQ("/hello/world", u.path()); + EXPECT_EQ("query", u.query()); + EXPECT_EQ("fragment", u.fragment()); + EXPECT_EQ(s, u.fbstr()); // canonical + } + + { + fbstring s("http://127.0.0.1:8080/hello/world?query#fragment"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("127.0.0.1", u.host()); + EXPECT_EQ(8080, u.port()); + EXPECT_EQ("/hello/world", u.path()); + EXPECT_EQ("query", u.query()); + EXPECT_EQ("fragment", u.fragment()); + 
EXPECT_EQ(s, u.fbstr()); // canonical + } + + { + fbstring s("http://[::1]:8080/hello/world?query#fragment"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("[::1]", u.host()); + EXPECT_EQ(8080, u.port()); + EXPECT_EQ("/hello/world", u.path()); + EXPECT_EQ("query", u.query()); + EXPECT_EQ("fragment", u.fragment()); + EXPECT_EQ(s, u.fbstr()); // canonical + } + + { + fbstring s("http://user:pass@host.com/"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("user", u.username()); + EXPECT_EQ("pass", u.password()); + EXPECT_EQ("host.com", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ(s, u.fbstr()); + } + + { + fbstring s("http://user@host.com/"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("user", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("host.com", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ(s, u.fbstr()); + } + + { + fbstring s("http://user:@host.com/"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("user", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("host.com", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ("http://user@host.com/", u.fbstr()); + } + + { + fbstring s("http://:pass@host.com/"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("pass", u.password()); + EXPECT_EQ("host.com", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ(s, u.fbstr()); + } + + { + fbstring s("http://@host.com/"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("host.com", u.host()); + 
EXPECT_EQ(0, u.port()); + EXPECT_EQ("/", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ("http://host.com/", u.fbstr()); + } + + { + fbstring s("http://:@host.com/"); + Uri u(s); + EXPECT_EQ("http", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("host.com", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ("http://host.com/", u.fbstr()); + } + + { + fbstring s("file:///etc/motd"); + Uri u(s); + EXPECT_EQ("file", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/etc/motd", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ(s, u.fbstr()); + } + + { + fbstring s("file:/etc/motd"); + Uri u(s); + EXPECT_EQ("file", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/etc/motd", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ("file:///etc/motd", u.fbstr()); + } + + { + fbstring s("file://etc/motd"); + Uri u(s); + EXPECT_EQ("file", u.scheme()); + EXPECT_EQ("", u.username()); + EXPECT_EQ("", u.password()); + EXPECT_EQ("etc", u.host()); + EXPECT_EQ(0, u.port()); + EXPECT_EQ("/motd", u.path()); + EXPECT_EQ("", u.query()); + EXPECT_EQ("", u.fragment()); + EXPECT_EQ(s, u.fbstr()); + } + + EXPECT_THROW({Uri("2http://www.facebook.com/");}, + std::invalid_argument); +}