2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
#include <folly/Uri.h>

#include <cctype>

#include <boost/regex.hpp>
27 std::string submatch(const boost::cmatch& m, int idx) {
29 return std::string(sub.first, sub.second);
// Lower-cases every character of `s` in place.
//
// String: any range of char with mutable element access (e.g. std::string).
template <class String>
void toLower(String& s) {
  for (auto& c : s) {
    // std::tolower has undefined behaviour for negative arguments other than
    // EOF, so route the value through unsigned char before calling it.
    c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  }
}
41 Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) {
42 static const boost::regex uriRegex(
43 "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme:
44 "([^?#]*)" // authority and path
45 "(?:\\?([^#]*))?" // ?query
46 "(?:#(.*))?"); // #fragment
47 static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?");
50 if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) {
51 throw std::invalid_argument(to<std::string>("invalid URI ", str));
54 scheme_ = submatch(match, 1);
57 StringPiece authorityAndPath(match[2].first, match[2].second);
58 boost::cmatch authorityAndPathMatch;
59 if (!boost::regex_match(authorityAndPath.begin(),
60 authorityAndPath.end(),
61 authorityAndPathMatch,
62 authorityAndPathRegex)) {
63 // Does not start with //, doesn't have authority
64 hasAuthority_ = false;
65 path_ = authorityAndPath.str();
67 static const boost::regex authorityRegex(
68 "(?:([^@:]*)(?::([^@]*))?@)?" // username, password
69 "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']',
70 // dotted-IPv4, or named host)
71 "(?::(\\d*))?"); // port
73 auto authority = authorityAndPathMatch[1];
74 boost::cmatch authorityMatch;
75 if (!boost::regex_match(authority.first,
79 throw std::invalid_argument(
80 to<std::string>("invalid URI authority ",
81 StringPiece(authority.first, authority.second)));
84 StringPiece port(authorityMatch[4].first, authorityMatch[4].second);
86 port_ = to<uint16_t>(port);
90 username_ = submatch(authorityMatch, 1);
91 password_ = submatch(authorityMatch, 2);
92 host_ = submatch(authorityMatch, 3);
93 path_ = submatch(authorityAndPathMatch, 2);
96 query_ = submatch(match, 3);
97 fragment_ = submatch(match, 4);
100 std::string Uri::authority() const {
103 // Port is 5 characters max and we have up to 3 delimiters.
104 result.reserve(host().size() + username().size() + password().size() + 8);
106 if (!username().empty() || !password().empty()) {
107 result.append(username());
109 if (!password().empty()) {
110 result.push_back(':');
111 result.append(password());
114 result.push_back('@');
117 result.append(host());
120 result.push_back(':');
121 toAppend(port(), &result);
127 std::string Uri::hostname() const {
128 if (host_.size() > 0 && host_[0] == '[') {
129 // If it starts with '[', then it should end with ']', this is ensured by
131 return host_.substr(1, host_.size() - 2);
136 const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams() {
137 if (!query_.empty() && queryParams_.empty()) {
138 // Parse query string
139 static const boost::regex queryParamRegex(
140 "(^|&)" /*start of query or start of parameter "&"*/
141 "([^=&]*)=?" /*parameter name and "=" if value is expected*/
142 "([^=&]*)" /*parameter value*/
143 "(?=(&|$))" /*forward reference, next should be end of query or
144 start of next parameter*/);
145 boost::cregex_iterator paramBeginItr(
146 query_.data(), query_.data() + query_.size(), queryParamRegex);
147 boost::cregex_iterator paramEndItr;
148 for (auto itr = paramBeginItr; itr != paramEndItr; itr++) {
149 if (itr->length(2) == 0) {
150 // key is empty, ignore it
153 queryParams_.emplace_back(
154 std::string((*itr)[2].first, (*itr)[2].second), // parameter name
155 std::string((*itr)[3].first, (*itr)[3].second) // parameter value