2 * Copyright (c) 2014, Peter Thorson. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of the WebSocket++ Project nor the
12 * names of its contributors may be used to endorse or promote products
13 * derived from this software without specific prior written permission.
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL PETER THORSON BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #ifndef HTTP_PARSER_HPP
29 #define HTTP_PARSER_HPP
36 #include <websocketpp/utilities.hpp>
37 #include <websocketpp/http/constants.hpp>
39 namespace websocketpp {
52 namespace body_encoding {
60 typedef std::map<std::string, std::string, utility::ci_less > header_list;
62 /// Read and return the next token in the stream
64 * Read until a non-token character is found and then return the token and
65 * iterator to the next character to read
67 * @param begin An iterator to the beginning of the sequence
68 * @param end An iterator to the end of the sequence
69 * @return A pair containing the token and an iterator to the next character in
72 template <typename InputIterator>
73 std::pair<std::string,InputIterator> extract_token(InputIterator begin,
// Find the first character for which is_not_token_char returns true; if no
// such character exists, `it` ends up equal to `end`.
76 InputIterator it = std::find_if(begin,end,&is_not_token_char);
// The token is the range [begin, it). `it` is handed back alongside it so the
// caller can continue reading from that position. An empty token means the
// very first character was rejected (or the range was empty).
77 return std::make_pair(std::string(begin,it),it);
80 /// Read and return the next quoted string in the stream
82 * Read a double quoted string starting at `begin`. The quotes themselves are
83 * stripped. The quoted value is returned along with an iterator to the next
86 * @param begin An iterator to the beginning of the sequence
87 * @param end An iterator to the end of the sequence
88 * @return A pair containing the string read and an iterator to the next
89 * character in the stream
91 template <typename InputIterator>
92 std::pair<std::string,InputIterator> extract_quoted_string(InputIterator begin,
// Early exits: the returned iterator is `begin`, i.e. nothing was consumed.
98 return std::make_pair(s,begin);
102 return std::make_pair(s,begin);
// Step over the character at `begin` (the opening quote, per this function's
// contract). `marker` tracks the start of the text not yet copied into `s`.
// NOTE: `begin+1` and `cursor-1` below need random access iterators, despite
// the template parameter being named InputIterator.
105 InputIterator cursor = begin+1;
106 InputIterator marker = cursor;
// Locate the next double quote character.
108 cursor = std::find(cursor,end,'"');
110 while (cursor != end) {
111 // this quote is either the closing quote or an escaped quote
// A backslash directly before the quote marks it as escaped.
112 if (*(cursor-1) == '\\') {
// Copy the pending text up to, but not including, the backslash.
113 s.append(marker,cursor-1);
// Copy the pending text up to, but not including, the quote itself.
118 s.append(marker,cursor);
120 return std::make_pair(s,cursor);
// Keep scanning for the next double quote character.
123 cursor = std::find(cursor,end,'"');
// Fallback result: an empty string paired with the original `begin`, which
// callers can detect by comparing the returned iterator with what they passed.
126 return std::make_pair("",begin);
129 /// Read and discard one unit of linear whitespace
131 * Read one unit of linear white space and return the iterator to the character
132 * afterwards. If `begin` is returned, no whitespace was extracted.
134 * @param begin An iterator to the beginning of the sequence
135 * @param end An iterator to the end of the sequence
136 * @return An iterator to the character after the linear whitespace read
138 template <typename InputIterator>
139 InputIterator extract_lws(InputIterator begin, InputIterator end) {
140 InputIterator it = begin;
142 // strip leading CRLF
// The CRLF only counts as whitespace when more than two characters remain and
// the character right after the CRLF satisfies is_whitespace_char.
// NOTE: `end-begin` and `begin+N` need random access iterators.
143 if (end-begin > 2 && *begin == '\r' && *(begin+1) == '\n' &&
144 is_whitespace_char(static_cast<unsigned char>(*(begin+2))))
// Advance to the first character for which is_not_whitespace_char returns
// true, or to `end` if there is none.
149 it = std::find_if(it,end,&is_not_whitespace_char);
153 /// Read and discard linear whitespace
155 * Read linear white space until a non-lws character is read and return an
156 * iterator to that character. If `begin` is returned, no whitespace was
159 * @param begin An iterator to the beginning of the sequence
160 * @param end An iterator to the end of the sequence
161 * @return An iterator to the character after the linear whitespace read
163 template <typename InputIterator>
164 InputIterator extract_all_lws(InputIterator begin, InputIterator end) {
// `old_it` holds the position before a pass, `new_it` the position after it.
165 InputIterator old_it;
166 InputIterator new_it = begin;
169 // Pull value from previous iteration
172 // look ahead another pass
// Each pass consumes one unit of linear whitespace via extract_lws.
173 new_it = extract_lws(old_it,end);
// Stop once the input is exhausted or a pass made no progress.
174 } while (new_it != end && old_it != new_it);
179 /// Extract HTTP attributes
181 * An http attributes list is a semicolon delimited list of key value pairs in
182 * the format: *( ";" attribute "=" value ) where attribute is a token and value
183 * is a token or quoted string.
185 * Attributes extracted are appended to the supplied attributes list
188 * @param [in] begin An iterator to the beginning of the sequence
189 * @param [in] end An iterator to the end of the sequence
190 * @param [out] attributes A reference to the attributes list to append
191 * attribute/value pairs extracted to
192 * @return An iterator to the character after the last attribute read
194 template <typename InputIterator>
195 InputIterator extract_attributes(InputIterator begin, InputIterator end,
196 attribute_list & attributes)
198 InputIterator cursor;
// Scratch slot reused for the results of the extract_token and
// extract_quoted_string calls below.
206 std::pair<std::string,InputIterator> ret;
208 while (cursor != end) {
211 cursor = http::parser::extract_all_lws(cursor,end);
217 // ignore this check for the very first pass
220 if (*cursor == ';') {
221 // advance past the ';'
224 // non-semicolon in this position indicates the end of the
225 // attribute list, break and return.
// Read the attribute name: optional whitespace followed by a token.
230 cursor = http::parser::extract_all_lws(cursor,end);
231 ret = http::parser::extract_token(cursor,end);
233 if (ret.first.empty()) {
234 // error: expected a token
241 cursor = http::parser::extract_all_lws(cursor,end);
242 if (cursor == end || *cursor != '=') {
243 // if there is an equals sign, read the attribute value. Otherwise
244 // record a blank value and continue
// operator[] creates the entry if it is missing; clear() leaves it empty.
245 attributes[name].clear();
249 // advance past the '='
252 cursor = http::parser::extract_all_lws(cursor,end);
254 // error: expected a token or quoted string
// Try a quoted-string value first. extract_quoted_string hands back the
// iterator it was given when it read nothing, so a changed iterator means a
// quoted value was found.
258 ret = http::parser::extract_quoted_string(cursor,end);
259 if (ret.second != cursor) {
260 attributes[name] = ret.first;
// Otherwise fall back to reading the value as a plain token.
265 ret = http::parser::extract_token(cursor,end);
266 if (ret.first.empty()) {
267 // error : expected token or quoted string
270 attributes[name] = ret.first;
278 /// Extract HTTP parameters
280 * An http parameters list is a comma delimited list of tokens followed by
281 * optional semicolon delimited attributes lists.
283 * Parameters extracted are appended to the supplied parameters list
286 * @param [in] begin An iterator to the beginning of the sequence
287 * @param [in] end An iterator to the end of the sequence
288 * @param [out] parameters A reference to the parameters list to append
289 * parameter values extracted to
290 * @return An iterator to the character after the last parameter read
292 template <typename InputIterator>
293 InputIterator extract_parameters(InputIterator begin, InputIterator end,
294 parameter_list &parameters)
296 InputIterator cursor;
299 // error: expected non-zero length range
// Scratch slot reused for the result of each extract_token call below.
304 std::pair<std::string,InputIterator> ret;
310 * *(";" method-param)
// Each pass of this loop reads one parameter: a token name optionally followed
// by a ';'-introduced attribute list.
314 while (cursor != end) {
315 std::string parameter_name;
316 attribute_list attributes;
318 // extract any stray whitespace
319 cursor = http::parser::extract_all_lws(cursor,end);
320 if (cursor == end) {break;}
// The parameter name must be a non-empty token.
322 ret = http::parser::extract_token(cursor,end);
324 if (ret.first.empty()) {
325 // error: expected a token
328 parameter_name = ret.first;
332 // Safe break point, insert parameter with blank attributes and exit
333 cursor = http::parser::extract_all_lws(cursor,end);
335 //parameters[parameter_name] = attributes;
336 parameters.push_back(std::make_pair(parameter_name,attributes));
340 // If there is an attribute list, read it in
341 if (*cursor == ';') {
342 InputIterator acursor;
345 acursor = http::parser::extract_attributes(cursor,end,attributes);
// An unchanged iterator means extract_attributes consumed nothing.
347 if (acursor == cursor) {
348 // attribute extraction ended in syntax error
355 // insert parameter into output list
356 //parameters[parameter_name] = attributes;
357 parameters.push_back(std::make_pair(parameter_name,attributes));
359 cursor = http::parser::extract_all_lws(cursor,end);
360 if (cursor == end) {break;}
362 // if next char is ',' then read another parameter, else stop
363 if (*cursor != ',') {
367 // advance past comma
371 // expected more bytes after a comma
// Returns a copy of `input` with leading and trailing linear whitespace
// removed. An input made up entirely of whitespace yields an empty string.
379 inline std::string strip_lws(std::string const & input) {
// Skip whitespace from the front; `begin` is the first character to keep.
380 std::string::const_iterator begin = extract_all_lws(input.begin(),input.end());
381 if (begin == input.end()) {
382 return std::string();
// Run the same scan over reverse iterators to skip whitespace from the back.
385 std::string::const_reverse_iterator rbegin = extract_all_lws(input.rbegin(),input.rend());
386 if (rbegin == input.rend()) {
387 return std::string();
// rbegin.base() is the forward iterator one past the last kept character, so
// [begin, rbegin.base()) is exactly the trimmed range.
390 return std::string(begin,rbegin.base());
395 * Includes methods and data elements common to all types of HTTP messages such
396 * as headers, versions, bodies, etc.
402 , m_body_bytes_needed(0)
403 , m_body_bytes_max(max_body_size)
404 , m_body_encoding(body_encoding::unknown) {}
406 /// Get the HTTP version string
408 * @return The version string for this parser
410 std::string const & get_version() const {
414 /// Set HTTP parser Version
416 * Input should be in format: HTTP/x.y where x and y are positive integers.
417 * @todo Does this method need any validation?
419 * @param [in] version The value to set the HTTP version to.
421 void set_version(std::string const & version);
423 /// Get the value of an HTTP header
425 * @todo Make this method case insensitive.
427 * @param [in] key The name/key of the header to get.
428 * @return The value associated with the given HTTP header key.
430 std::string const & get_header(std::string const & key) const;
432 /// Extract an HTTP parameter list from a parser header.
434 * If the header requested doesn't exist or exists and is empty the
435 * parameter list is valid (but empty).
437 * @param [in] key The name/key of the HTTP header to use as input.
438 * @param [out] out The parameter list to store extracted parameters in.
439 * @return Whether or not the input was a valid parameter list.
441 bool get_header_as_plist(std::string const & key, parameter_list & out)
444 /// Append a value to an existing HTTP header
446 * This method will set the value of the HTTP header `key` with the
447 * indicated value. If a header with the name `key` already exists, `val`
448 * will be appended to the existing value.
450 * @todo Make this method case insensitive.
451 * @todo Should there be any restrictions on which keys are allowed?
452 * @todo Exception free variant
454 * @see replace_header
456 * @param [in] key The name/key of the header to append to.
457 * @param [in] val The value to append.
459 void append_header(std::string const & key, std::string const & val);
461 /// Set a value for an HTTP header, replacing an existing value
463 * This method will set the value of the HTTP header `key` with the
464 * indicated value. If a header with the name `key` already exists, `val`
465 * will replace the existing value.
467 * @todo Make this method case insensitive.
468 * @todo Should there be any restrictions on which keys are allowed?
469 * @todo Exception free variant
473 * @param [in] key The name/key of the header to replace.
474 * @param [in] val The new value for the header.
476 void replace_header(std::string const & key, std::string const & val);
478 /// Remove a header from the parser
480 * Removes the header entirely from the parser. This is different than
481 * setting the value of the header to blank.
483 * @todo Make this method case insensitive.
485 * @param [in] key The name/key of the header to remove.
487 void remove_header(std::string const & key);
491 * Gets the body of the HTTP object
493 * @return The body of the HTTP message.
495 std::string const & get_body() const {
501 * Set the body content of the HTTP response to the parameter string. Note
502 * set_body will also set the Content-Length HTTP header to the appropriate
503 * value. If you want the Content-Length header to be something else, do so
504 * via replace_header("Content-Length") after calling set_body()
506 * @param value String data to include as the body content.
508 void set_body(std::string const & value);
510 /// Get body size limit
512 * Retrieves the maximum number of bytes to parse & buffer before canceling
517 * @return The maximum length of a message body.
519 size_t get_max_body_size() const {
// Plain accessor: hands back the limit currently stored in m_body_bytes_max.
520 return m_body_bytes_max;
523 /// Set body size limit
525 * Set the maximum number of bytes to parse and buffer before canceling a
530 * @param value The size to set the max body length to.
532 void set_max_body_size(size_t value) {
// Overwrites the stored limit with `value` as given.
533 m_body_bytes_max = value;
536 /// Extract an HTTP parameter list from a string.
538 * @param [in] in The input string.
539 * @param [out] out The parameter list to store extracted parameters in.
540 * @return Whether or not the input was a valid parameter list.
542 bool parse_parameter_list(std::string const & in, parameter_list & out)
545 /// Process a header line
547 * @todo Update this method to be exception free.
549 * @param [in] begin An iterator to the beginning of the sequence.
550 * @param [in] end An iterator to the end of the sequence.
552 void process_header(std::string::iterator begin, std::string::iterator end);
554 /// Prepare the parser to begin parsing body data
556 * Inspects headers to determine if the message has a body that needs to be
557 * read. If so, sets up the necessary state, otherwise returns false. If
558 * this method returns true and loading the message body is desired call
559 * `process_body` until it returns zero bytes or an error.
561 * Must not be called until after all headers have been processed.
565 * @return True if more bytes are needed to load the body, false otherwise.
569 /// Process body data
575 * @param [in] buf A pointer to the buffer of bytes to process.
576 * @param [in] len The number of bytes available in `buf`.
577 * @return The number of bytes processed
579 size_t process_body(char const * buf, size_t len);
581 /// Check if the parser is done parsing the body
583 * Behavior before a call to `prepare_body` is undefined.
587 * @return True if the message body has been completely loaded.
589 bool body_ready() const {
// Ready exactly when the count of body bytes still needed is zero.
590 return (m_body_bytes_needed == 0);
593 /// Generate and return the HTTP headers as a string
595 * Each header will be followed by the \r\n sequence including the last one.
596 * A second \r\n sequence (blank header) is not appended by this method
598 * @return The HTTP headers as a string.
600 std::string raw_headers() const;
602 std::string m_version;
// Header storage; header_list is a std::map<std::string, std::string> ordered
// by utility::ci_less.
603 header_list m_headers;
605 size_t m_header_bytes;
// Compared against zero by body_ready().
608 size_t m_body_bytes_needed;
// Body size limit read and written by get_max_body_size()/set_max_body_size().
609 size_t m_body_bytes_max;
610 body_encoding::value m_body_encoding;
613 } // namespace parser
615 } // namespace websocketpp
617 #include <websocketpp/http/impl/parser.hpp>
619 #endif // HTTP_PARSER_HPP