2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_STRING_INL_H_
18 #define FOLLY_STRING_INL_H_
22 #ifndef FOLLY_BASE_STRING_H_
23 #error This file may only be included from String.h
29 // Map from character code to value of one-character escape sequence
30 // ('\n' = 10 maps to 'n'), 'O' if the character should be printed as
31 // an octal escape sequence, or 'P' if the character is printable and
32 // should be printed as is.
// cEscape() indexes this table with the character converted to unsigned char.
// This is a declaration only; the table's definition is not part of the
// visible text of this file.
33 extern const char cEscapeTable[];
// Appends a C-escaped rendering of `str` to `out`.
//
// Each input byte is classified through detail::cEscapeTable: 'P' bytes stay
// in the pending run and are copied in bulk, 'O' bytes are rendered with
// three octal digits, and any other table value takes the one-character
// escape branch.
//
// @param str  characters to escape
// @param out  destination string; the visible code calls size(), reserve()
//             and append(const char*, count) on it
//
// NOTE(review): the statements that declare `p`, `c` and `esc`, and those
// that emit `esc` and advance `p` / `last`, are not visible in this listing.
36 template <class String>
37 void cEscape(StringPiece str, String& out) {
// Make room up front for str.size() more characters.
40 out.reserve(out.size() + str.size());
42 auto last = p; // start of the run of regular characters not yet copied to out
43 // We advance over runs of regular characters (printable, not double-quote or
44 // backslash) and copy them in one go; this is faster than calling push_back
// repeatedly.
46 while (p != str.end()) {
// Go through unsigned char so the table index can never be negative.
48 unsigned char v = static_cast<unsigned char>(c);
49 char e = detail::cEscapeTable[v];
50 if (e == 'P') { // printable
52 } else if (e == 'O') { // octal
// Flush the pending run [last, p) before handling the escape.
53 out.append(&*last, p - last);
// Three octal digits of v, most significant first (bits 7-6, 5-3, 2-0).
54 esc[1] = '0' + ((v >> 6) & 7);
55 esc[2] = '0' + ((v >> 3) & 7);
56 esc[3] = '0' + (v & 7);
60 } else { // special 1-character escape
// Flush the pending run [last, p) before handling the escape.
61 out.append(&*last, p - last);
// Final flush of whatever run is still pending (presumably after the loop;
// the closing braces are not visible in this listing).
68 out.append(&*last, p - last);
72 // Map from the character code of the character following a backslash to
73 // the unescaped character if a valid one-character escape sequence
74 // ('n' maps to 10 = '\n'), 'O' if this is the first character of an
75 // octal escape sequence, 'X' if this is the first character of a
76 // hexadecimal escape sequence, or 'I' if this escape sequence is invalid.
// cUnescape() indexes this table with the character converted to unsigned
// char. Declaration only; the definition is not visible in this file.
77 extern const char cUnescapeTable[];
79 // Map from the character code to the hex value, or 16 if invalid hex char.
// cUnescape() indexes this table with the character converted to unsigned
// char and treats any result below 16 as a valid hex digit.
80 extern const unsigned char hexTable[];
// Appends to `out` the result of decoding the C-style backslash escapes
// found in `str`.
//
// Characters other than backslash are copied in runs. The character after a
// backslash is looked up in detail::cUnescapeTable, which selects one of four
// branches: octal ('O'), hexadecimal ('X'), invalid ('I'), or a standard
// one-character escape (any other table value).
//
// @param str     text to decode
// @param out     destination string; the visible code calls size(),
//                reserve() and append(const char*, count) on it
// @param strict  NOTE(review): no use of `strict` is visible in this listing;
//                presumably it decides whether the malformed-input cases
//                below throw. Confirm against the full source.
// @throws std::invalid_argument for a backslash at the end of the input, a
//         hex escape introducer at the end of the input, or an escape that
//         the table marks 'I' (possibly only in strict mode, see above).
83 template <class String>
84 void cUnescape(StringPiece str, String& out, bool strict) {
// Make room up front for str.size() more characters.
85 out.reserve(out.size() + str.size());
87 auto last = p; // last regular character (not part of an escape sequence)
88 // We advance over runs of regular characters (not backslash) and copy them
89 // in one go; this is faster than calling push_back repeatedly.
90 while (p != str.end()) {
92 if (c != '\\') { // normal case
// A backslash was seen: flush the pending run [last, p) first.
96 out.append(&*last, p - last);
97 if (p == str.end()) { // backslash at end of string
99 throw std::invalid_argument("incomplete escape sequence");
// Classify the character that follows the backslash.
106 char e = detail::cUnescapeTable[static_cast<unsigned char>(*p)];
107 if (e == 'O') { // octal
// At most three octal digits are consumed. val is an unsigned char, so a
// three-digit value above 0377 keeps only its low 8 bits.
108 unsigned char val = 0;
109 for (int i = 0; i < 3 && p != str.end() && *p >= '0' && *p <= '7';
111 val = (val << 3) | (*p - '0');
115 } else if (e == 'X') { // hex
117 if (p == str.end()) { // \x at end of string
119 throw std::invalid_argument("incomplete hex escape sequence");
// No digit-count limit appears in the loop condition below: digits are
// consumed for as long as hexTable reports a value under 16, and val
// (unsigned char) keeps only the low 8 bits of the accumulated value.
125 unsigned char val = 0;
127 for (; (p != str.end() &&
128 (h = detail::hexTable[static_cast<unsigned char>(*p)]) < 16);
130 val = (val << 4) | h;
134 } else if (e == 'I') { // invalid
136 throw std::invalid_argument("invalid escape sequence");
142 } else { // standard escape sequence, \' etc
// Final flush of whatever run is still pending (presumably after the loop;
// the closing braces are not visible in this listing).
148 out.append(&*last, p - last);
154 * The following functions are type-overloaded helpers for internalSplit().
// Length of a delimiter in characters: always 1 for a single char,
// s.size() for a StringPiece (which may be 0).
157 inline size_t delimSize(char) { return 1; }
158 inline size_t delimSize(StringPiece s) { return s.size(); }
// Tests whether the delimiter occurs at position `s`.
// NOTE(review): the body of the char overload is not visible in this listing.
159 inline bool atDelim(const char* s, char c) {
// StringPiece overload: true when the sp.size() bytes starting at `s` are
// equal to the bytes of `sp`. memcmp reads sp.size() bytes from `s`, so the
// caller must guarantee that many bytes are readable there.
162 inline bool atDelim(const char* s, StringPiece sp) {
163 return !std::memcmp(s, sp.start(), sp.size());
166 // These are used to short-circuit internalSplit() in the case of
167 // 1-character strings.
// internalSplit() calls delimFront(delim) inside an ordinary runtime `if`, so
// the call has to compile for every delimiter type, including char. That is
// why the char overload exists even though it is never meant to run.
168 inline char delimFront(char c) {
169 // This one exists only for compile-time; it should never be called.
// StringPiece overload: requires a non-empty piece with a non-null start
// (checked by the assert). The return statement is not visible in this
// listing; presumably it yields the first character of `s`.
173 inline char delimFront(StringPiece s) {
174 assert(!s.empty() && s.start() != nullptr);
179 * These output conversion templates allow us to support multiple
180 * output string types, even when we are using an arbitrary OutputIterator.
// Functor that turns a StringPiece token into the requested output type.
// The primary template is empty and has no operator(), so calling it for an
// output type without a specialization below fails to compile.
183 template<class OutStringT> struct OutputConverter {};
// std::string output: converts via StringPiece::toString().
185 template<> struct OutputConverter<std::string> {
186 std::string operator()(StringPiece sp) const {
187 return sp.toString();
// fbstring output: converts via StringPiece::toFbstring().
191 template<> struct OutputConverter<fbstring> {
192 fbstring operator()(StringPiece sp) const {
193 return sp.toFbstring();
// StringPiece output: the piece is returned unchanged.
197 template<> struct OutputConverter<StringPiece> {
198 StringPiece operator()(StringPiece sp) const { return sp; }
202 * Shared implementation for all the split() overloads.
204 * This uses some external helpers that are overloaded to let this
205 * algorithm be more performant if the delimiter is a single
206 * character instead of a whole string.
208 * @param ignoreEmpty iff true, don't copy empty segments to output
// Template parameters and arguments, as used by the visible code:
//   OutStringT     type produced for each token via OutputConverter<OutStringT>
//   DelimT, delim  delimiter; must be accepted by the delimSize(), atDelim()
//                  and delimFront() overloads
//   sp             text to split; sp.start() must not be null (asserted)
//   out            output iterator; each token is written with *out++ = ...
// NOTE(review): several lines of this function (the rest of the signature,
// the declaration and updates of tokenSize, the bodies of the early-out
// branches, closing braces) are not visible in this listing.
210 template<class OutStringT, class DelimT, class OutputIterator>
211 void internalSplit(DelimT delim, StringPiece sp, OutputIterator out,
213 assert(sp.start() != nullptr);
215 const char* s = sp.start();
216 const size_t strSize = sp.size();
217 const size_t dSize = delimSize(delim);
219 OutputConverter<OutStringT> conv;
// Degenerate delimiter: empty, or longer than the whole input.
221 if (dSize > strSize || dSize == 0) {
222 if (!ignoreEmpty || strSize > 0) {
// boost::is_same is fixed per instantiation, but this is an ordinary `if`,
// so the delimFront(delim) call below must compile for every DelimT.
227 if (boost::is_same<DelimT,StringPiece>::value && dSize == 1) {
228 // Call the char version because it is significantly faster.
229 return internalSplit<OutStringT>(delimFront(delim), sp, out,
// NOTE(review): tokenStartPos and the loop index are int while strSize and
// dSize are size_t. The loop comparison is carried out in unsigned
// arithmetic and `i + dSize` is narrowed back to int, so inputs larger than
// INT_MAX bytes are not handled; size_t would be the natural type here.
233 int tokenStartPos = 0;
// The index stops at strSize - dSize so that atDelim() never compares past
// the end of sp.
235 for (int i = 0; i <= strSize - dSize; ++i) {
236 if (atDelim(&s[i], delim)) {
237 if (!ignoreEmpty || tokenSize > 0) {
238 *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize));
// The next token starts right after the delimiter just matched.
241 tokenStartPos = i + dSize;
// NOTE(review): the ignoreEmpty test below reads tokenSize BEFORE it is
// recomputed as strSize - tokenStartPos on the following line. The loop above
// never visits the last dSize - 1 positions, so if tokenSize only counts
// positions the loop visited, a trailing token shorter than the delimiter is
// dropped when ignoreEmpty is true (e.g. splitting "abc" on "ab" would lose
// "c"). Computing tokenSize first and testing it afterwards avoids this.
// Confirm against the full source.
249 if (!ignoreEmpty || tokenSize > 0) {
250 tokenSize = strSize - tokenStartPos;
251 *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize));
// Normalizes the caller's delimiter before it is handed to internalSplit():
// anything a StringPiece can be constructed from becomes a StringPiece, while
// a plain char stays a char so that the char overloads of the delimiter
// helpers are selected.
255 template<class String> StringPiece prepareDelim(const String& s) {
256 return StringPiece(s);
258 inline char prepareDelim(char c) { return c; }
262 //////////////////////////////////////////////////////////////////////
// split() overload that collects tokens into a std::vector<OutputType>.
// Forwards to detail::internalSplit<OutputType> with the delimiter normalized
// by detail::prepareDelim() and a std::back_inserter on `out`, so tokens are
// appended after whatever `out` already holds.
// NOTE(review): the input-string and ignoreEmpty parameters and the matching
// call arguments are not visible in this listing.
264 template<class Delim, class String, class OutputType>
265 void split(const Delim& delimiter,
267 std::vector<OutputType>& out,
269 detail::internalSplit<OutputType>(
270 detail::prepareDelim(delimiter),
272 std::back_inserter(out),
// split() overload that collects tokens into an fbvector<OutputType>.
// Forwards to detail::internalSplit<OutputType> with the delimiter normalized
// by detail::prepareDelim() and a std::back_inserter on `out`, so tokens are
// appended after whatever `out` already holds.
// ignoreEmpty defaults to false here.
// NOTE(review): the input-string parameter and the remaining call arguments
// are not visible in this listing.
276 template<class Delim, class String, class OutputType>
277 void split(const Delim& delimiter,
279 fbvector<OutputType>& out,
280 bool ignoreEmpty = false) {
281 detail::internalSplit<OutputType>(
282 detail::prepareDelim(delimiter),
284 std::back_inserter(out),
// Forwards to detail::internalSplit<OutputValueType> with the delimiter
// normalized by detail::prepareDelim(). OutputValueType is not deducible from
// the visible parameters, so callers presumably name it explicitly.
// NOTE(review): the remaining parameters and call arguments (presumably the
// input string, an OutputIterator and ignoreEmpty) are not visible in this
// listing.
288 template<class OutputValueType, class Delim, class String,
289 class OutputIterator>
290 void splitTo(const Delim& delimiter,
294 detail::internalSplit<OutputValueType>(
295 detail::prepareDelim(delimiter),
303 * Hex-dump at most 16 bytes starting at offset from a memory area of size
304 * bytes. Return the number of bytes actually dumped.
// hexDump() adds this function's return value to its running offset and
// passes a `line` object as the fourth argument. The rest of this
// declaration is not visible in this listing.
306 size_t hexDumpLine(const void* ptr, size_t offset, size_t size,
308 } // namespace detail
// Hex-dumps the `size` bytes at `ptr` by calling detail::hexDumpLine()
// repeatedly until the running offset reaches `size`.
//
// @param ptr   start of the memory area
// @param size  number of bytes in the area
// @param out   NOTE(review): the statements that use `out` are not visible in
//              this listing; presumably each formatted line is written to it.
//
// NOTE(review): the declarations of `offset` and `line` are not visible here.
310 template <class OutIt>
311 void hexDump(const void* ptr, size_t size, OutIt out) {
// Progress depends on hexDumpLine() returning a non-zero byte count whenever
// offset < size.
314 while (offset < size) {
315 offset += detail::hexDumpLine(ptr, offset, size, line);
322 #endif /* FOLLY_STRING_INL_H_ */