2 * Eddie Kohler, Yandong Mao, Robert Morris
3 * Copyright (c) 2012-2013 President and Fellows of Harvard College
4 * Copyright (c) 2012-2013 Massachusetts Institute of Technology
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, subject to the conditions
9 * listed in the Masstree LICENSE file. These conditions include: you must
10 * preserve this copyright notice, and you cannot mention the copyright
11 * holders in advertising related to the Software without their permission.
12 * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
13 * notice is a summary of the Masstree LICENSE file; the license in that file
16 #ifndef LCDF_STRING_HH
17 #define LCDF_STRING_HH
18 #include "string_base.hh"
23 class String : public String_base<String> {
28 enum { max_length = 0x7FFFFFE0 };
30 typedef String substring_type;
31 typedef const String& argument_type;
34 inline String(const String &x);
35 #if HAVE_CXX_RVALUE_REFERENCES
36 inline String(String &&x);
39 explicit inline String(const String_base<T>& str);
40 inline String(const char* cstr);
41 inline String(const std::string& str);
42 inline String(const char* s, int len);
43 inline String(const unsigned char* s, int len);
44 inline String(const char* first, const char* last);
45 inline String(const unsigned char* first, const unsigned char* last);
46 explicit inline String(bool x);
47 explicit inline String(char c);
48 explicit inline String(unsigned char c);
49 explicit String(int x);
50 explicit String(unsigned x);
51 explicit String(long x);
52 explicit String(unsigned long x);
53 explicit String(long long x);
54 explicit String(unsigned long long x);
55 explicit String(double x);
57 inline String(const rep_type& r);
61 static inline const String& make_empty();
62 static inline const String& make_out_of_memory();
63 static String make_uninitialized(int len);
64 static inline String make_stable(const char* cstr);
65 static inline String make_stable(const char* s, int len);
66 static inline String make_stable(const char* first, const char* last);
68 static inline String make_stable(const String_base<T>& str);
69 static String make_fill(int c, int n);
70 static inline const String& make_zero();
72 inline const char* data() const;
73 inline int length() const;
75 inline const char *c_str() const;
77 inline String substring(const char* first, const char* last) const;
78 inline String substring(const unsigned char* first,
79 const unsigned char* last) const;
80 inline String fast_substring(const char* first, const char* last) const;
81 inline String fast_substring(const unsigned char* first,
82 const unsigned char* last) const;
83 String substr(int pos, int len) const;
84 inline String substr(int pos) const;
91 String printable(int type = 0) const;
92 String to_hex() const;
101 u_replacement = 0xFFFD
104 String windows1252_to_utf8() const;
105 String utf16be_to_utf8(int flags = 0) const;
106 String utf16le_to_utf8(int flags = 0) const;
107 String utf16_to_utf8(int flags = 0) const;
108 String cesu8_to_utf8(int flags = 0) const;
109 String utf8_to_utf8(int flags = 0) const;
110 String to_utf8(int flags = 0) const;
112 using String_base<String>::encode_json;
113 String encode_json() const;
115 using String_base<String>::encode_base64;
116 String encode_base64(bool pad = false) const;
117 using String_base<String>::decode_base64;
118 String decode_base64() const;
120 inline String& operator=(const String& x);
121 #if HAVE_CXX_RVALUE_REFERENCES
122 inline String& operator=(String&& x);
124 template <typename T>
125 inline String& operator=(const String_base<T>& str);
126 inline String& operator=(const char* cstr);
127 inline String& operator=(const std::string& str);
129 inline void assign(const String& x);
130 #if HAVE_CXX_RVALUE_REFERENCES
131 inline void assign(String&& x);
133 template <typename T>
134 inline void assign(const String_base<T>& str);
135 inline void assign(const char* cstr);
136 inline void assign(const std::string& str);
137 inline void assign(const char* first, const char* last);
139 inline void swap(String& x);
141 inline void append(const String& x);
142 inline void append(const char* cstr);
143 inline void append(const char* s, int len);
144 inline void append(const char* first, const char* last);
145 inline void append(const unsigned char* first, const unsigned char* last);
146 void append_fill(int c, int len);
147 char *append_uninitialized(int len);
149 inline String& operator+=(const String& x);
150 template <typename T>
151 inline String& operator+=(const String_base<T>& x);
152 inline String& operator+=(const char* cstr);
153 inline String& operator+=(char c);
155 // String operator+(String, const String &);
156 // String operator+(String, const char *);
157 // String operator+(const char *, const String &);
159 inline bool is_shared() const;
160 inline bool is_stable() const;
162 inline String unique() const;
163 inline void shrink_to_fit();
165 char* mutable_data();
166 inline unsigned char* mutable_udata();
167 char* mutable_c_str();
171 static const unsigned char* skip_utf8_char(const unsigned char* first,
172 const unsigned char* last);
173 static const char* skip_utf8_char(const char* first, const char* last);
174 static const unsigned char* skip_utf8_bom(const unsigned char* first,
175 const unsigned char* last);
176 static const char* skip_utf8_bom(const char* first, const char* last);
184 inline void ref() const {
188 inline void deref() const {
189 if (memo_offset && --xmemo()->refcount == 0)
190 String::delete_memo(xmemo());
192 inline void reset_ref() {
197 inline memo_type* xmemo() const {
198 return reinterpret_cast<memo_type*>
199 (const_cast<char*>(data + memo_offset));
201 inline memo_type* memo() const {
202 return memo_offset ? xmemo() : nullptr;
204 inline void assign(const char* d, int l, memo_type* m) {
209 memo_offset = static_cast<int>(reinterpret_cast<char*>(m) - d);
213 inline void assign_noref(const char* d, int l, memo_type* m) {
217 memo_offset = static_cast<int>(reinterpret_cast<char*>(m) - d);
222 friend class StringAccum;
225 const rep_type& internal_rep() const {
228 void swap(rep_type& other_rep) {
232 inline void assign(const rep_type& rep);
233 /** @endcond never */
238 volatile uint32_t refcount;
240 volatile uint32_t dirty;
241 #if HAVE_STRING_PROFILING > 1
245 char real_data[8]; // but it might be more or less
247 inline void initialize(uint32_t capacity, uint32_t dirty);
248 #if HAVE_STRING_PROFILING
250 void account_destroy();
252 inline void account_destroy() {}
257 MEMO_SPACE = sizeof(memo_type) - 8 /* == sizeof(memo_type::real_data) */
262 /** @endcond never */
264 mutable rep_type _r; // mutable for c_str()
266 #if HAVE_STRING_PROFILING
267 static uint64_t live_memo_count;
268 static uint64_t memo_sizes[55];
269 static uint64_t live_memo_sizes[55];
270 static uint64_t live_memo_bytes[55];
271 # if HAVE_STRING_PROFILING > 1
272 static memo_t *live_memos[55];
275 static inline int profile_memo_size_bucket(uint32_t dirty, uint32_t capacity) {
278 else if (capacity <= 32)
279 return 17 + (capacity - 17) / 2;
280 else if (capacity <= 64)
281 return 25 + (capacity - 33) / 8;
283 return 29 + 26 - ffs_msb(capacity - 1);
286 static void profile_update_memo_dirty(memo_type* memo, uint32_t old_dirty, uint32_t new_dirty, uint32_t capacity) {
287 if (capacity <= 16 && new_dirty != old_dirty) {
288 ++memo_sizes[new_dirty];
289 ++live_memo_sizes[new_dirty];
290 live_memo_bytes[new_dirty] += capacity;
291 --live_memo_sizes[old_dirty];
292 live_memo_bytes[old_dirty] -= capacity;
293 # if HAVE_STRING_PROFILING > 1
294 if ((*memo->pprev = memo->next))
295 memo->next->pprev = memo->pprev;
296 memo->pprev = &live_memos[new_dirty];
297 if ((memo->next = *memo->pprev))
298 memo->next->pprev = &memo->next;
306 static void one_profile_report(StringAccum &sa, int i, int examples);
309 inline String(const char* data, int length, memo_type* memo) {
310 _r.assign_noref(data, length, memo);
312 inline String(const char* data, int length, const null_memo&)
313 : _r{data, length, 0} {
316 inline void deref() const {
320 void assign(const char* s, int len, bool need_deref);
321 void assign_out_of_memory();
322 void append(const char* s, int len, memo_type* memo);
323 static String hard_make_stable(const char *s, int len);
324 static inline memo_type* absent_memo() {
325 return reinterpret_cast<memo_type*>(uintptr_t(1));
327 static inline memo_type* create_memo(int capacity, int dirty);
328 static void delete_memo(memo_type* memo);
329 const char* hard_c_str() const;
330 bool hard_equals(const char* s, int len) const;
332 static const char int_data[20];
333 static const rep_type null_string_rep;
334 static const rep_type oom_string_rep;
335 static const rep_type zero_string_rep;
337 static int parse_cesu8_char(const unsigned char* s,
338 const unsigned char* end);
340 friend struct rep_type;
341 friend class StringAccum;
346 inline void String::memo_type::initialize(uint32_t capacity, uint32_t dirty) {
348 this->capacity = capacity;
350 #if HAVE_STRING_PROFILING
354 /** @endcond never */
356 /** @brief Construct an empty String (with length 0). */
357 inline String::String()
358 : _r{String_generic::empty_data, 0, 0} {
361 /** @brief Construct a copy of the String @a x. */
362 inline String::String(const String& x)
367 #if HAVE_CXX_RVALUE_REFERENCES
368 /** @brief Move-construct a String from @a x. */
369 inline String::String(String &&x)
375 /** @brief Construct a copy of the string @a str. */
376 template <typename T>
377 inline String::String(const String_base<T> &str) {
378 assign(str.data(), str.length(), false);
381 /** @brief Construct a String containing the C string @a cstr.
382 @param cstr a null-terminated C string
383 @return A String containing the characters of @a cstr, up to but not
384 including the terminating null character. */
385 inline String::String(const char* cstr) {
386 if (LCDF_CONSTANT_CSTR(cstr))
387 _r.assign(cstr, strlen(cstr), 0);
389 assign(cstr, -1, false);
392 /** @brief Construct a String containing the first @a len characters of
395 @param len number of characters to take from @a s. If @a len @< 0,
396 then takes @c strlen(@a s) characters.
397 @return A String containing @a len characters of @a s. */
398 inline String::String(const char* s, int len) {
399 assign(s, len, false);
403 inline String::String(const unsigned char* s, int len) {
404 assign(reinterpret_cast<const char*>(s), len, false);
407 /** @brief Construct a String containing the characters from @a first
409 @param first first character in string (begin iterator)
410 @param last pointer one past last character in string (end iterator)
411 @return A String containing the characters from @a first to @a last.
413 Constructs an empty string if @a first @>= @a last. */
414 inline String::String(const char *first, const char *last) {
415 assign(first, (first < last ? last - first : 0), false);
419 inline String::String(const unsigned char* first, const unsigned char* last) {
420 assign(reinterpret_cast<const char*>(first),
421 (first < last ? last - first : 0), false);
424 /** @brief Construct a String from a std::string. */
425 inline String::String(const std::string& str) {
426 assign(str.data(), str.length(), false);
429 /** @brief Construct a String equal to "true" or "false" depending on the
431 inline String::String(bool x)
432 : _r{String_generic::bool_data + (-x & 6), 5 - x, 0} {
433 // bool_data equals "false\0true\0"
436 /** @brief Construct a String containing the single character @a c. */
437 inline String::String(char c) {
438 assign(&c, 1, false);
442 inline String::String(unsigned char c) {
443 assign(reinterpret_cast<char*>(&c), 1, false);
446 inline String::String(const rep_type& r)
451 /** @brief Destroy a String, freeing memory if necessary. */
452 inline String::~String() {
456 /** @brief Return a const reference to an empty String.
458 May be quicker than String::String(). */
459 inline const String& String::make_empty() {
460 return reinterpret_cast<const String &>(null_string_rep);
463 /** @brief Return a String containing @a len unknown characters. */
464 inline String String::make_uninitialized(int len) {
466 s.append_uninitialized(len);
470 /** @brief Return a const reference to the string "0". */
471 inline const String& String::make_zero() {
472 return reinterpret_cast<const String&>(zero_string_rep);
475 /** @brief Return a String that directly references the C string @a cstr.
477 The make_stable() functions are suitable for static constant strings
478 whose data is known to stay around forever, such as C string constants.
480 @warning The String implementation may access @a cstr's terminating null
482 inline String String::make_stable(const char *cstr) {
483 if (LCDF_CONSTANT_CSTR(cstr))
484 return String(cstr, strlen(cstr), null_memo());
486 return hard_make_stable(cstr, -1);
489 /** @brief Return a String that directly references the first @a len
492 If @a len @< 0, treats @a s as a null-terminated C string.
494 @warning The String implementation may access @a s[@a len], which
495 should remain constant even though it's not part of the String. */
496 inline String String::make_stable(const char* s, int len) {
497 if (__builtin_constant_p(len) && len >= 0)
498 return String(s, len, null_memo());
500 return hard_make_stable(s, len);
503 /** @brief Return a String that directly references the character data in
505 @param first pointer to the first character in the character data
506 @param last pointer one beyond the last character in the character data
507 (but see the warning)
509 This function is suitable for static constant strings whose data is
510 known to stay around forever, such as C string constants. Returns an
511 empty string if @a first @>= @a last.
513 @warning The String implementation may access *@a last, which should
514 remain constant even though it's not part of the String. */
515 inline String String::make_stable(const char* first, const char* last) {
516 return String(first, (first < last ? last - first : 0), null_memo());
520 template <typename T>
521 inline String String::make_stable(const String_base<T>& str) {
522 return String(str.data(), str.length(), null_memo());
525 /** @brief Return a pointer to the string's data.
527 Only the first length() characters are valid, and the string
528 might not be null-terminated. */
529 inline const char* String::data() const {
533 /** @brief Return the string's length. */
534 inline int String::length() const {
538 /** @brief Null-terminate the string.
540 The terminating null character isn't considered part of the string, so
541 this->length() doesn't change. Returns a corresponding C string
542 pointer. The returned pointer is semi-temporary; it will persist until
543 the string is destroyed or appended to. */
544 inline const char* String::c_str() const {
545 // See also hard_c_str().
546 #if HAVE_OPTIMIZE_SIZE || __OPTIMIZE_SIZE__
549 // We may already have a '\0' in the right place. If _memo has no
550 // capacity, then this is one of the special strings (null or
551 // stable). We are guaranteed, in these strings, that _data[_length]
552 // exists. Otherwise must check that _data[_length] exists.
553 const char* end_data = _r.data + _r.length;
554 memo_type* m = _r.memo();
555 if ((m && end_data >= m->real_data + m->dirty)
556 || *end_data != '\0') {
557 if (char *x = const_cast<String*>(this)->append_uninitialized(1)) {
566 /** @brief Return a substring of the current string starting at @a first
567 and ending before @a last.
568 @param first pointer to the first substring character
569 @param last pointer one beyond the last substring character
571 Returns an empty string if @a first @>= @a last. Also returns an empty
572 string if @a first or @a last is out of range (i.e., either less than
573 this->begin() or greater than this->end()), but this should be
574 considered a programming error; a future version may generate a warning
576 inline String String::substring(const char* first, const char* last) const {
577 if (first < last && first >= _r.data && last <= _r.data + _r.length) {
579 return String(first, last - first, _r.memo());
584 inline String String::substring(const unsigned char* first, const unsigned char* last) const {
585 return substring(reinterpret_cast<const char*>(first),
586 reinterpret_cast<const char*>(last));
589 /** @brief Return a substring of the current string starting at @a first
590 and ending before @a last.
591 @param first pointer to the first substring character
592 @param last pointer one beyond the last substring character
593 @pre begin() <= @a first <= @a last <= end() */
594 inline String String::fast_substring(const char* first, const char* last) const {
595 assert(begin() <= first && first <= last && last <= end());
597 return String(first, last - first, _r.memo());
600 inline String String::fast_substring(const unsigned char* first, const unsigned char* last) const {
601 return fast_substring(reinterpret_cast<const char*>(first),
602 reinterpret_cast<const char*>(last));
605 /** @brief Return the suffix of the current string starting at index @a pos.
607 If @a pos is negative, starts that far from the end of the string.
608 If @a pos is so negative that the suffix starts outside the string,
609 then the entire string is returned. If the substring is beyond the
610 end of the string (@a pos > length()), returns an empty string (but
611 this should be considered a programming error; a future version may
612 generate a warning for this case).
614 @note String::substr() is intended to behave like Perl's
616 inline String String::substr(int pos) const {
617 return substr((pos <= -_r.length ? 0 : pos), _r.length);
620 inline void String::assign(const rep_type& rep) {
626 /** @brief Assign this string to @a x. */
627 inline void String::assign(const String& x) {
631 /** @brief Assign this string to @a x. */
632 inline String& String::operator=(const String& x) {
637 #if HAVE_CXX_RVALUE_REFERENCES
638 /** @brief Move-assign this string to @a x. */
639 inline void String::assign(String&& x) {
645 /** @brief Move-assign this string to @a x. */
646 inline String& String::operator=(String&& x) {
647 assign(std::move(x));
652 /** @brief Assign this string to the C string @a cstr. */
653 inline void String::assign(const char* cstr) {
654 if (LCDF_CONSTANT_CSTR(cstr)) {
656 _r.assign(cstr, strlen(cstr), 0);
658 assign(cstr, -1, true);
661 /** @brief Assign this string to the C string @a cstr. */
662 inline String& String::operator=(const char* cstr) {
667 /** @brief Assign this string to the string @a str. */
668 template <typename T>
669 inline void String::assign(const String_base<T>& str) {
670 assign(str.data(), str.length(), true);
673 /** @brief Assign this string to the string @a str. */
674 template <typename T>
675 inline String& String::operator=(const String_base<T>& str) {
680 /** @brief Assign this string to the std::string @a str. */
681 inline void String::assign(const std::string& str) {
682 assign(str.data(), str.length(), true);
685 /** @brief Assign this string to the std::string @a str. */
686 inline String& String::operator=(const std::string& str) {
691 /** @brief Assign this string to string [@a first, @a last). */
692 inline void String::assign(const char *first, const char *last) {
693 assign(first, last - first, true);
696 /** @brief Swap the values of this string and @a x. */
697 inline void String::swap(String &x) {
702 /** @brief Append @a x to this string. */
703 inline void String::append(const String& x) {
704 append(x.data(), x.length(), x._r.memo());
707 /** @brief Append the null-terminated C string @a cstr to this string.
708 @param cstr data to append */
709 inline void String::append(const char* cstr) {
710 if (LCDF_CONSTANT_CSTR(cstr))
711 append(cstr, strlen(cstr), absent_memo());
713 append(cstr, -1, absent_memo());
716 /** @brief Append the first @a len characters of @a s to this string.
717 @param s data to append
718 @param len length of data
720 If @a len @< 0, treats @a s as a null-terminated C string. */
721 inline void String::append(const char* s, int len) {
722 append(s, len, absent_memo());
725 /** @brief Appends the data from @a first to @a last to this string.
727 Does nothing if @a first @>= @a last. */
728 inline void String::append(const char* first, const char* last) {
730 append(first, last - first);
733 inline void String::append(const unsigned char* first,
734 const unsigned char* last) {
736 append(reinterpret_cast<const char*>(first), last - first);
739 /** @brief Append @a x to this string.
741 inline String& String::operator+=(const String &x) {
742 append(x.data(), x.length(), x._r.memo());
746 /** @brief Append the null-terminated C string @a cstr to this string.
748 inline String& String::operator+=(const char* cstr) {
753 /** @brief Append the character @a c to this string.
755 inline String &String::operator+=(char c) {
760 /** @brief Append the string @a x to this string.
762 template <typename T>
763 inline String &String::operator+=(const String_base<T> &x) {
764 append(x.data(), x.length());
768 /** @brief Test if the String's data is shared or stable. */
769 inline bool String::is_shared() const {
770 memo_type* m = _r.memo();
771 return !m || m->refcount != 1;
774 /** @brief Test if the String's data is stable. */
775 inline bool String::is_stable() const {
779 /** @brief Return a unique version of this String.
781 The return value shares no data with any other non-stable String. */
782 inline String String::unique() const {
783 memo_type* m = _r.memo();
784 if (!m || m->refcount == 1)
787 return String(_r.data, _r.data + _r.length);
790 /** @brief Reduce the memory allocation for this String.
792 After calling this function, this String shares no more than 256 bytes
793 of data with any other non-stable String. */
794 inline void String::shrink_to_fit() {
795 memo_type* m = _r.memo();
796 if (m && m->refcount > 1 && (uint32_t) _r.length + 256 < m->capacity)
797 *this = String(_r.data, _r.data + _r.length);
800 /** @brief Return the unsigned char* version of mutable_data(). */
801 inline unsigned char* String::mutable_udata() {
802 return reinterpret_cast<unsigned char*>(mutable_data());
805 /** @brief Return a const reference to a canonical out-of-memory String. */
806 inline const String &String::make_out_of_memory() {
807 return reinterpret_cast<const String &>(oom_string_rep);
810 /** @brief Return a pointer to the next character in UTF-8 encoding.
811 @pre @a first @< @a last
813 If @a first doesn't point at a valid UTF-8 character, returns @a first. */
814 inline const char* String::skip_utf8_char(const char* first, const char* last) {
815 return reinterpret_cast<const char*>(
816 skip_utf8_char(reinterpret_cast<const unsigned char*>(first),
817 reinterpret_cast<const unsigned char*>(last)));
820 inline const unsigned char* String::skip_utf8_bom(const unsigned char* first,
821 const unsigned char* last) {
822 if (last - first >= 3
823 && first[0] == 0xEF && first[1] == 0xBB && first[2] == 0xBF)
829 inline const char* String::skip_utf8_bom(const char* first, const char* last) {
830 return reinterpret_cast<const char*>(
831 skip_utf8_bom(reinterpret_cast<const unsigned char*>(first),
832 reinterpret_cast<const unsigned char*>(last)));
837 @brief Concatenate the operands and return the result.
839 At most one of the two operands can be a null-terminated C string. */
840 inline String operator+(String a, const String& b) {
845 /** @relates String */
846 inline String operator+(String a, const char* b) {
851 /** @relates String */
852 inline String operator+(const char* a, const String& b) {
859 @brief Concatenate the operands and return the result.
861 The second operand is a single character. */
862 inline String operator+(String a, char b) {
867 #if HAVE_CXX_USER_LITERALS
868 inline String operator"" _S(const char* s, size_t len) {
869 return String::make_stable(s, s + len);
873 inline void swap(String& a, String& b) {
879 LCDF_MAKE_STRING_HASH(lcdf::String)