From: Chandler Carruth Date: Thu, 10 Sep 2015 06:07:03 +0000 (+0000) Subject: [ADT] Add a single-character version of the small vector split routine X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=82c835626fc509771e3927e2ca4886cf0f4245db;p=oota-llvm.git [ADT] Add a single-character version of the small vector split routine on StringRef. Finding and splitting on a single character is substantially faster than doing it on even a single character StringRef -- we immediately get to a *very* tuned memchr call this way. Even nicer, we get to this even in a debug build, shaving 18% off the runtime of TripleTest.Normalization, helping PR23676 some more. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247244 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 95660a49f1f..414fbe4a675 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -489,6 +489,23 @@ namespace llvm { StringRef Separator, int MaxSplit = -1, bool KeepEmpty = true) const; + /// Split into substrings around the occurrences of a separator character. + /// + /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most + /// \p MaxSplit splits are done and consequently at most (\p MaxSplit + 1) + /// elements are added to \p A. + /// If \p KeepEmpty is false, empty strings are not added to \p A. They + /// still count when considering \p MaxSplit. + /// A useful invariant is that + /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true + /// + /// \param A - Where to put the substrings. + /// \param Separator - The character to split on. + /// \param MaxSplit - The maximum number of times the string is split. + /// \param KeepEmpty - True if empty substrings should be added. + void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1, + bool KeepEmpty = true) const; + /// Split into two substrings around the last occurrence of a separator /// character.
/// diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ddece087a9e..f2e587cb527 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -294,6 +294,26 @@ void StringRef::split(SmallVectorImpl<StringRef> &A, A.push_back(rest); } +void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator, + int MaxSplit, bool KeepEmpty) const { + StringRef rest = *this; + + // rest.data() is used to distinguish cases like "a," that splits into + // "a" + "" and "a" that splits into "a" + 0. + for (int splits = 0; + rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit); + ++splits) { + std::pair<StringRef, StringRef> p = rest.split(Separator); + + if (KeepEmpty || p.first.size() != 0) + A.push_back(p.first); + rest = p.second; + } + // If we have a tail left, add it. + if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty)) + A.push_back(rest); +} + //===----------------------------------------------------------------------===// // Helpful Algorithms //===----------------------------------------------------------------------===// diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index c1e68cac2f5..dfe719d1f11 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -624,7 +624,7 @@ std::string Triple::normalize(StringRef Str) { // Parse into components. SmallVector<StringRef, 4> Components; - Str.split(Components, "-"); + Str.split(Components, '-'); // If the first component corresponds to a known architecture, preferentially // use it for the architecture.
If the second component corresponds to a diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp index d80179bd787..8af07da24ea 100644 --- a/unittests/ADT/StringRefTest.cpp +++ b/unittests/ADT/StringRefTest.cpp @@ -225,6 +225,11 @@ TEST(StringRefTest, Split2) { expected.push_back("a"); expected.push_back("b"); expected.push_back("c"); StringRef("a,,b,c").split(parts, ",", 3, false); EXPECT_TRUE(parts == expected); + + expected.clear(); parts.clear(); + expected.push_back("a"); expected.push_back("b"); expected.push_back("c"); + StringRef("a,,b,c").split(parts, ',', 3, false); + EXPECT_TRUE(parts == expected); } TEST(StringRefTest, Trim) {