[ADT] Fix a confusing interface spec and some annoying peculiarities

author Chandler Carruth <chandlerc@gmail.com>

Thu, 10 Sep 2015 07:51:37 +0000 (07:51 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Thu, 10 Sep 2015 07:51:37 +0000 (07:51 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Thu, 10 Sep 2015 07:51:37 +0000 (07:51 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Thu, 10 Sep 2015 07:51:37 +0000 (07:51 +0000)
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h

index 414fbe4a6756b5011ce4e5df55cec1d8ea09a427..34caeee801a2f477975492907df7867c42ea0527 100644 (file)
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -474,7 +474,7 @@ namespace llvm {
      /// Split into substrings around the occurrences of a separator string.
      ///
      /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
-    /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
      /// elements are added to A.
      /// If \p KeepEmpty is false, empty strings are not added to \p A. They
      /// still count when considering \p MaxSplit
@@ -492,7 +492,7 @@ namespace llvm {
      /// Split into substrings around the occurrences of a separator character.
      ///
      /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
-    /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
      /// elements are added to A.
      /// If \p KeepEmpty is false, empty strings are not added to \p A. They
      /// still count when considering \p MaxSplit
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp

index f2e587cb527759b7e8fbbc27291f7f16228a24cd..88f920479dd2fefe7ca58547136fa239ce35ac31 100644 (file)
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -274,44 +274,56 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
  }
  
  void StringRef::split(SmallVectorImpl<StringRef> &A,
-                      StringRef Separators, int MaxSplit,
+                      StringRef Separator, int MaxSplit,
                        bool KeepEmpty) const {
-  StringRef rest = *this;
-
-  // rest.data() is used to distinguish cases like "a," that splits into
-  // "a" + "" and "a" that splits into "a" + 0.
-  for (int splits = 0;
-       rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
-       ++splits) {
-    std::pair<StringRef, StringRef> p = rest.split(Separators);
-
-    if (KeepEmpty || p.first.size() != 0)
-      A.push_back(p.first);
-    rest = p.second;
+  StringRef S = *this;
+
+  // Count down from MaxSplit. When MaxSplit is negative (e.g. -1), the count
+  // never reaches zero in practice, so this just splits "forever". More than
+  // 2^31 splits are intentionally unsupported; MaxSplit could be made a 64-bit
+  // integer if that is ever wanted, but that seems unlikely to be useful.
+  while (MaxSplit-- != 0) {
+    size_t Idx = S.find(Separator);
+    if (Idx == npos)
+      break;
+
+    // Push this split.
+    if (KeepEmpty || Idx > 0)
+      A.push_back(S.slice(0, Idx));
+
+    // Jump forward.
+    S = S.slice(Idx + Separator.size(), npos);
    }
-  // If we have a tail left, add it.
-  if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
-    A.push_back(rest);
+
+  // Push the tail.
+  if (KeepEmpty || !S.empty())
+    A.push_back(S);
  }
  
  void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
                        int MaxSplit, bool KeepEmpty) const {
-  StringRef rest = *this;
-
-  // rest.data() is used to distinguish cases like "a," that splits into
-  // "a" + "" and "a" that splits into "a" + 0.
-  for (int splits = 0;
-       rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
-       ++splits) {
-    std::pair<StringRef, StringRef> p = rest.split(Separator);
-
-    if (KeepEmpty || p.first.size() != 0)
-      A.push_back(p.first);
-    rest = p.second;
+  StringRef S = *this;
+
+  // Count down from MaxSplit. When MaxSplit is negative (e.g. -1), the count
+  // never reaches zero in practice, so this just splits "forever". More than
+  // 2^31 splits are intentionally unsupported; MaxSplit could be made a 64-bit
+  // integer if that is ever wanted, but that seems unlikely to be useful.
+  while (MaxSplit-- != 0) {
+    size_t Idx = S.find(Separator);
+    if (Idx == npos)
+      break;
+
+    // Push this split.
+    if (KeepEmpty || Idx > 0)
+      A.push_back(S.slice(0, Idx));
+
+    // Jump forward.
+    S = S.slice(Idx + 1, npos);
    }
-  // If we have a tail left, add it.
-  if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
-    A.push_back(rest);
+
+  // Push the tail.
+  if (KeepEmpty || !S.empty())
+    A.push_back(S);
  }
  
  //===----------------------------------------------------------------------===//
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp

index 8af07da24ea12d8706bb320f12063e277fa7ec07..6cf2e6a0454dcba5802721e5f5a1b72b70be6ebc 100644 (file)
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -230,6 +230,54 @@ TEST(StringRefTest, Split2) {
    expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
    StringRef("a,,b,c").split(parts, ',', 3, false);
    EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("");
+  StringRef().split(parts, ",", 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back(StringRef());
+  StringRef("").split(parts, ",", 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  StringRef("").split(parts, ",", 0, false);
+  EXPECT_TRUE(parts == expected);
+  StringRef().split(parts, ",", 0, false);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("a");
+  expected.push_back("");
+  expected.push_back("b");
+  expected.push_back("c,d");
+  StringRef("a,,b,c,d").split(parts, ",", 3, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("");
+  StringRef().split(parts, ',', 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back(StringRef());
+  StringRef("").split(parts, ',', 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  StringRef("").split(parts, ',', 0, false);
+  EXPECT_TRUE(parts == expected);
+  StringRef().split(parts, ',', 0, false);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("a");
+  expected.push_back("");
+  expected.push_back("b");
+  expected.push_back("c,d");
+  StringRef("a,,b,c,d").split(parts, ',', 3, true);
+  EXPECT_TRUE(parts == expected);
  }
  
  TEST(StringRefTest, Trim) {
author	Chandler Carruth <chandlerc@gmail.com>
	Thu, 10 Sep 2015 07:51:37 +0000 (07:51 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Thu, 10 Sep 2015 07:51:37 +0000 (07:51 +0000)
include/llvm/ADT/StringRef.h		patch \| blob \| history
lib/Support/StringRef.cpp		patch \| blob \| history
unittests/ADT/StringRefTest.cpp		patch \| blob \| history