Extend StringRef's edit-distance algorithm to permit an upper bound on the allowed...
author Douglas Gregor <dgregor@apple.com>
Tue, 19 Oct 2010 22:13:48 +0000 (22:13 +0000)
committer Douglas Gregor <dgregor@apple.com>
Tue, 19 Oct 2010 22:13:48 +0000 (22:13 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116867 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/ADT/StringRef.h
lib/Support/StringRef.cpp

index 8386d3ee428b1401643d4c6c3eba60b30ade39a1..ccf8ca9a6647ee95475d90bbd98f53892bc94ae8 100644 (file)
@@ -142,11 +142,16 @@ namespace llvm {
     /// operation, rather than as two operations (an insertion and a
     /// removal).
     ///
+    /// \param MaxEditDistance If non-zero, the maximum edit distance that
+    /// this routine is allowed to compute. If the edit distance will exceed
+    /// that maximum, returns \c MaxEditDistance+1.
+    ///
     /// \returns the minimum number of character insertions, removals,
     /// or (if \p AllowReplacements is \c true) replacements needed to
     /// transform one of the given strings into the other. If zero,
     /// the strings are identical.
-    unsigned edit_distance(StringRef Other, bool AllowReplacements = true);
+    unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
+                           unsigned MaxEditDistance = 0);
 
     /// str - Get the contents as an std::string.
     std::string str() const {
index 46f26b242aac36f843234d51d3a36a320ab4d9eb..5ad862815b53b8c500095b67ec5adb9901b606c3 100644 (file)
@@ -68,7 +68,8 @@ int StringRef::compare_numeric(StringRef RHS) const {
 
 // Compute the edit distance between the two given strings.
 unsigned StringRef::edit_distance(llvm::StringRef Other, 
-                                  bool AllowReplacements) {
+                                  bool AllowReplacements,
+                                  unsigned MaxEditDistance) {
   // The algorithm implemented below is the "classic"
   // dynamic-programming algorithm for computing the Levenshtein
   // distance, which is described here:
@@ -94,6 +95,8 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
 
   for (size_type y = 1; y <= m; ++y) {
     current[0] = y;
+    unsigned BestThisRow = current[0];
+    
     for (size_type x = 1; x <= n; ++x) {
       if (AllowReplacements) {
         current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
@@ -103,8 +106,12 @@ unsigned StringRef::edit_distance(llvm::StringRef Other,
         if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
         else current[x] = min(current[x-1], previous[x]) + 1;
       }
+      BestThisRow = min(BestThisRow, current[x]);
     }
     
+    if (MaxEditDistance && BestThisRow > MaxEditDistance)
+      return MaxEditDistance + 1;
+    
     unsigned *tmp = current;
     current = previous;
     previous = tmp;