From 5ee568ac2704d7302017d42ad162d4b53d076cbc Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Tue, 19 Oct 2010 22:13:48 +0000 Subject: [PATCH] Extend StringRef's edit-distance algorithm to permit an upper bound on the allowed edit distance git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116867 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/ADT/StringRef.h | 7 ++++++- lib/Support/StringRef.cpp | 9 ++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 8386d3ee428..ccf8ca9a664 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -142,11 +142,16 @@ namespace llvm { /// operation, rather than as two operations (an insertion and a /// removal). /// + /// \param MaxEditDistance If non-zero, the maximum edit distance that + /// this routine is allowed to compute. If the edit distance will exceed + /// that maximum, returns \c MaxEditDistance+1. + /// /// \returns the minimum number of character insertions, removals, /// or (if \p AllowReplacements is \c true) replacements needed to /// transform one of the given strings into the other. If zero, /// the strings are identical. - unsigned edit_distance(StringRef Other, bool AllowReplacements = true); + unsigned edit_distance(StringRef Other, bool AllowReplacements = true, + unsigned MaxEditDistance = 0); /// str - Get the contents as an std::string. std::string str() const { diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 46f26b242aa..5ad862815b5 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -68,7 +68,8 @@ int StringRef::compare_numeric(StringRef RHS) const { // Compute the edit distance between the two given strings. unsigned StringRef::edit_distance(llvm::StringRef Other, - bool AllowReplacements) { + bool AllowReplacements, + unsigned MaxEditDistance) { // The algorithm implemented below is the "classic" // dynamic-programming algorithm for computing the Levenshtein // distance, which is described here: @@ -94,6 +95,8 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, for (size_type y = 1; y <= m; ++y) { current[0] = y; + unsigned BestThisRow = current[0]; + for (size_type x = 1; x <= n; ++x) { if (AllowReplacements) { current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), @@ -103,8 +106,12 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; else current[x] = min(current[x-1], previous[x]) + 1; } + BestThisRow = min(BestThisRow, current[x]); } + if (MaxEditDistance && BestThisRow > MaxEditDistance) + return MaxEditDistance + 1; + unsigned *tmp = current; current = previous; previous = tmp; -- 2.34.1