From: Rui Ueyama Date: Mon, 26 Oct 2015 19:58:29 +0000 (+0000) Subject: Optimize StringTableBuilder. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=d2a6a14ca4a8642f49ee081c1209543415bcb2fb;p=oota-llvm.git Optimize StringTableBuilder. This is a patch to improve StringTableBuilder's performance. That class's finalize function is very hot, particularly in LLD, because the function tail-merges strings in string tables or SHF_MERGE sections. A generic std::sort-style sorter is not efficient for sorting strings. The function implemented in this patch seems to be more efficient. Here's a benchmark of LLD linking Clang with and without this patch. The numbers are medians of 50 runs. -O0 real 0m0.455s real 0m0.430s (5.5% faster) -O3 real 0m0.487s real 0m0.452s (7.2% faster) Since that is a benchmark of the whole linker, the speedup of StringTableBuilder itself is much more than that. http://reviews.llvm.org/D14053 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251337 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index 6210238e72a..a7b3c2c23c1 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -18,20 +18,47 @@ using namespace llvm; StringTableBuilder::StringTableBuilder(Kind K) : K(K) {} -static int compareBySuffix(std::pair<StringRef, size_t> *const *AP, - std::pair<StringRef, size_t> *const *BP) { - StringRef A = (*AP)->first; - StringRef B = (*BP)->first; - size_t SizeA = A.size(); - size_t SizeB = B.size(); - size_t Len = std::min(SizeA, SizeB); - for (size_t I = 0; I < Len; ++I) { - char CA = A[SizeA - I - 1]; - char CB = B[SizeB - I - 1]; - if (CA != CB) - return CB - CA; +typedef std::pair<StringRef, size_t> StringPair; + +// Returns the character at Pos from end of a string. +static int charTailAt(StringPair *P, size_t Pos) { + StringRef S = P->first; + if (Pos >= S.size()) + return -1; + return (unsigned char)S[S.size() - Pos - 1]; +} + +// Three-way radix quicksort. 
This is much faster than std::sort with strcmp +// because it does not compare characters that we already know the same. +static void qsort(StringPair **Begin, StringPair **End, int Pos) { +tailcall: + if (End - Begin <= 1) + return; + + // Partition items. Items in [Begin, P) are greater than the pivot, + // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot. + int Pivot = charTailAt(*Begin, Pos); + StringPair **P = Begin; + StringPair **Q = End; + for (StringPair **R = Begin + 1; R < Q;) { + int C = charTailAt(*R, Pos); + if (C > Pivot) + std::swap(*P++, *R++); + else if (C < Pivot) + std::swap(*--Q, *R); + else + R++; + } + + qsort(Begin, P, Pos); + qsort(Q, End, Pos); + if (Pivot != -1) { + // qsort(P, Q, Pos + 1), but with tail call optimization. + Begin = P; + End = Q; + ++Pos; + goto tailcall; } - return SizeB - SizeA; } void StringTableBuilder::finalize() { @@ -40,7 +67,8 @@ void StringTableBuilder::finalize() { for (std::pair<StringRef, size_t> &P : StringIndexMap) Strings.push_back(&P); - array_pod_sort(Strings.begin(), Strings.end(), compareBySuffix); + if (!Strings.empty()) + qsort(&Strings[0], &Strings[0] + Strings.size(), 0); switch (K) { case RAW: