/// the original (static) function name.
StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
+/// Given a vector of strings (function PGO names) \c NameStrs, the
+/// method generates a combined string \c Result that is ready to be
+/// serialized. The \c Result string is comprised of three fields:
+/// The first field is the length of the uncompressed strings, and
+/// the second field is the length of the zlib-compressed string.
+/// Both fields are encoded in ULEB128. If \c doCompression is false, the
+/// third field is the uncompressed strings; otherwise it is the
+/// compressed string. When compression is off, the second field is zero.
+/// Returns 0 on success and 1 if zlib compression fails.
+int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+ bool doCompression, std::string &Result);
+/// Produce \c Result string with the same format described above. The input
+/// is a vector of PGO function name variables that are referenced.
+int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+ std::string &Result);
+class InstrProfSymtab;
+/// \c NameStrings is a string composed of one or more sub-strings encoded in
+/// the format described above. The substrings are separated by 0 or more
+/// zero bytes.
+/// This method decodes the string and populates the \c Symtab. It returns 0
+/// on success and 1 if a sub-string cannot be decoded.
+int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
+
const std::error_category &instrprof_category();
enum class instrprof_error {
/// This interface is used by reader of CoverageMapping test
/// format.
inline std::error_code create(StringRef D, uint64_t BaseAddr);
+ /// \c NameStrings is a string composed of one or more sub-strings
+ /// encoded in the format produced by \c collectPGOFuncNameStrings.
+ /// The substrings are separated by 0 or more zero bytes. This method
+ /// decodes the string and populates the \c Symtab.
+ inline std::error_code create(StringRef NameStrings);
/// Create InstrProfSymtab from a set of names iteratable from
/// \p IterRange. This interface is used by IndexedProfReader.
template <typename NameIterRange> void create(const NameIterRange &IterRange);
AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
}
AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
- /// Return function's PGO name from the function name's symabol
- /// address in the object file. If an error occurs, Return
+ /// Return function's PGO name from the function name's symbol
+ /// address in the object file. If an error occurs, return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
return std::error_code();
}
+std::error_code InstrProfSymtab::create(StringRef NameStrings) {
+  // The decoder reports failure with a nonzero status; surface that to
+  // callers as a "malformed" profile error.
+  const bool DecodeFailed = readPGOFuncNameStrings(NameStrings, *this) != 0;
+  return DecodeFailed ? make_error_code(instrprof_error::malformed)
+                      : std::error_code();
+}
+
template <typename NameIterRange>
void InstrProfSymtab::create(const NameIterRange &IterRange) {
for (auto Name : IterRange)
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/GlobalVariable.h"
-#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
}
+// Serialize \p NameStrs and append the encoding to \p Result. The encoding is
+// ULEB128(uncompressed size), ULEB128(compressed size, or 0 when the payload
+// is stored uncompressed), followed by the payload itself.
+// Returns 0 on success and 1 if zlib compression fails.
+int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
+                              bool doCompression, std::string &Result) {
+  // Two ULEB128-encoded 64-bit lengths need at most 10 bytes each.
+  uint8_t Header[20], *P = Header;
+  std::string UncompressedNameStrings;
+
+  // Every name, including the last one, is followed by a single space; the
+  // reader uses that terminator to find name boundaries.
+  for (const std::string &NameStr : NameStrs) {
+    UncompressedNameStrings += NameStr;
+    UncompressedNameStrings.append(" ");
+  }
+  P += encodeULEB128(UncompressedNameStrings.length(), P);
+  if (!doCompression) {
+    // A zero compressed length marks the payload as uncompressed.
+    P += encodeULEB128(0, P);
+    Result.append(reinterpret_cast<char *>(Header), P - Header);
+    Result += UncompressedNameStrings;
+    return 0;
+  }
+  SmallVector<char, 128> CompressedNameStrings;
+  zlib::Status Success =
+      zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
+                     zlib::BestSizeCompression);
+  assert(Success == zlib::StatusOK);
+  if (Success != zlib::StatusOK)
+    return 1;
+  P += encodeULEB128(CompressedNameStrings.size(), P);
+  Result.append(reinterpret_cast<char *>(Header), P - Header);
+  // Append the compressed bytes directly instead of via a temporary string.
+  Result.append(CompressedNameStrings.data(), CompressedNameStrings.size());
+  return 0;
+}
+
+int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
+                              std::string &Result) {
+  // Pull the name text out of each variable's constant initializer.
+  std::vector<std::string> Names;
+  for (GlobalVariable *GV : NameVars) {
+    auto *Init = cast<ConstantDataArray>(GV->getInitializer());
+    if (Init->isCString())
+      Names.push_back(Init->getAsCString().str());
+    else
+      Names.push_back(Init->getAsString().str());
+  }
+  // Compress whenever zlib support is available.
+  const bool UseCompression = zlib::isAvailable();
+  return collectPGOFuncNameStrings(Names, UseCompression, Result);
+}
+
+// Decode one or more encoded name blocks from \p NameStrings and add every
+// function name found to \p Symtab. Blocks may be separated by zero bytes.
+// Returns 0 on success and 1 if the data is truncated or cannot be decoded.
+int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
+  const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data());
+  const uint8_t *const EndP = P + NameStrings.size();
+  while (P < EndP) {
+    // Block header: two ULEB128 fields, the uncompressed size followed by the
+    // compressed size.
+    unsigned N;
+    const uint64_t UncompressedSize = decodeULEB128(P, &N);
+    // At least one more byte must follow for the second header field.
+    if (N >= static_cast<uint64_t>(EndP - P))
+      return 1;
+    P += N;
+    const uint64_t CompressedSize = decodeULEB128(P, &N);
+    if (N > static_cast<uint64_t>(EndP - P))
+      return 1;
+    P += N;
+
+    // A zero compressed size means the payload is stored uncompressed.
+    const bool isCompressed = (CompressedSize != 0);
+    const uint64_t StoredSize =
+        isCompressed ? CompressedSize : UncompressedSize;
+    // Reject headers that claim more payload than the buffer actually holds.
+    if (StoredSize > static_cast<uint64_t>(EndP - P))
+      return 1;
+
+    SmallString<128> UncompressedNameStrings;
+    StringRef Names(reinterpret_cast<const char *>(P), StoredSize);
+    if (isCompressed) {
+      if (zlib::uncompress(Names, UncompressedNameStrings,
+                           UncompressedSize) != zlib::StatusOK)
+        return 1;
+      Names = StringRef(UncompressedNameStrings.data(),
+                        UncompressedNameStrings.size());
+    }
+    P += StoredSize;
+
+    // Each name is terminated by a single space. An empty payload holds no
+    // names; trailing bytes without a terminator mean the block is malformed.
+    size_t NameStart = 0;
+    while (NameStart < Names.size()) {
+      const size_t NameStop = Names.find(' ', NameStart);
+      if (NameStop == StringRef::npos)
+        return 1;
+      Symtab.addFuncName(Names.substr(NameStart, NameStop - NameStart));
+      NameStart = NameStop + 1;
+    }
+
+    // Skip any zero padding before the next block.
+    while (P < EndP && *P == 0)
+      P++;
+  }
+  Symtab.finalizeSymtab();
+  return 0;
+}
+
instrprof_error
InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
uint64_t Weight) {
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/Support/Compression.h"
#include "gtest/gtest.h"
#include <cstdarg>
ASSERT_EQ(StringRef("bar3"), R);
}
+TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
+  // Build two name lists; each one receives two names per index I.
+  std::vector<std::string> FuncNames1;
+  std::vector<std::string> FuncNames2;
+  for (int I = 0; I < 10 * 1024; I++) {
+    std::string str;
+    raw_string_ostream OS(str);
+    OS << "func_" << I;
+    FuncNames1.push_back(OS.str());
+    str.clear();
+    OS << "fooooooooooooooo_" << I;
+    FuncNames1.push_back(OS.str());
+    str.clear();
+    OS << "BAR_" << I;
+    FuncNames2.push_back(OS.str());
+    str.clear();
+    OS << "BlahblahBlahblahBar_" << I;
+    FuncNames2.push_back(OS.str());
+  }
+
+  // Exercise 0..9 zero bytes of padding between the two encoded blocks, both
+  // with and without compression.
+  for (int Padding = 0; Padding < 10; Padding++) {
+    for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
+      // Only request compression when zlib is available in this build.
+      const bool Compress = DoCompression != 0 && zlib::isAvailable();
+
+      // Encode each list into its own block; encoding must succeed.
+      std::string FuncNameStrings1;
+      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames1, Compress,
+                                             FuncNameStrings1));
+      std::string FuncNameStrings2;
+      ASSERT_EQ(0, collectPGOFuncNameStrings(FuncNames2, Compress,
+                                             FuncNameStrings2));
+
+      // Join with paddings:
+      std::string FuncNameStrings = FuncNameStrings1;
+      FuncNameStrings.append(static_cast<size_t>(Padding), '\0');
+      FuncNameStrings += FuncNameStrings2;
+
+      // Decode. Checking the result here gives a clear failure instead of
+      // thousands of lookup mismatches below.
+      InstrProfSymtab Symtab;
+      ASSERT_FALSE(Symtab.create(StringRef(FuncNameStrings)));
+
+      // Every original name must be recoverable through its hash.
+      for (int I = 0; I < 10 * 1024; I++) {
+        const std::string *Expected[4] = {
+            &FuncNames1[2 * I], &FuncNames1[2 * I + 1], &FuncNames2[2 * I],
+            &FuncNames2[2 * I + 1]};
+        for (const std::string *Name : Expected) {
+          StringRef R =
+              Symtab.getFuncName(IndexedInstrProf::ComputeHash(*Name));
+          ASSERT_EQ(StringRef(*Name), R);
+        }
+      }
+    }
+  }
+}
+
} // end anonymous namespace