[llvm-profdata] Add support for weighted merge of profile data (2nd try)
[oota-llvm.git] / tools / llvm-profdata / llvm-profdata.cpp
index 25531c776a315cb7ded4241520ee2b13393be59f..4fa36c4b0b6a400bcd32c3bad1e2b9b824deb405 100644 (file)
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/SampleProfReader.h"
 #include "llvm/ProfileData/SampleProfWriter.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <tuple>
 
 using namespace llvm;
 
-static void exitWithError(const Twine &Message, StringRef Whence = "") {
+enum ProfileFormat { PF_None = 0, PF_Text, PF_Binary, PF_GCC };
+
+static void exitWithError(const Twine &Message, StringRef Whence = "",
+                          StringRef Hint = "") {
   errs() << "error: ";
   if (!Whence.empty())
     errs() << Whence << ": ";
   errs() << Message << "\n";
+  if (!Hint.empty())
+    errs() << Hint << "\n";
   ::exit(1);
 }
 
+static void exitWithErrorCode(const std::error_code &Error,
+                              StringRef Whence = "") {
+  if (Error.category() == instrprof_category()) {
+    instrprof_error instrError = static_cast<instrprof_error>(Error.value());
+    if (instrError == instrprof_error::unrecognized_format) {
+      // Hint for common error of forgetting -sample for sample profiles.
+      exitWithError(Error.message(), Whence,
+                    "Perhaps you forgot to use the -sample option?");
+    }
+  }
+  exitWithError(Error.message(), Whence);
+}
+
+namespace {
 enum ProfileKinds { instr, sample };
+}
+
+static void handleMergeWriterError(std::error_code &Error,
+                                   StringRef WhenceFile = "",
+                                   StringRef WhenceFunction = "",
+                                   bool ShowHint = true) {
+  if (!WhenceFile.empty())
+    errs() << WhenceFile << ": ";
+  if (!WhenceFunction.empty())
+    errs() << WhenceFunction << ": ";
+  errs() << Error.message() << "\n";
+
+  if (ShowHint) {
+    StringRef Hint = "";
+    if (Error.category() == instrprof_category()) {
+      instrprof_error instrError = static_cast<instrprof_error>(Error.value());
+      switch (instrError) {
+      case instrprof_error::hash_mismatch:
+      case instrprof_error::count_mismatch:
+      case instrprof_error::value_site_count_mismatch:
+        Hint = "Make sure that all profile data to be merged is generated "
+               "from the same binary.";
+        break;
+      default:
+        break;
+      }
+    }
+
+    if (!Hint.empty())
+      errs() << Hint << "\n";
+  }
+}
 
-void mergeInstrProfile(cl::list<std::string> Inputs, StringRef OutputFilename) {
+struct WeightedFile {
+  StringRef Filename;
+  uint64_t Weight;
+
+  WeightedFile() {}
+
+  WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
+};
+typedef SmallVector<WeightedFile, 5> WeightedFileVector;
+
+static void mergeInstrProfile(const WeightedFileVector &Inputs,
+                              StringRef OutputFilename,
+                              ProfileFormat OutputFormat) {
   if (OutputFilename.compare("-") == 0)
     exitWithError("Cannot write indexed profdata format to stdout.");
 
+  if (OutputFormat != PF_Binary && OutputFormat != PF_Text)
+    exitWithError("Unknown format is specified.");
+
   std::error_code EC;
   raw_fd_ostream Output(OutputFilename.data(), EC, sys::fs::F_None);
   if (EC)
-    exitWithError(EC.message(), OutputFilename);
+    exitWithErrorCode(EC, OutputFilename);
 
   InstrProfWriter Writer;
-  for (const auto &Filename : Inputs) {
-    auto ReaderOrErr = InstrProfReader::create(Filename);
+  SmallSet<std::error_code, 4> WriterErrorCodes;
+  for (const auto &Input : Inputs) {
+    auto ReaderOrErr = InstrProfReader::create(Input.Filename);
     if (std::error_code ec = ReaderOrErr.getError())
-      exitWithError(ec.message(), Filename);
+      exitWithErrorCode(ec, Input.Filename);
 
     auto Reader = std::move(ReaderOrErr.get());
-    for (const auto &I : *Reader)
-      if (std::error_code EC =
-              Writer.addFunctionCounts(I.Name, I.Hash, I.Counts))
-        errs() << Filename << ": " << I.Name << ": " << EC.message() << "\n";
+    for (auto &I : *Reader) {
+      if (std::error_code EC = Writer.addRecord(std::move(I), Input.Weight)) {
+        // Only show hint the first time an error occurs.
+        bool firstTime = WriterErrorCodes.insert(EC).second;
+        handleMergeWriterError(EC, Input.Filename, I.Name, firstTime);
+      }
+    }
     if (Reader->hasError())
-      exitWithError(Reader->getError().message(), Filename);
+      exitWithErrorCode(Reader->getError(), Input.Filename);
   }
-  Writer.write(Output);
+  if (OutputFormat == PF_Text)
+    Writer.writeText(Output);
+  else
+    Writer.write(Output);
 }
 
-void mergeSampleProfile(cl::list<std::string> Inputs, StringRef OutputFilename,
-                        sampleprof::SampleProfileFormat OutputFormat) {
+static sampleprof::SampleProfileFormat FormatMap[] = {
+    sampleprof::SPF_None, sampleprof::SPF_Text, sampleprof::SPF_Binary,
+    sampleprof::SPF_GCC};
+
+static void mergeSampleProfile(const WeightedFileVector &Inputs,
+                               StringRef OutputFilename,
+                               ProfileFormat OutputFormat) {
   using namespace sampleprof;
-  auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
+  auto WriterOrErr =
+      SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
   if (std::error_code EC = WriterOrErr.getError())
-    exitWithError(EC.message(), OutputFilename);
+    exitWithErrorCode(EC, OutputFilename);
 
   auto Writer = std::move(WriterOrErr.get());
   StringMap<FunctionSamples> ProfileMap;
-  for (const auto &Filename : Inputs) {
+  SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
+  for (const auto &Input : Inputs) {
     auto ReaderOrErr =
-        SampleProfileReader::create(Filename, getGlobalContext());
+        SampleProfileReader::create(Input.Filename, getGlobalContext());
     if (std::error_code EC = ReaderOrErr.getError())
-      exitWithError(EC.message(), Filename);
-
-    auto Reader = std::move(ReaderOrErr.get());
+      exitWithErrorCode(EC, Input.Filename);
+
+    // We need to keep the readers around until after all the files are
+    // read so that we do not lose the function names stored in each
+    // reader's memory. The function names are needed to write out the
+    // merged profile map.
+    Readers.push_back(std::move(ReaderOrErr.get()));
+    const auto Reader = Readers.back().get();
     if (std::error_code EC = Reader->read())
-      exitWithError(EC.message(), Filename);
+      exitWithErrorCode(EC, Input.Filename);
 
     StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
     for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
@@ -89,16 +180,32 @@ void mergeSampleProfile(cl::list<std::string> Inputs, StringRef OutputFilename,
          I != E; ++I) {
       StringRef FName = I->first();
       FunctionSamples &Samples = I->second;
-      ProfileMap[FName].merge(Samples);
+      ProfileMap[FName].merge(Samples, Input.Weight);
     }
   }
   Writer->write(ProfileMap);
 }
 
-int merge_main(int argc, const char *argv[]) {
-  cl::list<std::string> Inputs(cl::Positional, cl::Required, cl::OneOrMore,
-                               cl::desc("<filenames...>"));
+static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
+  StringRef WeightStr, FileName;
+  std::tie(WeightStr, FileName) = WeightedFilename.split(',');
+
+  uint64_t Weight;
+  if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
+    exitWithError("Input weight must be a positive integer.");
+
+  if (!sys::fs::exists(FileName))
+    exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
+                      FileName);
 
+  return WeightedFile(FileName, Weight);
+}
+
+static int merge_main(int argc, const char *argv[]) {
+  cl::list<std::string> InputFilenames(cl::Positional,
+                                       cl::desc("<filename...>"));
+  cl::list<std::string> WeightedInputFilenames("weighted-input",
+                                               cl::desc("<weight>,<filename>"));
   cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                       cl::init("-"), cl::Required,
                                       cl::desc("Output file"));
@@ -109,31 +216,41 @@ int merge_main(int argc, const char *argv[]) {
       cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
                  clEnumVal(sample, "Sample profile"), clEnumValEnd));
 
-  cl::opt<sampleprof::SampleProfileFormat> OutputFormat(
-      cl::desc("Format of output profile (only meaningful with --sample)"),
-      cl::init(sampleprof::SPF_Binary),
-      cl::values(clEnumValN(sampleprof::SPF_Binary, "binary",
-                            "Binary encoding (default)"),
-                 clEnumValN(sampleprof::SPF_Text, "text", "Text encoding"),
-                 clEnumValN(sampleprof::SPF_GCC, "gcc", "GCC encoding"),
+  cl::opt<ProfileFormat> OutputFormat(
+      cl::desc("Format of output profile"), cl::init(PF_Binary),
+      cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
+                 clEnumValN(PF_Text, "text", "Text encoding"),
+                 clEnumValN(PF_GCC, "gcc",
+                            "GCC encoding (only meaningful for -sample)"),
                  clEnumValEnd));
 
   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
 
+  if (InputFilenames.empty() && WeightedInputFilenames.empty())
+    exitWithError("No input files specified. See " +
+                  sys::path::filename(argv[0]) + " -help");
+
+  WeightedFileVector WeightedInputs;
+  for (StringRef Filename : InputFilenames)
+    WeightedInputs.push_back(WeightedFile(Filename, 1));
+  for (StringRef WeightedFilename : WeightedInputFilenames)
+    WeightedInputs.push_back(parseWeightedFile(WeightedFilename));
+
   if (ProfileKind == instr)
-    mergeInstrProfile(Inputs, OutputFilename);
+    mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat);
   else
-    mergeSampleProfile(Inputs, OutputFilename, OutputFormat);
+    mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);
 
   return 0;
 }
 
-int showInstrProfile(std::string Filename, bool ShowCounts,
-                     bool ShowAllFunctions, std::string ShowFunction,
-                     raw_fd_ostream &OS) {
+static int showInstrProfile(std::string Filename, bool ShowCounts,
+                            bool ShowIndirectCallTargets, bool ShowAllFunctions,
+                            std::string ShowFunction, bool TextFormat,
+                            raw_fd_ostream &OS) {
   auto ReaderOrErr = InstrProfReader::create(Filename);
   if (std::error_code EC = ReaderOrErr.getError())
-    exitWithError(EC.message(), Filename);
+    exitWithErrorCode(EC, Filename);
 
   auto Reader = std::move(ReaderOrErr.get());
   uint64_t MaxFunctionCount = 0, MaxBlockCount = 0;
@@ -143,35 +260,68 @@ int showInstrProfile(std::string Filename, bool ShowCounts,
         ShowAllFunctions || (!ShowFunction.empty() &&
                              Func.Name.find(ShowFunction) != Func.Name.npos);
 
+    bool doTextFormatDump = (Show && ShowCounts && TextFormat);
+
+    if (doTextFormatDump) {
+      InstrProfWriter::writeRecordInText(Func, OS);
+      continue;
+    }
+
     ++TotalFunctions;
     assert(Func.Counts.size() > 0 && "function missing entry counter");
     if (Func.Counts[0] > MaxFunctionCount)
       MaxFunctionCount = Func.Counts[0];
 
+    for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
+      if (Func.Counts[I] > MaxBlockCount)
+        MaxBlockCount = Func.Counts[I];
+    }
+
     if (Show) {
+
       if (!ShownFunctions)
         OS << "Counters:\n";
+
       ++ShownFunctions;
 
       OS << "  " << Func.Name << ":\n"
          << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
          << "    Counters: " << Func.Counts.size() << "\n"
          << "    Function count: " << Func.Counts[0] << "\n";
-    }
 
-    if (Show && ShowCounts)
-      OS << "    Block counts: [";
-    for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
-      if (Func.Counts[I] > MaxBlockCount)
-        MaxBlockCount = Func.Counts[I];
-      if (Show && ShowCounts)
-        OS << (I == 1 ? "" : ", ") << Func.Counts[I];
+      if (ShowIndirectCallTargets)
+        OS << "    Indirect Call Site Count: "
+           << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
+
+      if (ShowCounts) {
+        OS << "    Block counts: [";
+        for (size_t I = 1, E = Func.Counts.size(); I < E; ++I) {
+          OS << (I == 1 ? "" : ", ") << Func.Counts[I];
+        }
+        OS << "]\n";
+      }
+
+      if (ShowIndirectCallTargets) {
+        uint32_t NS = Func.getNumValueSites(IPVK_IndirectCallTarget);
+        OS << "    Indirect Target Results: \n";
+        for (size_t I = 0; I < NS; ++I) {
+          uint32_t NV = Func.getNumValueDataForSite(IPVK_IndirectCallTarget, I);
+          std::unique_ptr<InstrProfValueData[]> VD =
+              Func.getValueForSite(IPVK_IndirectCallTarget, I);
+          for (uint32_t V = 0; V < NV; V++) {
+            OS << "\t[ " << I << ", ";
+            OS << (const char *)VD[V].Value << ", " << VD[V].Count << " ]\n";
+          }
+        }
+      }
     }
-    if (Show && ShowCounts)
-      OS << "]\n";
   }
+
   if (Reader->hasError())
-    exitWithError(Reader->getError().message(), Filename);
+    exitWithErrorCode(Reader->getError(), Filename);
+
+  if (ShowCounts && TextFormat)
+    return 0;
 
   if (ShowAllFunctions || !ShowFunction.empty())
     OS << "Functions shown: " << ShownFunctions << "\n";
@@ -181,16 +331,18 @@ int showInstrProfile(std::string Filename, bool ShowCounts,
   return 0;
 }
 
-int showSampleProfile(std::string Filename, bool ShowCounts,
-                      bool ShowAllFunctions, std::string ShowFunction,
-                      raw_fd_ostream &OS) {
+static int showSampleProfile(std::string Filename, bool ShowCounts,
+                             bool ShowAllFunctions, std::string ShowFunction,
+                             raw_fd_ostream &OS) {
   using namespace sampleprof;
   auto ReaderOrErr = SampleProfileReader::create(Filename, getGlobalContext());
   if (std::error_code EC = ReaderOrErr.getError())
-    exitWithError(EC.message(), Filename);
+    exitWithErrorCode(EC, Filename);
 
   auto Reader = std::move(ReaderOrErr.get());
-  Reader->read();
+  if (std::error_code EC = Reader->read())
+    exitWithErrorCode(EC, Filename);
+
   if (ShowAllFunctions || ShowFunction.empty())
     Reader->dump(OS);
   else
@@ -199,12 +351,18 @@ int showSampleProfile(std::string Filename, bool ShowCounts,
   return 0;
 }
 
-int show_main(int argc, const char *argv[]) {
+static int show_main(int argc, const char *argv[]) {
   cl::opt<std::string> Filename(cl::Positional, cl::Required,
                                 cl::desc("<profdata-file>"));
 
   cl::opt<bool> ShowCounts("counts", cl::init(false),
                            cl::desc("Show counter values for shown functions"));
+  cl::opt<bool> TextFormat(
+      "text", cl::init(false),
+      cl::desc("Show instr profile data in text dump format"));
+  cl::opt<bool> ShowIndirectCallTargets(
+      "ic-targets", cl::init(false),
+      cl::desc("Show indirect call site target values for shown functions"));
   cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
                                  cl::desc("Details for every function"));
   cl::opt<std::string> ShowFunction("function",
@@ -227,14 +385,14 @@ int show_main(int argc, const char *argv[]) {
   std::error_code EC;
   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::F_Text);
   if (EC)
-    exitWithError(EC.message(), OutputFilename);
+    exitWithErrorCode(EC, OutputFilename);
 
   if (ShowAllFunctions && !ShowFunction.empty())
     errs() << "warning: -function argument ignored: showing all functions\n";
 
   if (ProfileKind == instr)
-    return showInstrProfile(Filename, ShowCounts, ShowAllFunctions,
-                            ShowFunction, OS);
+    return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets,
+                            ShowAllFunctions, ShowFunction, TextFormat, OS);
   else
     return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
                              ShowFunction, OS);
@@ -261,8 +419,7 @@ int main(int argc, const char *argv[]) {
       return func(argc - 1, argv + 1);
     }
 
-    if (strcmp(argv[1], "-h") == 0 ||
-        strcmp(argv[1], "-help") == 0 ||
+    if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0 ||
         strcmp(argv[1], "--help") == 0) {
 
       errs() << "OVERVIEW: LLVM profile data tools\n\n"