InstrProf: Allow multiple functions with the same name

author Justin Bogner <mail@justinbogner.com>

Fri, 1 Aug 2014 22:50:07 +0000 (22:50 +0000)

committer Justin Bogner <mail@justinbogner.com>

Fri, 1 Aug 2014 22:50:07 +0000 (22:50 +0000)
author Justin Bogner <mail@justinbogner.com>
Fri, 1 Aug 2014 22:50:07 +0000 (22:50 +0000)
committer Justin Bogner <mail@justinbogner.com>
Fri, 1 Aug 2014 22:50:07 +0000 (22:50 +0000)
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h

index b6073a44bb3543b4b4394a5c7ff494cdea46ad85..87489befc0ecc03ed1fdeb7d63a86b8373213f4b 100644 (file)
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -206,12 +206,17 @@ enum class HashT : uint32_t;
  /// Trait for lookups into the on-disk hash table for the binary instrprof
  /// format.
  class InstrProfLookupTrait {
-  std::vector<uint64_t> CountBuffer;
+  std::vector<uint64_t> DataBuffer;
    IndexedInstrProf::HashT HashType;
  public:
    InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
  
-  typedef InstrProfRecord data_type;
+  struct data_type {
+    data_type(StringRef Name, ArrayRef<uint64_t> Data)
+        : Name(Name), Data(Data) {}
+    StringRef Name;
+    ArrayRef<uint64_t> Data;
+  };
    typedef StringRef internal_key_type;
    typedef StringRef external_key_type;
    typedef uint64_t hash_value_type;
@@ -234,25 +239,20 @@ public:
      return StringRef((const char *)D, N);
    }
  
-  InstrProfRecord ReadData(StringRef K, const unsigned char *D, offset_type N) {
-    if (N < 2 * sizeof(uint64_t) || N % sizeof(uint64_t)) {
+  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
+    DataBuffer.clear();
+    if (N % sizeof(uint64_t))
        // The data is corrupt, don't try to read it.
-      CountBuffer.clear();
-      return InstrProfRecord("", 0, CountBuffer);
-    }
+      return data_type("", DataBuffer);
  
      using namespace support;
-
-    // The first stored value is the hash.
-    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
-    // Each counter follows.
-    unsigned NumCounters = N / sizeof(uint64_t) - 1;
-    CountBuffer.clear();
-    CountBuffer.reserve(NumCounters - 1);
-    for (unsigned I = 0; I < NumCounters; ++I)
-      CountBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
-
-    return InstrProfRecord(K, Hash, CountBuffer);
+    // We just treat the data as opaque here. It's simpler to handle in
+    // IndexedInstrProfReader.
+    unsigned NumEntries = N / sizeof(uint64_t);
+    DataBuffer.reserve(NumEntries);
+    for (unsigned I = 0; I < NumEntries; ++I)
+      DataBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
+    return data_type(K, DataBuffer);
    }
  };
  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
@@ -267,7 +267,11 @@ private:
    std::unique_ptr<InstrProfReaderIndex> Index;
    /// Iterator over the profile data.
    InstrProfReaderIndex::data_iterator RecordIterator;
-  /// The maximal execution count among all fucntions.
+  /// Offset into our current data set.
+  size_t CurrentOffset;
+  /// The file format version of the profile data.
+  uint64_t FormatVersion;
+  /// The maximal execution count among all functions.
    uint64_t MaxFunctionCount;
  
    IndexedInstrProfReader(const IndexedInstrProfReader &) LLVM_DELETED_FUNCTION;
@@ -275,7 +279,7 @@ private:
      LLVM_DELETED_FUNCTION;
  public:
    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
-      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
+      : DataBuffer(std::move(DataBuffer)), Index(nullptr), CurrentOffset(0) {}
  
    /// Return true if the given buffer is in an indexed instrprof format.
    static bool hasFormat(const MemoryBuffer &DataBuffer);
@@ -286,7 +290,7 @@ public:
    std::error_code readNextRecord(InstrProfRecord &Record) override;
  
    /// Fill Counts with the profile data for the given function name.
-  std::error_code getFunctionCounts(StringRef FuncName, uint64_t &FuncHash,
+  std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
                                      std::vector<uint64_t> &Counts);
    /// Return the maximum of all known function counts.
    uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h

index 6e68bee30eb8ad15f39efa0a15dfa924db9d3fc8..93f4128c0736346bac55a12a350b18c19a46e8eb 100644 (file)
--- a/include/llvm/ProfileData/InstrProfWriter.h
+++ b/include/llvm/ProfileData/InstrProfWriter.h
@@ -16,6 +16,7 @@
  #define LLVM_PROFILEDATA_INSTRPROF_WRITER_H_
  
  #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/StringMap.h"
  #include "llvm/ProfileData/InstrProf.h"
  #include "llvm/Support/DataTypes.h"
@@ -28,13 +29,13 @@ namespace llvm {
  /// Writer for instrumentation based profile data.
  class InstrProfWriter {
  public:
-  struct CounterData {
-    uint64_t Hash;
-    std::vector<uint64_t> Counts;
-  };
+  typedef SmallDenseMap<uint64_t, std::vector<uint64_t>, 1> CounterData;
  private:
    StringMap<CounterData> FunctionData;
+  uint64_t MaxFunctionCount;
  public:
+  InstrProfWriter() : MaxFunctionCount(0) {}
+
    /// Add function counts for the given function. If there are already counts
    /// for this function and the hash and number of counts match, each counter is
    /// summed.
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h

index 776170407bc3d1af2a0d2316512e558bee3abbb6..792863d0707f3569c09d8f9f398c6c750ff499f5 100644 (file)
--- a/lib/ProfileData/InstrProfIndexed.h
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -46,7 +46,7 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) {
  }
  
  const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
-const uint64_t Version = 1;
+const uint64_t Version = 2;
  const HashT HashType = HashT::MD5;
  }
  
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp

index e8f64614df50a5a1e4c20ca4b30c710dce338eb1..5c1993766aacedf4ebb48821057c98e737d9443f 100644 (file)
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -307,8 +307,8 @@ std::error_code IndexedInstrProfReader::readHeader() {
      return error(instrprof_error::bad_magic);
  
    // Read the version.
-  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
-  if (Version != IndexedInstrProf::Version)
+  FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);
+  if (FormatVersion > IndexedInstrProf::Version)
      return error(instrprof_error::unsupported_version);
  
    // Read the maximal function count.
@@ -331,18 +331,31 @@ std::error_code IndexedInstrProfReader::readHeader() {
  }
  
  std::error_code IndexedInstrProfReader::getFunctionCounts(
-    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
-  const auto &Iter = Index->find(FuncName);
+    StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) {
+  auto Iter = Index->find(FuncName);
    if (Iter == Index->end())
      return error(instrprof_error::unknown_function);
  
-  // Found it. Make sure it's valid before giving back a result.
-  const InstrProfRecord &Record = *Iter;
-  if (Record.Name.empty())
-    return error(instrprof_error::malformed);
-  FuncHash = Record.Hash;
-  Counts = Record.Counts;
-  return success();
+  // Found it. Look for counters with the right hash.
+  ArrayRef<uint64_t> Data = (*Iter).Data;
+  uint64_t NumCounts;
+  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
+    // The function hash comes first.
+    uint64_t FoundHash = Data[I++];
+    // In v1, we have at least one count. Later, we have the number of counts.
+    if (I == E)
+      return error(instrprof_error::malformed);
+    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
+    // If we have more counts than data, this is bogus.
+    if (I + NumCounts > E)
+      return error(instrprof_error::malformed);
+    // Check for a match and fill the vector if there is one.
+    if (FoundHash == FuncHash) {
+      Counts = Data.slice(I, NumCounts);
+      return success();
+    }
+  }
+  return error(instrprof_error::hash_mismatch);
  }
  
  std::error_code
@@ -351,10 +364,30 @@ IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
    if (RecordIterator == Index->data_end())
      return error(instrprof_error::eof);
  
-  // Read the next one.
-  Record = *RecordIterator;
-  ++RecordIterator;
-  if (Record.Name.empty())
+  // Record the current function name.
+  Record.Name = (*RecordIterator).Name;
+
+  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
+  // Valid data starts with a hash and either a count or the number of counts.
+  if (CurrentOffset + 1 > Data.size())
+    return error(instrprof_error::malformed);
+  // First we have a function hash.
+  Record.Hash = Data[CurrentOffset++];
+  // In version 1 we knew the number of counters implicitly, but in newer
+  // versions we store the number of counters next.
+  uint64_t NumCounts =
+      FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
+  if (CurrentOffset + NumCounts > Data.size())
      return error(instrprof_error::malformed);
+  // And finally the counts themselves.
+  Record.Counts = Data.slice(CurrentOffset, NumCounts);
+
+  // If we've exhausted this function's data, increment the record.
+  CurrentOffset += NumCounts;
+  if (CurrentOffset == Data.size()) {
+    ++RecordIterator;
+    CurrentOffset = 0;
+  }
+
    return success();
  }
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp

index e55c299181367c99c68d8f357e3436f908bc9b7e..1c4a4fede2868f1fc655b6cf57528bb81e590bfa 100644 (file)
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -45,7 +45,9 @@ public:
      offset_type N = K.size();
      LE.write<offset_type>(N);
  
-    offset_type M = (1 + V->Counts.size()) * sizeof(uint64_t);
+    offset_type M = 0;
+    for (const auto &Counts : *V)
+      M += (2 + Counts.second.size()) * sizeof(uint64_t);
      LE.write<offset_type>(M);
  
      return std::make_pair(N, M);
@@ -59,9 +61,13 @@ public:
                         offset_type) {
      using namespace llvm::support;
      endian::Writer<little> LE(Out);
-    LE.write<uint64_t>(V->Hash);
-    for (uint64_t I : V->Counts)
-      LE.write<uint64_t>(I);
+
+    for (const auto &Counts : *V) {
+      LE.write<uint64_t>(Counts.first);
+      LE.write<uint64_t>(Counts.second.size());
+      for (uint64_t I : Counts.second)
+        LE.write<uint64_t>(I);
+    }
    }
  };
  }
@@ -70,41 +76,44 @@ std::error_code
  InstrProfWriter::addFunctionCounts(StringRef FunctionName,
                                     uint64_t FunctionHash,
                                     ArrayRef<uint64_t> Counters) {
-  auto Where = FunctionData.find(FunctionName);
-  if (Where == FunctionData.end()) {
-    // If this is the first time we've seen this function, just add it.
-    auto &Data = FunctionData[FunctionName];
-    Data.Hash = FunctionHash;
-    Data.Counts = Counters;
+  auto &CounterData = FunctionData[FunctionName];
+
+  auto Where = CounterData.find(FunctionHash);
+  if (Where == CounterData.end()) {
+    // We've never seen a function with this name and hash, add it.
+    CounterData[FunctionHash] = Counters;
+    // We keep track of the max function count as we go for simplicity.
+    if (Counters[0] > MaxFunctionCount)
+      MaxFunctionCount = Counters[0];
      return instrprof_error::success;
    }
  
-  auto &Data = Where->getValue();
-  // We can only add to existing functions if they match, so we check the hash
-  // and number of counters.
-  if (Data.Hash != FunctionHash)
-    return instrprof_error::hash_mismatch;
-  if (Data.Counts.size() != Counters.size())
+  // We're updating a function we've seen before.
+  auto &FoundCounters = Where->second;
+  // If the number of counters doesn't match we either have bad data or a hash
+  // collision.
+  if (FoundCounters.size() != Counters.size())
      return instrprof_error::count_mismatch;
-  // These match, add up the counters.
+
    for (size_t I = 0, E = Counters.size(); I < E; ++I) {
-    if (Data.Counts[I] + Counters[I] < Data.Counts[I])
+    if (FoundCounters[I] + Counters[I] < FoundCounters[I])
        return instrprof_error::counter_overflow;
-    Data.Counts[I] += Counters[I];
+    FoundCounters[I] += Counters[I];
    }
+  // We keep track of the max function count as we go for simplicity.
+  if (FoundCounters[0] > MaxFunctionCount)
+    MaxFunctionCount = FoundCounters[0];
+
    return instrprof_error::success;
  }
  
  void InstrProfWriter::write(raw_fd_ostream &OS) {
    OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
-  uint64_t MaxFunctionCount = 0;
  
    // Populate the hash table generator.
-  for (const auto &I : FunctionData) {
+  std::vector<uint64_t> CounterBuffer;
+  for (const auto &I : FunctionData)
      Generator.insert(I.getKey(), &I.getValue());
-    if (I.getValue().Counts[0] > MaxFunctionCount)
-      MaxFunctionCount = I.getValue().Counts[0];
-  }
  
    using namespace llvm::support;
    endian::Writer<little> LE(OS);
diff --git a/test/tools/llvm-profdata/Inputs/compat.profdata.v1 b/test/tools/llvm-profdata/Inputs/compat.profdata.v1

new file mode 100644 (file)

index 0000000..fd17459

Binary files /dev/null and b/test/tools/llvm-profdata/Inputs/compat.profdata.v1 differ
diff --git a/test/tools/llvm-profdata/compat.proftext b/test/tools/llvm-profdata/compat.proftext

new file mode 100644 (file)

index 0000000..14da337
--- /dev/null
+++ b/test/tools/llvm-profdata/compat.proftext
@@ -0,0 +1,47 @@
+# Compatibility tests for older profile format versions. These ensure
+# that we don't break compatibility with an older profile version
+# without noticing it.
+
+# The input file at %S/Inputs/compat.profdata.v1 was generated with
+# llvm-profdata merge from r214548.
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function function_count_only --counts | FileCheck %s -check-prefix=FUNC_COUNT_ONLY
+function_count_only
+0
+1
+97531
+# FUNC_COUNT_ONLY:      Hash: 0x{{0+$}}
+# FUNC_COUNT_ONLY-NEXT: Counters: 1
+# FUNC_COUNT_ONLY-NEXT: Function count: 97531
+# FUNC_COUNT_ONLY-NEXT: Block counts: []
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function "name with spaces" --counts | FileCheck %s -check-prefix=SPACES
+name with spaces
+1024
+2
+0
+0
+# SPACES:      Hash: 0x{{0+}}400
+# SPACES-NEXT: Counters: 2
+# SPACES-NEXT: Function count: 0
+# SPACES-NEXT: Block counts: [0]
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function large_numbers --counts | FileCheck %s -check-prefix=LARGENUM
+large_numbers
+4611686018427387903
+6
+2305843009213693952
+1152921504606846976
+576460752303423488
+288230376151711744
+144115188075855872
+72057594037927936
+# LARGENUM:      Hash: 0x3fffffffffffffff
+# LARGENUM-NEXT: Counters: 6
+# LARGENUM-NEXT: Function count: 2305843009213693952
+# LARGENUM-NEXT: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936]
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 | FileCheck %s -check-prefix=SUMMARY
+# SUMMARY: Total functions: 3
+# SUMMARY: Maximum function count: 2305843009213693952
+# SUMMARY: Maximum internal block count: 1152921504606846976
diff --git a/test/tools/llvm-profdata/hash-mismatch.proftext b/test/tools/llvm-profdata/hash-mismatch.proftext

index e4f1a4ca4310356865b086ad189c7e935fa12d9b..fe0d4fb4f6b54e75d659231026dcdc459d47c8d0 100644 (file)
--- a/test/tools/llvm-profdata/hash-mismatch.proftext
+++ b/test/tools/llvm-profdata/hash-mismatch.proftext
@@ -1,6 +1,18 @@
-# RUN: llvm-profdata merge %s -o %t.out 2>&1 | FileCheck %s
-# CHECK: hash-mismatch.proftext: foo: Function hash mismatch
+# If we see the same function name, but with different hashes, make
+# sure we keep both.
  
+# RUN: llvm-profdata merge %s -o %t 2>&1
+# RUN: llvm-profdata show %t -all-functions -counts > %t.out
+
+# The function ordering is non-deterministic, so we need to do our
+# checks in multiple runs.
+# RUN: FileCheck -check-prefix=FOO3 -check-prefix=BOTH %s -input-file %t.out
+# RUN: FileCheck -check-prefix=FOO4 -check-prefix=BOTH %s -input-file %t.out
+
+# FOO3: Hash: 0x{{0+}}3
+# FOO3-NEXT: Counters: 3
+# FOO3-NEXT: Function count: 1
+# FOO3-NEXT: Block counts: [2, 3]
  foo
  3
  3
@@ -8,6 +20,10 @@ foo
  2
  3
  
+# FOO4: Hash: 0x{{0+}}4
+# FOO4-NEXT: Counters: 4
+# FOO4-NEXT: Function count: 11
+# FOO4-NEXT: Block counts: [22, 33, 44]
  foo
  4
  4
@@ -15,3 +31,7 @@ foo
  22
  33
  44
+
+# BOTH: Total functions: 2
+# BOTH: Maximum function count: 11
+# BOTH: Maximum internal block count: 44
author	Justin Bogner <mail@justinbogner.com>
	Fri, 1 Aug 2014 22:50:07 +0000 (22:50 +0000)
committer	Justin Bogner <mail@justinbogner.com>
	Fri, 1 Aug 2014 22:50:07 +0000 (22:50 +0000)
include/llvm/ProfileData/InstrProfReader.h		patch | blob | history
include/llvm/ProfileData/InstrProfWriter.h		patch | blob | history
lib/ProfileData/InstrProfIndexed.h		patch | blob | history
lib/ProfileData/InstrProfReader.cpp		patch | blob | history
lib/ProfileData/InstrProfWriter.cpp		patch | blob | history
test/tools/llvm-profdata/Inputs/compat.profdata.v1	[new file with mode: 0644]	patch | blob
test/tools/llvm-profdata/compat.proftext	[new file with mode: 0644]	patch | blob
test/tools/llvm-profdata/hash-mismatch.proftext		patch | blob | history