/// data of the function. See \c getPGOFuncName() method for details
/// on how PGO name is formed.
class InstrProfSymtab {
+public:
+ typedef std::vector<std::pair<uint64_t, uint64_t>> AddrHashMap;
+
private:
StringRef Data;
uint64_t Address;
+ // A map from MD5 hash keys to function name strings.
std::vector<std::pair<uint64_t, std::string>> HashNameMap;
+ // A map from function runtime address to function name MD5 hash.
+ // This map is only populated and used by raw instr profile reader.
+ AddrHashMap AddrToMD5Map;
public:
- InstrProfSymtab() : Data(), Address(0) {}
+ InstrProfSymtab() : Data(), Address(0), HashNameMap(), AddrToMD5Map() {}
- /// Create InstrProfSymtab from a object file section which
+ /// Create InstrProfSymtab from an object file section which
/// contains function PGO names that are uncompressed.
+ /// This interface is used by CoverageMappingReader.
std::error_code create(object::SectionRef &Section);
- std::error_code create(StringRef D, uint64_t BaseAddr) {
- Data = D;
- Address = BaseAddr;
- return std::error_code();
- }
- template <typename NameIterRange> void create(NameIterRange &IterRange) {
- for (auto Name : IterRange)
- HashNameMap.push_back(
- std::make_pair(IndexedInstrProf::ComputeHash(Name), Name.str()));
- finalizeSymtab();
- }
-
- // If the symtab is created by a series calls to \c addFuncName, \c
- // finalizeSymtab needs to
- // be called before function name/symbol lookup using MD5 hash. This is
- // required because
- // the underlying map is vector (for space efficiency) which needs to be
- // sorted.
- void finalizeSymtab() {
- std::sort(HashNameMap.begin(), HashNameMap.end(), less_first());
- HashNameMap.erase(std::unique(HashNameMap.begin(), HashNameMap.end()),
- HashNameMap.end());
- }
-
+ /// This interface is used by the reader of the CoverageMapping test
+ /// format.
+ inline std::error_code create(StringRef D, uint64_t BaseAddr);
+ /// Create InstrProfSymtab from a set of names iterable from
+ /// \p IterRange. This interface is used by IndexedProfReader.
+ template <typename NameIterRange> void create(const NameIterRange &IterRange);
+ // If the symtab is created by a series of calls to \c addFuncName, \c
+ // finalizeSymtab needs to be called before looking up function names.
+ // This is required because the underlying map is a vector (for space
+ // efficiency) which needs to be sorted.
+ inline void finalizeSymtab();
+ /// Update the symtab by adding \p FuncName to the table. This interface
+ /// is used by the raw and text profile readers.
void addFuncName(StringRef FuncName) {
HashNameMap.push_back(std::make_pair(
IndexedInstrProf::ComputeHash(FuncName), FuncName.str()));
}
-
+ /// Map a function address to its name's MD5 hash. This interface
+ /// is only used by the raw profile reader.
+ void mapAddress(uint64_t Addr, uint64_t MD5Val) {
+   // Entries are appended in arrival order; finalizeSymtab() sorts and
+   // dedups this map afterwards.
+   AddrToMD5Map.emplace_back(Addr, MD5Val);
+ }
+ /// Expose the address-to-MD5 map itself, by mutable reference.
+ AddrHashMap &getAddrHashMap() {
+   return AddrToMD5Map;
+ }
/// Return function's PGO name from the function name's symabol
/// address in the object file. If an error occurs, Return
/// an empty string.
StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
/// Return function's PGO name from the name's md5 hash value.
/// If not found, return an empty string.
- StringRef getFuncName(uint64_t FuncMD5Hash) {
- auto Result =
- std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash,
- [](const std::pair<uint64_t, std::string> &LHS,
- uint64_t RHS) { return LHS.first < RHS; });
- if (Result != HashNameMap.end())
- return Result->second;
- return StringRef();
- }
+ inline StringRef getFuncName(uint64_t FuncMD5Hash);
};
-struct InstrProfStringTable {
- // Set of string values in profiling data.
- StringSet<> StringValueSet;
- InstrProfStringTable() { StringValueSet.clear(); }
- // Get a pointer to internal storage of a string in set
- const char *getStringData(StringRef Str) {
- auto Result = StringValueSet.find(Str);
- return (Result == StringValueSet.end()) ? nullptr : Result->first().data();
- }
- // Insert a string to StringTable
- const char *insertString(StringRef Str) {
- auto Result = StringValueSet.insert(Str);
- return Result.first->first().data();
- }
-};
+/// Record \p D as the symtab's data and \p BaseAddr as its address.
+/// No validation is performed here; the call always reports success.
+std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
+  Address = BaseAddr;
+  Data = D;
+  return std::error_code();
+}
+
+/// Build the symtab from every name produced by iterating \p IterRange:
+/// each name is stored together with its hash, and the table is finalized
+/// before returning.
+template <typename NameIterRange>
+void InstrProfSymtab::create(const NameIterRange &IterRange) {
+  for (auto Name : IterRange) {
+    uint64_t NameHash = IndexedInstrProf::ComputeHash(Name);
+    HashNameMap.emplace_back(NameHash, Name.str());
+  }
+  finalizeSymtab();
+}
+
+/// Sort both lookup tables by their key and remove exact duplicate entries.
+/// The tables are plain vectors, so lookups by key rely on this ordering.
+void InstrProfSymtab::finalizeSymtab() {
+  // Hash -> name table.
+  std::sort(HashNameMap.begin(), HashNameMap.end(), less_first());
+  auto NameDupBegin = std::unique(HashNameMap.begin(), HashNameMap.end());
+  HashNameMap.erase(NameDupBegin, HashNameMap.end());
+
+  // Address -> hash table.
+  std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first());
+  auto AddrDupBegin = std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end());
+  AddrToMD5Map.erase(AddrDupBegin, AddrToMD5Map.end());
+}
+
+/// Look up the function name recorded for the MD5 hash \p FuncMD5Hash.
+/// The lookup is a binary search over HashNameMap, so the table must already
+/// be sorted by hash (see finalizeSymtab). Returns an empty StringRef when no
+/// entry with exactly this hash exists.
+StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) {
+  auto Result =
+      std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash,
+                       [](const std::pair<uint64_t, std::string> &LHS,
+                          uint64_t RHS) { return LHS.first < RHS; });
+  // lower_bound yields the first entry whose hash is >= FuncMD5Hash, so the
+  // hash must be compared as well; otherwise an unknown hash would resolve to
+  // the name of an unrelated function.
+  if (Result != HashNameMap.end() && Result->first == FuncMD5Hash)
+    return Result->second;
+  return StringRef();
+}
struct InstrProfValueSiteRecord {
/// Value profiling data pairs at a given value site.
uint64_t Hash;
std::vector<uint64_t> Counts;
- typedef std::vector<std::pair<uint64_t, const char *>> ValueMapType;
+ typedef std::vector<std::pair<uint64_t, uint64_t>> ValueMapType;
/// Return the number of value profile kinds with non-zero number
/// of profile sites.
/// Add ValueData for ValueKind at value Site.
void addValueData(uint32_t ValueKind, uint32_t Site,
InstrProfValueData *VData, uint32_t N,
- ValueMapType *HashKeys);
+ ValueMapType *ValueMap);
/// Merge the counts in \p Other into this one.
/// Optionally scale merged counts by \p Weight.
instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1);
- /// Used by InstrProfWriter: update the value strings to commoned strings in
- /// the writer instance.
- void updateStrings(InstrProfStringTable *StrTab);
-
/// Clear value data entries
void clearValueData() {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
std::error_code LastError;
public:
- InstrProfReader() : LastError(instrprof_error::success) {}
+ InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
virtual ~InstrProfReader() {}
/// Read the header. Required before reading first record.
InstrProfIterator begin() { return InstrProfIterator(this); }
InstrProfIterator end() { return InstrProfIterator(); }
- protected:
+ /// Return the PGO symtab. There are three different readers:
+ /// Raw, Text, and Indexed profile readers. The first two types
+ /// of readers are used only by llvm-profdata tool, while the indexed
+ /// profile reader is also used by llvm-cov tool and the compiler (
+ /// backend or frontend). Since creating PGO symtab can create
+ /// significant runtime and memory overhead (as it touches data
+ /// for the whole program), InstrProfSymtab for the indexed profile
+ /// reader should be created on demand and it is recommended to be
+ /// only used for dumping purposes with llvm-profdata, not with the
+ /// compiler.
+ virtual InstrProfSymtab &getSymtab() = 0;
+
+protected:
+ std::unique_ptr<InstrProfSymtab> Symtab;
/// Set the current std::error_code and return same.
std::error_code error(std::error_code EC) {
LastError = EC;
/// Iterator over the profile data.
line_iterator Line;
- // String table for holding a unique copy of all the strings in the profile.
- InstrProfStringTable StringTable;
-
TextInstrProfReader(const TextInstrProfReader &) = delete;
TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
std::error_code readValueProfileData(InstrProfRecord &Record);
static bool hasFormat(const MemoryBuffer &Buffer);
/// Read the header.
- std::error_code readHeader() override { return success(); }
+ std::error_code readHeader() override;
/// Read a single record.
std::error_code readNextRecord(InstrProfRecord &Record) override;
+
+ InstrProfSymtab &getSymtab() override {
+   // The symtab must already exist by the time it is requested.
+   assert(Symtab.get());
+   InstrProfSymtab &Table = *Symtab;
+   return Table;
+ }
};
/// Reader for the raw instrprof binary format from runtime.
uint32_t ValueKindLast;
uint32_t CurValueDataSize;
- // String table for holding a unique copy of all the strings in the profile.
- InstrProfStringTable StringTable;
InstrProfRecord::ValueMapType FunctionPtrToNameMap;
RawInstrProfReader(const RawInstrProfReader &) = delete;
std::error_code readHeader() override;
std::error_code readNextRecord(InstrProfRecord &Record) override;
+ InstrProfSymtab &getSymtab() override {
+   // The symtab must already exist by the time it is requested.
+   assert(Symtab.get());
+   InstrProfSymtab &Table = *Symtab;
+   return Table;
+ }
+
private:
+ void createSymtab(InstrProfSymtab &Symtab);
std::error_code readNextHeader(const char *CurrentPos);
std::error_code readHeader(const RawInstrProf::Header &Header);
template <class IntT> IntT swap(IntT Int) const {
// It should be LE by default, but can be changed
// for testing purpose.
support::endianness ValueProfDataEndianness;
- std::vector<std::pair<uint64_t, const char *>> HashKeys;
public:
InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
hash_value_type ComputeHash(StringRef K);
- void setHashKeys(std::vector<std::pair<uint64_t, const char *>> HashKeys) {
- this->HashKeys = std::move(HashKeys);
- }
static std::pair<offset_type, offset_type>
ReadKeyDataLength(const unsigned char *&D) {
using namespace support;
virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
virtual ~InstrProfReaderIndexBase() {}
virtual uint64_t getVersion() const = 0;
+ virtual void populateSymtab(InstrProfSymtab &) = 0;
};
typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
typename HashTableImpl::data_iterator RecordIterator;
uint64_t FormatVersion;
- // String table for holding a unique copy of all the strings in the profile.
- InstrProfStringTable StringTable;
-
public:
InstrProfReaderIndex(const unsigned char *Buckets,
const unsigned char *const Payload,
}
~InstrProfReaderIndex() override {}
uint64_t getVersion() const override { return FormatVersion; }
+ void populateSymtab(InstrProfSymtab &Symtab) override {
+   // Feed all keys of the hash table into the symtab.
+   const auto &Keys = HashTable->keys();
+   Symtab.create(Keys);
+ }
};
/// Reader for the indexed binary instrprof format.
void setValueProfDataEndianness(support::endianness Endianness) {
Index->setValueProfDataEndianness(Endianness);
}
+
+ // See description in the base class. This interface is designed
+ // to be used by llvm-profdata (for dumping). Avoid using this when
+ // the client is the compiler.
+ InstrProfSymtab &getSymtab() override;
};
} // end namespace llvm
typedef SmallDenseMap<uint64_t, InstrProfRecord, 1> ProfilingData;
private:
- InstrProfStringTable StringTable;
StringMap<ProfilingData> FunctionData;
uint64_t MaxFunctionCount;
+
public:
InstrProfWriter() : MaxFunctionCount(0) {}
- /// Update string entries in profile data with references to StringTable.
- void updateStringTableReferences(InstrProfRecord &I);
/// Add function counts for the given function. If there are already counts
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
void writeText(raw_fd_ostream &OS);
/// Write \c Record in text format to \c OS
static void writeRecordInText(const InstrProfRecord &Record,
- raw_fd_ostream &OS);
+ InstrProfSymtab &Symtab, raw_fd_ostream &OS);
/// Write the profile, returning the raw data. For testing.
std::unique_ptr<MemoryBuffer> writeBuffer();
return Result;
}
+
// Map indirect call target name hash to name string.
uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
- ValueMapType *HashKeys) {
- if (!HashKeys)
+ ValueMapType *ValueMap) {
+ if (!ValueMap)
return Value;
switch (ValueKind) {
case IPVK_IndirectCallTarget: {
auto Result =
- std::lower_bound(HashKeys->begin(), HashKeys->end(), Value,
- [](const std::pair<uint64_t, const char *> &LHS,
+ std::lower_bound(ValueMap->begin(), ValueMap->end(), Value,
+ [](const std::pair<uint64_t, uint64_t> &LHS,
uint64_t RHS) { return LHS.first < RHS; });
- if (Result != HashKeys->end())
+ if (Result != ValueMap->end())
Value = (uint64_t)Result->second;
break;
}
return Value;
}
-void InstrProfRecord::updateStrings(InstrProfStringTable *StrTab) {
- if (!StrTab)
- return;
-
- Name = StrTab->insertString(Name);
- for (auto &VSite : IndirectCallSites)
- for (auto &VData : VSite.ValueData)
- VData.Value = (uint64_t)StrTab->insertString((const char *)VData.Value);
-}
-
void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
InstrProfValueData *VData, uint32_t N,
- ValueMapType *HashKeys) {
+ ValueMapType *ValueMap) {
for (uint32_t I = 0; I < N; I++) {
- VData[I].Value = remapValue(VData[I].Value, ValueKind, HashKeys);
+ VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap);
}
std::vector<InstrProfValueSiteRecord> &ValueSites =
getValueSitesForKind(ValueKind);
void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst,
uint32_t K, uint32_t S,
uint64_t (*Mapper)(uint32_t, uint64_t)) {
- return reinterpret_cast<const InstrProfRecord *>(R)
- ->getValueForSite(Dst, K, S, Mapper);
-}
-
-uint64_t stringToHash(uint32_t ValueKind, uint64_t Value) {
- switch (ValueKind) {
- case IPVK_IndirectCallTarget:
- return IndexedInstrProf::ComputeHash((const char *)Value);
- break;
- default:
- llvm_unreachable("value kind not handled !");
- }
- return Value;
+ return reinterpret_cast<const InstrProfRecord *>(R)->getValueForSite(
+ Dst, K, S, Mapper);
}
ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) {
getNumValueSitesInstrProf,
getNumValueDataInstrProf,
getNumValueDataForSiteInstrProf,
- stringToHash,
+ 0,
getValueForSiteInstrProf,
allocValueProfDataInstrProf};
[](char c) { return ::isprint(c) || ::isspace(c); });
}
+/// Nothing is parsed here; this only installs a fresh, empty symtab for the
+/// reader and reports success.
+std::error_code TextInstrProfReader::readHeader() {
+  Symtab = make_unique<InstrProfSymtab>();
+  return success();
+}
+
std::error_code
TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
if (Line.is_at_end())
return success();
+
uint32_t NumValueKinds;
if (Line->getAsInteger(10, NumValueKinds)) {
// No value profile data
CHECK_LINE_END(Line);
std::pair<StringRef, StringRef> VD = Line->split(':');
uint64_t TakenCount, Value;
- READ_NUM(VD.second, TakenCount);
- if (VK == IPVK_IndirectCallTarget)
- Value = (uint64_t)StringTable.insertString(VD.first);
- else {
+ if (VK == IPVK_IndirectCallTarget) {
+ Symtab->addFuncName(VD.first);
+ Value = IndexedInstrProf::ComputeHash(VD.first);
+ } else {
READ_NUM(VD.first, Value);
}
+ READ_NUM(VD.second, TakenCount);
CurrentValues.push_back({Value, TakenCount});
Line++;
}
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
// If we hit EOF while looking for a name, we're done.
- if (Line.is_at_end())
+ if (Line.is_at_end()) {
+ Symtab->finalizeSymtab();
return error(instrprof_error::eof);
+ }
// Read the function name.
Record.Name = *Line++;
+ Symtab->addFuncName(Record.Name);
// Read the function hash.
if (Line.is_at_end())
if (std::error_code EC = readValueProfileData(Record))
return EC;
+ // This is needed to avoid two pass parsing because llvm-profdata
+ // does dumping while reading.
+ Symtab->finalizeSymtab();
return success();
}
}
template <class IntPtrT>
-std::error_code RawInstrProfReader<IntPtrT>::readHeader(
- const RawInstrProf::Header &Header) {
+void RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
+  // Walk every per-function data record: the name is always registered, and
+  // the function's address is additionally mapped to its name hash when the
+  // record carries a non-null function pointer.
+  for (const RawInstrProf::ProfileData<IntPtrT> *Rec = Data; Rec != DataEnd;
+       ++Rec) {
+    StringRef FuncName(getName(Rec->NamePtr), swap(Rec->NameSize));
+    Symtab.addFuncName(FuncName);
+    const IntPtrT FuncAddr = swap(Rec->FunctionPointer);
+    if (FuncAddr)
+      Symtab.mapAddress(FuncAddr, IndexedInstrProf::ComputeHash(FuncName));
+  }
+  // All entries are in; finalize the symtab.
+  Symtab.finalizeSymtab();
+}
+
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readHeader(const RawInstrProf::Header &Header) {
if (swap(Header.Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
DataEnd = Data + DataSize;
CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
NamesStart = Start + NamesOffset;
- ValueDataStart = reinterpret_cast<const uint8_t*>(Start + ValueDataOffset);
+ ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
ProfileEnd = Start + ProfileSize;
- FunctionPtrToNameMap.clear();
- for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
- const IntPtrT FPtr = swap(I->FunctionPointer);
- if (!FPtr)
- continue;
- StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
- const char* NameEntryPtr = StringTable.insertString(FunctionName);
- FunctionPtrToNameMap.push_back(std::pair<const IntPtrT, const char*>
- (FPtr, NameEntryPtr));
- }
- std::sort(FunctionPtrToNameMap.begin(), FunctionPtrToNameMap.end(), less_first());
- FunctionPtrToNameMap.erase(std::unique(FunctionPtrToNameMap.begin(),
- FunctionPtrToNameMap.end()),
- FunctionPtrToNameMap.end());
+ std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ createSymtab(*NewSymtab.get());
+ Symtab = std::move(NewSymtab);
return success();
}
if (VDataPtrOrErr.getError())
return VDataPtrOrErr.getError();
- VDataPtrOrErr.get()->deserializeTo(Record, &FunctionPtrToNameMap);
+ VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap());
CurValueDataSize = VDataPtrOrErr.get()->getSize();
return success();
}
if (VDataPtrOrErr.getError())
return false;
- VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), &HashKeys);
+ VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
D += VDataPtrOrErr.get()->TotalSize;
return true;
HashTable.reset(HashTableImpl::Create(
Buckets, Payload, Base,
typename HashTableImpl::InfoType(HashType, Version)));
- // Form the map of hash values to const char* keys in profiling data.
- std::vector<std::pair<uint64_t, const char *>> HashKeys;
- for (auto Key : HashTable->keys()) {
- const char *KeyTableRef = StringTable.insertString(Key);
- HashKeys.push_back(std::make_pair(ComputeHash(HashType, Key), KeyTableRef));
- }
- std::sort(HashKeys.begin(), HashKeys.end(), less_first());
- HashKeys.erase(std::unique(HashKeys.begin(), HashKeys.end()), HashKeys.end());
- // Set the hash key map for the InstrLookupTrait
- HashTable->getInfoObj().setHashKeys(std::move(HashKeys));
RecordIterator = HashTable->data_begin();
}
return success();
}
+/// Build the symtab lazily: the first call populates it from the index, and
+/// every later call returns that same instance.
+InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
+  if (!Symtab.get()) {
+    std::unique_ptr<InstrProfSymtab> Fresh = make_unique<InstrProfSymtab>();
+    Index->populateSymtab(*Fresh);
+    Symtab = std::move(Fresh);
+  }
+  return *Symtab;
+}
+
ErrorOr<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
ValueProfDataEndianness = Endianness;
}
-void InstrProfWriter::updateStringTableReferences(InstrProfRecord &I) {
- I.updateStrings(&StringTable);
-}
-
std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
uint64_t Weight) {
- updateStringTableReferences(I);
auto &ProfileDataMap = FunctionData[I.Name];
bool NewFunc;
};
void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
+ InstrProfSymtab &Symtab,
raw_fd_ostream &OS) {
OS << Func.Name << "\n";
OS << "# Func Hash:\n" << Func.Hash << "\n";
std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
for (uint32_t I = 0; I < ND; I++) {
if (VK == IPVK_IndirectCallTarget)
- OS << reinterpret_cast<const char *>(VD[I].Value) << ":"
- << VD[I].Count << "\n";
+ OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n";
else
OS << VD[I].Value << ":" << VD[I].Count << "\n";
}
}
void InstrProfWriter::writeText(raw_fd_ostream &OS) {
+ InstrProfSymtab Symtab;
+ for (const auto &I : FunctionData)
+ Symtab.addFuncName(I.getKey());
+ Symtab.finalizeSymtab();
+
for (const auto &I : FunctionData)
for (const auto &Func : I.getValue())
- writeRecordInText(Func.second, OS);
+ writeRecordInText(Func.second, Symtab, OS);
}
std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
bool doTextFormatDump = (Show && ShowCounts && TextFormat);
if (doTextFormatDump) {
- InstrProfWriter::writeRecordInText(Func, OS);
+ InstrProfSymtab &Symtab = Reader->getSymtab();
+ InstrProfWriter::writeRecordInText(Func, Symtab, OS);
continue;
}
}
if (ShowIndirectCallTargets) {
+ InstrProfSymtab &Symtab = Reader->getSymtab();
uint32_t NS = Func.getNumValueSites(IPVK_IndirectCallTarget);
OS << " Indirect Target Results: \n";
for (size_t I = 0; I < NS; ++I) {
Func.getValueForSite(IPVK_IndirectCallTarget, I);
for (uint32_t V = 0; V < NV; V++) {
OS << "\t[ " << I << ", ";
- OS << (const char *)VD[V].Value << ", " << VD[V].Count << " ]\n";
+ OS << Symtab.getFuncName(VD[V].Value) << ", " << VD[V].Count
+ << " ]\n";
}
}
}