398 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			398 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
 | |
| //
 | |
| //                     The LLVM Compiler Infrastructure
 | |
| //
 | |
| // This file is distributed under the University of Illinois Open Source
 | |
| // License. See LICENSE.TXT for details.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // Defines the XRay Profile class representing the latency profile generated by
 | |
| // XRay's profiling mode.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
#include "llvm/XRay/Profile.h"

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
#include "llvm/XRay/Trace.h"
#include <deque>
#include <memory>
 | |
| 
 | |
| namespace llvm {
 | |
| namespace xray {
 | |
| 
 | |
| Profile::Profile(const Profile &O) {
 | |
|   // We need to re-create all the tries from the original (O), into the current
 | |
|   // Profile being initialized, through the Block instances we see.
 | |
|   for (const auto &Block : O) {
 | |
|     Blocks.push_back({Block.Thread, {}});
 | |
|     auto &B = Blocks.back();
 | |
|     for (const auto &PathData : Block.PathData)
 | |
|       B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
 | |
|                             PathData.second});
 | |
|   }
 | |
| }
 | |
| 
 | |
| Profile &Profile::operator=(const Profile &O) {
 | |
|   Profile P = O;
 | |
|   *this = std::move(P);
 | |
|   return *this;
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
/// Fixed-size header found in front of the path and data of a serialized
/// profile block. The members appear in the order readBlockHeader extracts
/// them from the input.
struct BlockHeader {
  /// First 32-bit field of the header.
  uint32_t Size;
  /// Second 32-bit field of the header.
  uint32_t Number;
  /// 64-bit thread identifier; loadProfile uses it as the block's thread.
  uint64_t Thread;
};
 | |
| 
 | |
| static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
 | |
|                                              uint32_t &Offset) {
 | |
|   BlockHeader H;
 | |
|   uint32_t CurrentOffset = Offset;
 | |
|   H.Size = Extractor.getU32(&Offset);
 | |
|   if (Offset == CurrentOffset)
 | |
|     return make_error<StringError>(
 | |
|         Twine("Error parsing block header size at offset '") +
 | |
|             Twine(CurrentOffset) + "'",
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
|   CurrentOffset = Offset;
 | |
|   H.Number = Extractor.getU32(&Offset);
 | |
|   if (Offset == CurrentOffset)
 | |
|     return make_error<StringError>(
 | |
|         Twine("Error parsing block header number at offset '") +
 | |
|             Twine(CurrentOffset) + "'",
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
|   CurrentOffset = Offset;
 | |
|   H.Thread = Extractor.getU64(&Offset);
 | |
|   if (Offset == CurrentOffset)
 | |
|     return make_error<StringError>(
 | |
|         Twine("Error parsing block header thread id at offset '") +
 | |
|             Twine(CurrentOffset) + "'",
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
|   return H;
 | |
| }
 | |
| 
 | |
| static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
 | |
|                                                        uint32_t &Offset) {
 | |
|   // We're reading a sequence of int32_t's until we find a 0.
 | |
|   std::vector<Profile::FuncID> Path;
 | |
|   auto CurrentOffset = Offset;
 | |
|   int32_t FuncId;
 | |
|   do {
 | |
|     FuncId = Extractor.getSigned(&Offset, 4);
 | |
|     if (CurrentOffset == Offset)
 | |
|       return make_error<StringError>(
 | |
|           Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
 | |
|           std::make_error_code(std::errc::invalid_argument));
 | |
|     CurrentOffset = Offset;
 | |
|     Path.push_back(FuncId);
 | |
|   } while (FuncId != 0);
 | |
|   return std::move(Path);
 | |
| }
 | |
| 
 | |
| static Expected<Profile::Data> readData(DataExtractor &Extractor,
 | |
|                                         uint32_t &Offset) {
 | |
|   // We expect a certain number of elements for Data:
 | |
|   //   - A 64-bit CallCount
 | |
|   //   - A 64-bit CumulativeLocalTime counter
 | |
|   Profile::Data D;
 | |
|   auto CurrentOffset = Offset;
 | |
|   D.CallCount = Extractor.getU64(&Offset);
 | |
|   if (CurrentOffset == Offset)
 | |
|     return make_error<StringError>(
 | |
|         Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
 | |
|             "'",
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
|   CurrentOffset = Offset;
 | |
|   D.CumulativeLocalTime = Extractor.getU64(&Offset);
 | |
|   if (CurrentOffset == Offset)
 | |
|     return make_error<StringError>(
 | |
|         Twine("Error parsing cumulative local time at offset '") +
 | |
|             Twine(CurrentOffset) + "'",
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
|   return D;
 | |
| }
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| Error Profile::addBlock(Block &&B) {
 | |
|   if (B.PathData.empty())
 | |
|     return make_error<StringError>(
 | |
|         "Block may not have empty path data.",
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
| 
 | |
|   Blocks.emplace_back(std::move(B));
 | |
|   return Error::success();
 | |
| }
 | |
| 
 | |
| Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
 | |
|   auto It = PathIDMap.find(P);
 | |
|   if (It == PathIDMap.end())
 | |
|     return make_error<StringError>(
 | |
|         Twine("PathID not found: ") + Twine(P),
 | |
|         std::make_error_code(std::errc::invalid_argument));
 | |
|   std::vector<Profile::FuncID> Path;
 | |
|   for (auto Node = It->second; Node; Node = Node->Caller)
 | |
|     Path.push_back(Node->Func);
 | |
|   return std::move(Path);
 | |
| }
 | |
| 
 | |
| Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
 | |
|   if (P.empty())
 | |
|     return 0;
 | |
| 
 | |
|   auto RootToLeafPath = reverse(P);
 | |
| 
 | |
|   // Find the root.
 | |
|   auto It = RootToLeafPath.begin();
 | |
|   auto PathRoot = *It++;
 | |
|   auto RootIt =
 | |
|       find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
 | |
| 
 | |
|   // If we've not seen this root before, remember it.
 | |
|   TrieNode *Node = nullptr;
 | |
|   if (RootIt == Roots.end()) {
 | |
|     NodeStorage.emplace_back();
 | |
|     Node = &NodeStorage.back();
 | |
|     Node->Func = PathRoot;
 | |
|     Roots.push_back(Node);
 | |
|   } else {
 | |
|     Node = *RootIt;
 | |
|   }
 | |
| 
 | |
|   // Now traverse the path, re-creating if necessary.
 | |
|   while (It != RootToLeafPath.end()) {
 | |
|     auto NodeFuncID = *It++;
 | |
|     auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
 | |
|       return N->Func == NodeFuncID;
 | |
|     });
 | |
|     if (CalleeIt == Node->Callees.end()) {
 | |
|       NodeStorage.emplace_back();
 | |
|       auto NewNode = &NodeStorage.back();
 | |
|       NewNode->Func = NodeFuncID;
 | |
|       NewNode->Caller = Node;
 | |
|       Node->Callees.push_back(NewNode);
 | |
|       Node = NewNode;
 | |
|     } else {
 | |
|       Node = *CalleeIt;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // At this point, Node *must* be pointing at the leaf.
 | |
|   assert(Node->Func == P.front());
 | |
|   if (Node->ID == 0) {
 | |
|     Node->ID = NextID++;
 | |
|     PathIDMap.insert({Node->ID, Node});
 | |
|   }
 | |
|   return Node->ID;
 | |
| }
 | |
| 
 | |
| Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
 | |
|   Profile Merged;
 | |
|   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
 | |
|   using PathDataMapPtr = std::unique_ptr<PathDataMap>;
 | |
|   using PathDataVector = decltype(Profile::Block::PathData);
 | |
|   using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
 | |
|   ThreadProfileIndexMap ThreadProfileIndex;
 | |
| 
 | |
|   for (const auto &P : {std::ref(L), std::ref(R)})
 | |
|     for (const auto &Block : P.get()) {
 | |
|       ThreadProfileIndexMap::iterator It;
 | |
|       std::tie(It, std::ignore) = ThreadProfileIndex.insert(
 | |
|           {Block.Thread, PathDataMapPtr{new PathDataMap()}});
 | |
|       for (const auto &PathAndData : Block.PathData) {
 | |
|         auto &PathID = PathAndData.first;
 | |
|         auto &Data = PathAndData.second;
 | |
|         auto NewPathID =
 | |
|             Merged.internPath(cantFail(P.get().expandPath(PathID)));
 | |
|         PathDataMap::iterator PathDataIt;
 | |
|         bool Inserted;
 | |
|         std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
 | |
|         if (!Inserted) {
 | |
|           auto &ExistingData = PathDataIt->second;
 | |
|           ExistingData.CallCount += Data.CallCount;
 | |
|           ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
 | |
|         }
 | |
|       }
 | |
|     }
 | |
| 
 | |
|   for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
 | |
|     PathDataVector PathAndData;
 | |
|     PathAndData.reserve(IndexedThreadBlock.second->size());
 | |
|     copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
 | |
|     cantFail(
 | |
|         Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
 | |
|   }
 | |
|   return Merged;
 | |
| }
 | |
| 
 | |
| Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
 | |
|   Profile Merged;
 | |
|   using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
 | |
|   PathDataMap PathData;
 | |
|   using PathDataVector = decltype(Profile::Block::PathData);
 | |
|   for (const auto &P : {std::ref(L), std::ref(R)})
 | |
|     for (const auto &Block : P.get())
 | |
|       for (const auto &PathAndData : Block.PathData) {
 | |
|         auto &PathId = PathAndData.first;
 | |
|         auto &Data = PathAndData.second;
 | |
|         auto NewPathID =
 | |
|             Merged.internPath(cantFail(P.get().expandPath(PathId)));
 | |
|         PathDataMap::iterator PathDataIt;
 | |
|         bool Inserted;
 | |
|         std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
 | |
|         if (!Inserted) {
 | |
|           auto &ExistingData = PathDataIt->second;
 | |
|           ExistingData.CallCount += Data.CallCount;
 | |
|           ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
 | |
|         }
 | |
|       }
 | |
| 
 | |
|   // In the end there's a single Block, for thread 0.
 | |
|   PathDataVector Block;
 | |
|   Block.reserve(PathData.size());
 | |
|   copy(PathData, std::back_inserter(Block));
 | |
|   cantFail(Merged.addBlock({0, std::move(Block)}));
 | |
|   return Merged;
 | |
| }
 | |
| 
 | |
| Expected<Profile> loadProfile(StringRef Filename) {
 | |
|   int Fd;
 | |
|   if (auto EC = sys::fs::openFileForRead(Filename, Fd))
 | |
|     return make_error<StringError>(
 | |
|         Twine("Cannot read profile from '") + Filename + "'", EC);
 | |
| 
 | |
|   uint64_t FileSize;
 | |
|   if (auto EC = sys::fs::file_size(Filename, FileSize))
 | |
|     return make_error<StringError>(
 | |
|         Twine("Cannot get filesize of '") + Filename + "'", EC);
 | |
| 
 | |
|   std::error_code EC;
 | |
|   sys::fs::mapped_file_region MappedFile(
 | |
|       Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
 | |
|   if (EC)
 | |
|     return make_error<StringError>(
 | |
|         Twine("Cannot mmap profile '") + Filename + "'", EC);
 | |
|   StringRef Data(MappedFile.data(), MappedFile.size());
 | |
| 
 | |
|   Profile P;
 | |
|   uint32_t Offset = 0;
 | |
|   DataExtractor Extractor(Data, true, 8);
 | |
| 
 | |
|   // For each block we get from the file:
 | |
|   while (Offset != MappedFile.size()) {
 | |
|     auto HeaderOrError = readBlockHeader(Extractor, Offset);
 | |
|     if (!HeaderOrError)
 | |
|       return HeaderOrError.takeError();
 | |
| 
 | |
|     // TODO: Maybe store this header information for each block, even just for
 | |
|     // debugging?
 | |
|     const auto &Header = HeaderOrError.get();
 | |
| 
 | |
|     // Read in the path data.
 | |
|     auto PathOrError = readPath(Extractor, Offset);
 | |
|     if (!PathOrError)
 | |
|       return PathOrError.takeError();
 | |
|     const auto &Path = PathOrError.get();
 | |
| 
 | |
|     // For each path we encounter, we should intern it to get a PathID.
 | |
|     auto DataOrError = readData(Extractor, Offset);
 | |
|     if (!DataOrError)
 | |
|       return DataOrError.takeError();
 | |
|     auto &Data = DataOrError.get();
 | |
| 
 | |
|     if (auto E =
 | |
|             P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
 | |
|                                       {{P.internPath(Path), std::move(Data)}}}))
 | |
|       return std::move(E);
 | |
|   }
 | |
| 
 | |
|   return P;
 | |
| }
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| struct StackEntry {
 | |
|   uint64_t Timestamp;
 | |
|   Profile::FuncID FuncId;
 | |
| };
 | |
| 
 | |
| } // namespace
 | |
| 
 | |
| Expected<Profile> profileFromTrace(const Trace &T) {
 | |
|   Profile P;
 | |
| 
 | |
|   // The implementation of the algorithm re-creates the execution of
 | |
|   // the functions based on the trace data. To do this, we set up a number of
 | |
|   // data structures to track the execution context of every thread in the
 | |
|   // Trace.
 | |
|   DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
 | |
|   DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
 | |
|       ThreadPathData;
 | |
| 
 | |
|   //  We then do a pass through the Trace to account data on a per-thread-basis.
 | |
|   for (const auto &E : T) {
 | |
|     auto &TSD = ThreadStacks[E.TId];
 | |
|     switch (E.Type) {
 | |
|     case RecordTypes::ENTER:
 | |
|     case RecordTypes::ENTER_ARG:
 | |
| 
 | |
|       // Push entries into the function call stack.
 | |
|       TSD.push_back({E.TSC, E.FuncId});
 | |
|       break;
 | |
| 
 | |
|     case RecordTypes::EXIT:
 | |
|     case RecordTypes::TAIL_EXIT:
 | |
| 
 | |
|       // Exits cause some accounting to happen, based on the state of the stack.
 | |
|       // For each function we pop off the stack, we take note of the path and
 | |
|       // record the cumulative state for this path. As we're doing this, we
 | |
|       // intern the path into the Profile.
 | |
|       while (!TSD.empty()) {
 | |
|         auto Top = TSD.back();
 | |
|         auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
 | |
|         SmallVector<Profile::FuncID, 16> Path;
 | |
|         transform(reverse(TSD), std::back_inserter(Path),
 | |
|                   std::mem_fn(&StackEntry::FuncId));
 | |
|         auto InternedPath = P.internPath(Path);
 | |
|         auto &TPD = ThreadPathData[E.TId][InternedPath];
 | |
|         ++TPD.CallCount;
 | |
|         TPD.CumulativeLocalTime += FunctionLocalTime;
 | |
|         TSD.pop_back();
 | |
| 
 | |
|         // If we've matched the corresponding entry event for this function,
 | |
|         // then we exit the loop.
 | |
|         if (Top.FuncId == E.FuncId)
 | |
|           break;
 | |
| 
 | |
|         // FIXME: Consider the intermediate times and the cumulative tree time
 | |
|         // as well.
 | |
|       }
 | |
| 
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Once we've gone through the Trace, we now create one Block per thread in
 | |
|   // the Profile.
 | |
|   for (const auto &ThreadPaths : ThreadPathData) {
 | |
|     const auto &TID = ThreadPaths.first;
 | |
|     const auto &PathsData = ThreadPaths.second;
 | |
|     if (auto E = P.addBlock({
 | |
|             TID,
 | |
|             std::vector<std::pair<Profile::PathID, Profile::Data>>(
 | |
|                 PathsData.begin(), PathsData.end()),
 | |
|         }))
 | |
|       return std::move(E);
 | |
|   }
 | |
| 
 | |
|   return P;
 | |
| }
 | |
| 
 | |
| } // namespace xray
 | |
| } // namespace llvm
 |