344 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			344 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
//===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
// This file implements hierarchical time profiler.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "llvm/Support/TimeProfiler.h"
 | 
						|
#include "llvm/ADT/STLFunctionalExtras.h"
 | 
						|
#include "llvm/ADT/StringMap.h"
 | 
						|
#include "llvm/Support/JSON.h"
 | 
						|
#include "llvm/Support/Path.h"
 | 
						|
#include "llvm/Support/Process.h"
 | 
						|
#include "llvm/Support/Threading.h"
 | 
						|
#include <algorithm>
 | 
						|
#include <cassert>
 | 
						|
#include <chrono>
 | 
						|
#include <mutex>
 | 
						|
#include <string>
 | 
						|
#include <vector>
 | 
						|
 | 
						|
using namespace std::chrono;
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
struct TimeTraceProfilerInstances {
 | 
						|
  std::mutex Lock;
 | 
						|
  std::vector<TimeTraceProfiler *> List;
 | 
						|
};
 | 
						|
 | 
						|
TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
 | 
						|
  static TimeTraceProfilerInstances Instances;
 | 
						|
  return Instances;
 | 
						|
}
 | 
						|
 | 
						|
} // anonymous namespace
 | 
						|
 | 
						|
// Per Thread instance
 | 
						|
static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
 | 
						|
 | 
						|
TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
 | 
						|
  return TimeTraceProfilerInstance;
 | 
						|
}
 | 
						|
 | 
						|
typedef duration<steady_clock::rep, steady_clock::period> DurationType;
 | 
						|
typedef time_point<steady_clock> TimePointType;
 | 
						|
typedef std::pair<size_t, DurationType> CountAndDurationType;
 | 
						|
typedef std::pair<std::string, CountAndDurationType>
 | 
						|
    NameAndCountAndDurationType;
 | 
						|
 | 
						|
namespace {
 | 
						|
struct Entry {
 | 
						|
  const TimePointType Start;
 | 
						|
  TimePointType End;
 | 
						|
  const std::string Name;
 | 
						|
  const std::string Detail;
 | 
						|
 | 
						|
  Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
 | 
						|
      : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
 | 
						|
        Detail(std::move(Dt)) {}
 | 
						|
 | 
						|
  // Calculate timings for FlameGraph. Cast time points to microsecond precision
 | 
						|
  // rather than casting duration. This avoid truncation issues causing inner
 | 
						|
  // scopes overruning outer scopes.
 | 
						|
  steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
 | 
						|
    return (time_point_cast<microseconds>(Start) -
 | 
						|
            time_point_cast<microseconds>(StartTime))
 | 
						|
        .count();
 | 
						|
  }
 | 
						|
 | 
						|
  steady_clock::rep getFlameGraphDurUs() const {
 | 
						|
    return (time_point_cast<microseconds>(End) -
 | 
						|
            time_point_cast<microseconds>(Start))
 | 
						|
        .count();
 | 
						|
  }
 | 
						|
};
 | 
						|
} // namespace
 | 
						|
 | 
						|
struct llvm::TimeTraceProfiler {
 | 
						|
  TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
 | 
						|
      : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
 | 
						|
        ProcName(ProcName), Pid(sys::Process::getProcessId()),
 | 
						|
        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
 | 
						|
    llvm::get_thread_name(ThreadName);
 | 
						|
  }
 | 
						|
 | 
						|
  void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
 | 
						|
    Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
 | 
						|
                       Detail());
 | 
						|
  }
 | 
						|
 | 
						|
  void end() {
 | 
						|
    assert(!Stack.empty() && "Must call begin() first");
 | 
						|
    Entry &E = Stack.back();
 | 
						|
    E.End = steady_clock::now();
 | 
						|
 | 
						|
    // Check that end times monotonically increase.
 | 
						|
    assert((Entries.empty() ||
 | 
						|
            (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
 | 
						|
             Entries.back().getFlameGraphStartUs(StartTime) +
 | 
						|
                 Entries.back().getFlameGraphDurUs())) &&
 | 
						|
           "TimeProfiler scope ended earlier than previous scope");
 | 
						|
 | 
						|
    // Calculate duration at full precision for overall counts.
 | 
						|
    DurationType Duration = E.End - E.Start;
 | 
						|
 | 
						|
    // Only include sections longer or equal to TimeTraceGranularity msec.
 | 
						|
    if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
 | 
						|
      Entries.emplace_back(E);
 | 
						|
 | 
						|
    // Track total time taken by each "name", but only the topmost levels of
 | 
						|
    // them; e.g. if there's a template instantiation that instantiates other
 | 
						|
    // templates from within, we only want to add the topmost one. "topmost"
 | 
						|
    // happens to be the ones that don't have any currently open entries above
 | 
						|
    // itself.
 | 
						|
    if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
 | 
						|
                      [&](const Entry &Val) { return Val.Name == E.Name; })) {
 | 
						|
      auto &CountAndTotal = CountAndTotalPerName[E.Name];
 | 
						|
      CountAndTotal.first++;
 | 
						|
      CountAndTotal.second += Duration;
 | 
						|
    }
 | 
						|
 | 
						|
    Stack.pop_back();
 | 
						|
  }
 | 
						|
 | 
						|
  // Write events from this TimeTraceProfilerInstance and
 | 
						|
  // ThreadTimeTraceProfilerInstances.
 | 
						|
  void write(raw_pwrite_stream &OS) {
 | 
						|
    // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
 | 
						|
    auto &Instances = getTimeTraceProfilerInstances();
 | 
						|
    std::lock_guard<std::mutex> Lock(Instances.Lock);
 | 
						|
    assert(Stack.empty() &&
 | 
						|
           "All profiler sections should be ended when calling write");
 | 
						|
    assert(llvm::all_of(Instances.List,
 | 
						|
                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
 | 
						|
           "All profiler sections should be ended when calling write");
 | 
						|
 | 
						|
    json::OStream J(OS);
 | 
						|
    J.objectBegin();
 | 
						|
    J.attributeBegin("traceEvents");
 | 
						|
    J.arrayBegin();
 | 
						|
 | 
						|
    // Emit all events for the main flame graph.
 | 
						|
    auto writeEvent = [&](const auto &E, uint64_t Tid) {
 | 
						|
      auto StartUs = E.getFlameGraphStartUs(StartTime);
 | 
						|
      auto DurUs = E.getFlameGraphDurUs();
 | 
						|
 | 
						|
      J.object([&] {
 | 
						|
        J.attribute("pid", Pid);
 | 
						|
        J.attribute("tid", int64_t(Tid));
 | 
						|
        J.attribute("ph", "X");
 | 
						|
        J.attribute("ts", StartUs);
 | 
						|
        J.attribute("dur", DurUs);
 | 
						|
        J.attribute("name", E.Name);
 | 
						|
        if (!E.Detail.empty()) {
 | 
						|
          J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
 | 
						|
        }
 | 
						|
      });
 | 
						|
    };
 | 
						|
    for (const Entry &E : Entries)
 | 
						|
      writeEvent(E, this->Tid);
 | 
						|
    for (const TimeTraceProfiler *TTP : Instances.List)
 | 
						|
      for (const Entry &E : TTP->Entries)
 | 
						|
        writeEvent(E, TTP->Tid);
 | 
						|
 | 
						|
    // Emit totals by section name as additional "thread" events, sorted from
 | 
						|
    // longest one.
 | 
						|
    // Find highest used thread id.
 | 
						|
    uint64_t MaxTid = this->Tid;
 | 
						|
    for (const TimeTraceProfiler *TTP : Instances.List)
 | 
						|
      MaxTid = std::max(MaxTid, TTP->Tid);
 | 
						|
 | 
						|
    // Combine all CountAndTotalPerName from threads into one.
 | 
						|
    StringMap<CountAndDurationType> AllCountAndTotalPerName;
 | 
						|
    auto combineStat = [&](const auto &Stat) {
 | 
						|
      StringRef Key = Stat.getKey();
 | 
						|
      auto Value = Stat.getValue();
 | 
						|
      auto &CountAndTotal = AllCountAndTotalPerName[Key];
 | 
						|
      CountAndTotal.first += Value.first;
 | 
						|
      CountAndTotal.second += Value.second;
 | 
						|
    };
 | 
						|
    for (const auto &Stat : CountAndTotalPerName)
 | 
						|
      combineStat(Stat);
 | 
						|
    for (const TimeTraceProfiler *TTP : Instances.List)
 | 
						|
      for (const auto &Stat : TTP->CountAndTotalPerName)
 | 
						|
        combineStat(Stat);
 | 
						|
 | 
						|
    std::vector<NameAndCountAndDurationType> SortedTotals;
 | 
						|
    SortedTotals.reserve(AllCountAndTotalPerName.size());
 | 
						|
    for (const auto &Total : AllCountAndTotalPerName)
 | 
						|
      SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
 | 
						|
 | 
						|
    llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
 | 
						|
                                const NameAndCountAndDurationType &B) {
 | 
						|
      return A.second.second > B.second.second;
 | 
						|
    });
 | 
						|
 | 
						|
    // Report totals on separate threads of tracing file.
 | 
						|
    uint64_t TotalTid = MaxTid + 1;
 | 
						|
    for (const NameAndCountAndDurationType &Total : SortedTotals) {
 | 
						|
      auto DurUs = duration_cast<microseconds>(Total.second.second).count();
 | 
						|
      auto Count = AllCountAndTotalPerName[Total.first].first;
 | 
						|
 | 
						|
      J.object([&] {
 | 
						|
        J.attribute("pid", Pid);
 | 
						|
        J.attribute("tid", int64_t(TotalTid));
 | 
						|
        J.attribute("ph", "X");
 | 
						|
        J.attribute("ts", 0);
 | 
						|
        J.attribute("dur", DurUs);
 | 
						|
        J.attribute("name", "Total " + Total.first);
 | 
						|
        J.attributeObject("args", [&] {
 | 
						|
          J.attribute("count", int64_t(Count));
 | 
						|
          J.attribute("avg ms", int64_t(DurUs / Count / 1000));
 | 
						|
        });
 | 
						|
      });
 | 
						|
 | 
						|
      ++TotalTid;
 | 
						|
    }
 | 
						|
 | 
						|
    auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
 | 
						|
                                  StringRef arg) {
 | 
						|
      J.object([&] {
 | 
						|
        J.attribute("cat", "");
 | 
						|
        J.attribute("pid", Pid);
 | 
						|
        J.attribute("tid", int64_t(Tid));
 | 
						|
        J.attribute("ts", 0);
 | 
						|
        J.attribute("ph", "M");
 | 
						|
        J.attribute("name", Name);
 | 
						|
        J.attributeObject("args", [&] { J.attribute("name", arg); });
 | 
						|
      });
 | 
						|
    };
 | 
						|
 | 
						|
    writeMetadataEvent("process_name", Tid, ProcName);
 | 
						|
    writeMetadataEvent("thread_name", Tid, ThreadName);
 | 
						|
    for (const TimeTraceProfiler *TTP : Instances.List)
 | 
						|
      writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
 | 
						|
 | 
						|
    J.arrayEnd();
 | 
						|
    J.attributeEnd();
 | 
						|
 | 
						|
    // Emit the absolute time when this TimeProfiler started.
 | 
						|
    // This can be used to combine the profiling data from
 | 
						|
    // multiple processes and preserve actual time intervals.
 | 
						|
    J.attribute("beginningOfTime",
 | 
						|
                time_point_cast<microseconds>(BeginningOfTime)
 | 
						|
                    .time_since_epoch()
 | 
						|
                    .count());
 | 
						|
 | 
						|
    J.objectEnd();
 | 
						|
  }
 | 
						|
 | 
						|
  SmallVector<Entry, 16> Stack;
 | 
						|
  SmallVector<Entry, 128> Entries;
 | 
						|
  StringMap<CountAndDurationType> CountAndTotalPerName;
 | 
						|
  const time_point<system_clock> BeginningOfTime;
 | 
						|
  const TimePointType StartTime;
 | 
						|
  const std::string ProcName;
 | 
						|
  const sys::Process::Pid Pid;
 | 
						|
  SmallString<0> ThreadName;
 | 
						|
  const uint64_t Tid;
 | 
						|
 | 
						|
  // Minimum time granularity (in microseconds)
 | 
						|
  const unsigned TimeTraceGranularity;
 | 
						|
};
 | 
						|
 | 
						|
void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
 | 
						|
                                       StringRef ProcName) {
 | 
						|
  assert(TimeTraceProfilerInstance == nullptr &&
 | 
						|
         "Profiler should not be initialized");
 | 
						|
  TimeTraceProfilerInstance = new TimeTraceProfiler(
 | 
						|
      TimeTraceGranularity, llvm::sys::path::filename(ProcName));
 | 
						|
}
 | 
						|
 | 
						|
// Removes all TimeTraceProfilerInstances.
 | 
						|
// Called from main thread.
 | 
						|
void llvm::timeTraceProfilerCleanup() {
 | 
						|
  delete TimeTraceProfilerInstance;
 | 
						|
  TimeTraceProfilerInstance = nullptr;
 | 
						|
 | 
						|
  auto &Instances = getTimeTraceProfilerInstances();
 | 
						|
  std::lock_guard<std::mutex> Lock(Instances.Lock);
 | 
						|
  for (auto *TTP : Instances.List)
 | 
						|
    delete TTP;
 | 
						|
  Instances.List.clear();
 | 
						|
}
 | 
						|
 | 
						|
// Finish TimeTraceProfilerInstance on a worker thread.
 | 
						|
// This doesn't remove the instance, just moves the pointer to global vector.
 | 
						|
void llvm::timeTraceProfilerFinishThread() {
 | 
						|
  auto &Instances = getTimeTraceProfilerInstances();
 | 
						|
  std::lock_guard<std::mutex> Lock(Instances.Lock);
 | 
						|
  Instances.List.push_back(TimeTraceProfilerInstance);
 | 
						|
  TimeTraceProfilerInstance = nullptr;
 | 
						|
}
 | 
						|
 | 
						|
void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
 | 
						|
  assert(TimeTraceProfilerInstance != nullptr &&
 | 
						|
         "Profiler object can't be null");
 | 
						|
  TimeTraceProfilerInstance->write(OS);
 | 
						|
}
 | 
						|
 | 
						|
Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
 | 
						|
                                   StringRef FallbackFileName) {
 | 
						|
  assert(TimeTraceProfilerInstance != nullptr &&
 | 
						|
         "Profiler object can't be null");
 | 
						|
 | 
						|
  std::string Path = PreferredFileName.str();
 | 
						|
  if (Path.empty()) {
 | 
						|
    Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
 | 
						|
    Path += ".time-trace";
 | 
						|
  }
 | 
						|
 | 
						|
  std::error_code EC;
 | 
						|
  raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
 | 
						|
  if (EC)
 | 
						|
    return createStringError(EC, "Could not open " + Path);
 | 
						|
 | 
						|
  timeTraceProfilerWrite(OS);
 | 
						|
  return Error::success();
 | 
						|
}
 | 
						|
 | 
						|
void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
 | 
						|
  if (TimeTraceProfilerInstance != nullptr)
 | 
						|
    TimeTraceProfilerInstance->begin(std::string(Name),
 | 
						|
                                     [&]() { return std::string(Detail); });
 | 
						|
}
 | 
						|
 | 
						|
void llvm::timeTraceProfilerBegin(StringRef Name,
 | 
						|
                                  llvm::function_ref<std::string()> Detail) {
 | 
						|
  if (TimeTraceProfilerInstance != nullptr)
 | 
						|
    TimeTraceProfilerInstance->begin(std::string(Name), Detail);
 | 
						|
}
 | 
						|
 | 
						|
void llvm::timeTraceProfilerEnd() {
 | 
						|
  if (TimeTraceProfilerInstance != nullptr)
 | 
						|
    TimeTraceProfilerInstance->end();
 | 
						|
}
 |