262 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			262 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			C++
		
	
	
	
//===-- X86Counter.cpp ------------------------------------------*- C++ -*-===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "X86Counter.h"
 | 
						|
 | 
						|
#if defined(__linux__) && defined(HAVE_LIBPFM) &&                              \
 | 
						|
    defined(LIBPFM_HAS_FIELD_CYCLES)
 | 
						|
 | 
						|
// FIXME: Use appropriate wrappers for poll.h and mman.h
 | 
						|
// to support Windows and remove this linux-only guard.
 | 
						|
 | 
						|
#include "llvm/Support/Endian.h"
 | 
						|
#include "llvm/Support/Errc.h"
 | 
						|
 | 
						|
#include <perfmon/perf_event.h>
 | 
						|
#include <perfmon/pfmlib.h>
 | 
						|
#include <perfmon/pfmlib_perf_event.h>
 | 
						|
 | 
						|
#include <atomic>
 | 
						|
#include <chrono>
 | 
						|
#include <cstddef>
 | 
						|
#include <cstdint>
 | 
						|
#include <limits>
 | 
						|
#include <memory>
 | 
						|
#include <vector>
 | 
						|
 | 
						|
#include <poll.h>
 | 
						|
#include <sys/mman.h>
 | 
						|
#include <unistd.h>
 | 
						|
 | 
						|
namespace llvm {
 | 
						|
namespace exegesis {
 | 
						|
 | 
						|
// Number of entries in the LBR.
static constexpr int kLbrEntries = 16;
// Number of pages that make up the perf data buffer.
static constexpr size_t kBufferPages = 8;
// Size in bytes of the data buffer: kBufferPages pages of the host page size.
static const size_t kDataBufferSize =
    static_cast<size_t>(getpagesize()) * kBufferPages;

// The mapping is one page larger than the data buffer: its first page is
// reserved for perf_event_mmap_page and the data buffer starts on the page
// right after it.
static const size_t kMappedBufferSize =
    static_cast<size_t>(getpagesize()) * (kBufferPages + 1);
 | 
						|
 | 
						|
// Waits for the LBR perf event behind FileDescriptor to become readable
// (POLLIN), giving up after a 10 second timeout. The result of poll() is
// returned unchanged: the number of ready descriptors, 0 on timeout, or -1
// on error.
static int pollLbrPerfEvent(const int FileDescriptor) {
  constexpr int kTimeoutMs = 10000;
  constexpr nfds_t kNumFds = 1;

  // Zero-initialization also clears revents before the call.
  struct pollfd Request = {};
  Request.fd = FileDescriptor;
  Request.events = POLLIN;
  return poll(&Request, kNumFds, kTimeoutMs);
}
 | 
						|
 | 
						|
// Copies the data-buffer into Buf, given the pointer to MMapped.
 | 
						|
static void copyDataBuffer(void *MMappedBuffer, char *Buf, uint64_t Tail,
 | 
						|
                           size_t DataSize) {
 | 
						|
  // First page is reserved for perf_event_mmap_page. Data buffer starts on
 | 
						|
  // the next page.
 | 
						|
  char *Start = reinterpret_cast<char *>(MMappedBuffer) + getpagesize();
 | 
						|
  // The LBR buffer is a cyclic buffer, we copy data to another buffer.
 | 
						|
  uint64_t Offset = Tail % kDataBufferSize;
 | 
						|
  size_t CopySize = kDataBufferSize - Offset;
 | 
						|
  memcpy(Buf, Start + Offset, CopySize);
 | 
						|
  if (CopySize >= DataSize)
 | 
						|
    return;
 | 
						|
 | 
						|
  memcpy(Buf + CopySize, Start, Offset);
 | 
						|
  return;
 | 
						|
}
 | 
						|
 | 
						|
// Parses the given data-buffer for stats and fill the CycleArray.
 | 
						|
// If data has been extracted successfully, also modifies the code to jump
 | 
						|
// out the benchmark loop.
 | 
						|
static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
 | 
						|
                                   const void *From, const void *To,
 | 
						|
                                   llvm::SmallVector<int64_t, 4> *CycleArray) {
 | 
						|
  const char *DataPtr = DataBuf;
 | 
						|
  while (DataPtr < DataBuf + DataSize) {
 | 
						|
    struct perf_event_header Header;
 | 
						|
    memcpy(&Header, DataPtr, sizeof(struct perf_event_header));
 | 
						|
    if (Header.type != PERF_RECORD_SAMPLE) {
 | 
						|
      // Ignores non-sample records.
 | 
						|
      DataPtr += Header.size;
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
    DataPtr += sizeof(Header);
 | 
						|
    uint64_t Count = llvm::support::endian::read64(DataPtr, support::native);
 | 
						|
    DataPtr += sizeof(Count);
 | 
						|
 | 
						|
    struct perf_branch_entry Entry;
 | 
						|
    memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
 | 
						|
 | 
						|
    // Read the perf_branch_entry array.
 | 
						|
    for (uint64_t i = 0; i < Count; ++i) {
 | 
						|
      const uint64_t BlockStart = From == nullptr
 | 
						|
                                      ? std::numeric_limits<uint64_t>::min()
 | 
						|
                                      : reinterpret_cast<uint64_t>(From);
 | 
						|
      const uint64_t BlockEnd = To == nullptr
 | 
						|
                                    ? std::numeric_limits<uint64_t>::max()
 | 
						|
                                    : reinterpret_cast<uint64_t>(To);
 | 
						|
 | 
						|
      if (BlockStart <= Entry.from && BlockEnd >= Entry.to)
 | 
						|
        CycleArray->push_back(Entry.cycles);
 | 
						|
 | 
						|
      if (i == Count - 1)
 | 
						|
        // We've reached the last entry.
 | 
						|
        return llvm::Error::success();
 | 
						|
 | 
						|
      // Advance to next entry
 | 
						|
      DataPtr += sizeof(Entry);
 | 
						|
      memcpy(&Entry, DataPtr, sizeof(struct perf_branch_entry));
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return llvm::make_error<llvm::StringError>("Unable to parse databuffer.",
 | 
						|
                                             llvm::errc::io_error);
 | 
						|
}
 | 
						|
 | 
						|
// Builds the perf event description used to sample the LBR: a raw
// BR_INST_RETIRED.NEAR_TAKEN event whose samples carry the branch stack, with
// one sample every SamplingPeriod events. SamplingPeriod must be positive.
// The event is created disabled.
X86LbrPerfEvent::X86LbrPerfEvent(unsigned SamplingPeriod) {
  assert(SamplingPeriod > 0 && "SamplingPeriod must be positive");

  EventString = "BR_INST_RETIRED.NEAR_TAKEN";
  FullQualifiedEventString = EventString;

  // NOTE(review): Attr is allocated here and never released in this file;
  // presumably the base class owns and frees it - confirm.
  Attr = new perf_event_attr();
  perf_event_attr &Config = *Attr;
  Config.size = sizeof(Config);

  // What to count.
  Config.type = PERF_TYPE_RAW;
  // FIXME This is SKL's encoding. Not sure if it'll change.
  Config.config = 0x20c4; // BR_INST_RETIRED.NEAR_TAKEN

  // What to record and how often.
  Config.sample_type = PERF_SAMPLE_BRANCH_STACK;
  Config.sample_period = SamplingPeriod;
  Config.wakeup_events = 1; // We need this even when using ioctl REFRESH.
  Config.read_format = PERF_FORMAT_GROUP;

  // Kernel and hypervisor are excluded below, which is why "USER" does not
  // need to be requested in branch_sample_type.
  Config.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
  Config.exclude_kernel = 1;
  Config.exclude_hv = 1;

  Config.disabled = 1;
}
 | 
						|
 | 
						|
// Takes ownership of NewEvent through the Counter base class, then maps
// kMappedBufferSize bytes of the counter's file descriptor as a shared
// read/write mapping. A mapping failure is only reported on llvm::errs();
// MMappedBuffer is then left equal to MAP_FAILED.
X86LbrCounter::X86LbrCounter(pfm::PerfEvent &&NewEvent)
    : Counter(std::move(NewEvent)) {
  constexpr int kProtection = PROT_READ | PROT_WRITE;
  MMappedBuffer = mmap(/*addr=*/nullptr, kMappedBufferSize, kProtection,
                       MAP_SHARED, FileDescriptor, /*offset=*/0);
  if (MMappedBuffer != MAP_FAILED)
    return;
  llvm::errs() << "Failed to mmap buffer.";
}
 | 
						|
 | 
						|
// Releases the mapping created by the constructor. A munmap failure is
// reported on llvm::errs().
X86LbrCounter::~X86LbrCounter() {
  // The constructor leaves MMappedBuffer equal to MAP_FAILED when mmap fails.
  // There is nothing to unmap in that case, and passing MAP_FAILED to munmap
  // would only produce a second, misleading error message.
  if (MMappedBuffer == MAP_FAILED)
    return;
  if (0 != munmap(MMappedBuffer, kMappedBufferSize))
    llvm::errs() << "Failed to munmap buffer.";
}
 | 
						|
 | 
						|
// Starts the counter by issuing PERF_EVENT_IOC_REFRESH on its file descriptor
// with a refresh count of kMaxPollsPerFd. The result of ioctl is not checked.
void X86LbrCounter::start() {
  constexpr int kMaxPollsPerFd = 1024;
  ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, kMaxPollsPerFd);
}
 | 
						|
 | 
						|
// Checks whether the host reports cycle counts in its LBR records.
//
// Runs a short branchy loop under a temporary LBR counter, reads the counter
// back without any address filtering, and returns success if at least one of
// the reported cycle values is non-zero. Returns a not_supported error
// otherwise, including when the counter could not be read at all.
llvm::Error X86LbrCounter::checkLbrSupport() {
  // Do a sample read and check if the results contain non-zero values.
  X86LbrCounter counter(X86LbrPerfEvent(123));
  counter.start();

  // Prevent the compiler from unrolling the loop and get rid of all the
  // branches. We need at least 16 iterations.
  int Sum = 0;
  int V = 1;

  // Reading through a volatile pointer forces a real load on every iteration.
  volatile int *P = &V;
  auto TimeLimit =
      std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5);

  // Iterate at least kLbrEntries times, and keep going until TimeLimit.
  for (int I = 0;
       I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit;
       ++I) {
    Sum += *P;
  }

  counter.stop();
  (void)Sum;

  auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
  if (!ResultOrError) {
    // An llvm::Expected holding an error must have that error handled before
    // it is destroyed. The read failure is deliberately folded into the
    // "not supported" result below, so consume it explicitly.
    llvm::consumeError(ResultOrError.takeError());
  } else {
    // If there is at least one non-zero entry, then LBR is supported.
    for (const int64_t Value : *ResultOrError)
      if (Value != 0)
        return Error::success();
  }

  return llvm::make_error<llvm::StringError>(
      "LBR format with cycles is not suppported on the host.",
      llvm::errc::not_supported);
}
 | 
						|
 | 
						|
// Disables the event, then reads the LBR cycle counts restricted to the
// address range occupied by FunctionBytes.
//
// Returns an invalid_argument error when FunctionBytes is empty; otherwise
// returns whatever doReadCounter produces for the range
// [FunctionBytes.data(), FunctionBytes.data() + FunctionBytes.size()].
llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::readOrError(StringRef FunctionBytes) const {
  // Disable the event before reading
  ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);

  if (FunctionBytes.empty())
    return llvm::make_error<llvm::StringError>("Empty function bytes",
                                               llvm::errc::invalid_argument);

  // The boundaries of the function are used to filter the LBRs so that only
  // the relevant records are kept.
  const char *const FunctionBegin = FunctionBytes.data();
  const char *const FunctionEnd = FunctionBegin + FunctionBytes.size();
  return doReadCounter(static_cast<const void *>(FunctionBegin),
                       static_cast<const void *>(FunctionEnd));
}
 | 
						|
 | 
						|
// Waits for LBR data to become available, then extracts the cycle counts of
// the branch entries lying within [From, To] (a null bound is unconstrained).
//
// Returns the extracted cycle counts, or an error when:
//  - the perf buffer was never successfully mapped (io_error),
//  - polling the perf event fails (io_error),
//  - polling keeps timing out after kMaxTimeouts retries
//    (device_or_resource_busy),
//  - the amount of pending data exceeds the data buffer (invalid_argument),
//  - the data cannot be parsed (error from parseDataBuffer).
llvm::Expected<llvm::SmallVector<int64_t, 4>>
X86LbrCounter::doReadCounter(const void *From, const void *To) const {
  // The max number of time-outs/retries before we give up.
  static constexpr int kMaxTimeouts = 160;

  // The constructor only logs an mmap failure and leaves MMappedBuffer equal
  // to MAP_FAILED; reading from it below would crash, so report it instead.
  if (MMappedBuffer == MAP_FAILED)
    return llvm::make_error<llvm::StringError>(
        "LBR perf event buffer is not mapped.", llvm::errc::io_error);

  // Poll until the event is readable, retrying on every time-out.
  int NumTimeouts = 0;
  for (;;) {
    const int PollResult = pollLbrPerfEvent(FileDescriptor);
    if (PollResult > 0)
      break;
    if (PollResult == -1)
      return llvm::make_error<llvm::StringError>("Cannot poll LBR perf event.",
                                                 llvm::errc::io_error);
    if (NumTimeouts++ >= kMaxTimeouts)
      return llvm::make_error<llvm::StringError>(
          "LBR polling still timed out after max number of attempts.",
          llvm::errc::device_or_resource_busy);
  }

  // Snapshot the metadata page located at the start of the mapping.
  struct perf_event_mmap_page Page;
  memcpy(&Page, MMappedBuffer, sizeof(struct perf_event_mmap_page));

  const uint64_t DataTail = Page.data_tail;
  const uint64_t DataHead = Page.data_head;
  // We're supposed to use a barrier after reading data_head.
  std::atomic_thread_fence(std::memory_order_acq_rel);
  const size_t DataSize = DataHead - DataTail;
  if (DataSize > kDataBufferSize)
    return llvm::make_error<llvm::StringError>(
        "DataSize larger than buffer size.", llvm::errc::invalid_argument);

  // Linearize the cyclic buffer into a private copy, then parse that copy and
  // fill CycleArray with the sequence of cycle counts it contains.
  auto DataBuf = std::make_unique<char[]>(kDataBufferSize);
  copyDataBuffer(MMappedBuffer, DataBuf.get(), DataTail, DataSize);

  llvm::SmallVector<int64_t, 4> CycleArray;
  llvm::Error error =
      parseDataBuffer(DataBuf.get(), DataSize, From, To, &CycleArray);
  if (!error)
    return CycleArray;
  return std::move(error);
}
 | 
						|
 | 
						|
} // namespace exegesis
 | 
						|
} // namespace llvm
 | 
						|
 | 
						|
#endif // defined(__linux__) && defined(HAVE_LIBPFM) &&
 | 
						|
       // defined(LIBPFM_HAS_FIELD_CYCLES)
 |