forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			354 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			354 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
| #include "cpuid.h"
 | |
| #include "sanitizer_common/sanitizer_common.h"
 | |
| #if !SANITIZER_FUCHSIA
 | |
| #include "sanitizer_common/sanitizer_posix.h"
 | |
| #endif
 | |
| #include "xray_defs.h"
 | |
| #include "xray_interface_internal.h"
 | |
| 
 | |
| #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
 | |
| #include <sys/types.h>
 | |
| #if SANITIZER_OPENBSD
 | |
| #include <sys/time.h>
 | |
| #include <machine/cpu.h>
 | |
| #endif
 | |
| #include <sys/sysctl.h>
 | |
| #elif SANITIZER_FUCHSIA
 | |
| #include <zircon/syscalls.h>
 | |
| #endif
 | |
| 
 | |
| #include <atomic>
 | |
| #include <cstdint>
 | |
| #include <errno.h>
 | |
| #include <fcntl.h>
 | |
| #include <iterator>
 | |
| #include <limits>
 | |
| #include <tuple>
 | |
| #include <unistd.h>
 | |
| 
 | |
| namespace __xray {
 | |
| 
 | |
| #if SANITIZER_LINUX
 | |
| static std::pair<ssize_t, bool>
 | |
| retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
 | |
|   auto BytesToRead = std::distance(Begin, End);
 | |
|   ssize_t BytesRead;
 | |
|   ssize_t TotalBytesRead = 0;
 | |
|   while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
 | |
|     if (BytesRead == -1) {
 | |
|       if (errno == EINTR)
 | |
|         continue;
 | |
|       Report("Read error; errno = %d\n", errno);
 | |
|       return std::make_pair(TotalBytesRead, false);
 | |
|     }
 | |
| 
 | |
|     TotalBytesRead += BytesRead;
 | |
|     BytesToRead -= BytesRead;
 | |
|     Begin += BytesRead;
 | |
|   }
 | |
|   return std::make_pair(TotalBytesRead, true);
 | |
| }
 | |
| 
 | |
| static bool readValueFromFile(const char *Filename,
 | |
|                               long long *Value) XRAY_NEVER_INSTRUMENT {
 | |
|   int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
 | |
|   if (Fd == -1)
 | |
|     return false;
 | |
|   static constexpr size_t BufSize = 256;
 | |
|   char Line[BufSize] = {};
 | |
|   ssize_t BytesRead;
 | |
|   bool Success;
 | |
|   std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
 | |
|   close(Fd);
 | |
|   if (!Success)
 | |
|     return false;
 | |
|   const char *End = nullptr;
 | |
|   long long Tmp = internal_simple_strtoll(Line, &End, 10);
 | |
|   bool Result = false;
 | |
|   if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
 | |
|     *Value = Tmp;
 | |
|     Result = true;
 | |
|   }
 | |
|   return Result;
 | |
| }
 | |
| 
 | |
| uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
 | |
|   long long TSCFrequency = -1;
 | |
|   if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
 | |
|                         &TSCFrequency)) {
 | |
|     TSCFrequency *= 1000;
 | |
|   } else if (readValueFromFile(
 | |
|                  "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
 | |
|                  &TSCFrequency)) {
 | |
|     TSCFrequency *= 1000;
 | |
|   } else {
 | |
|     Report("Unable to determine CPU frequency for TSC accounting.\n");
 | |
|   }
 | |
|   return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
 | |
| }
 | |
| #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
 | |
| uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
 | |
|     long long TSCFrequency = -1;
 | |
|     size_t tscfreqsz = sizeof(TSCFrequency);
 | |
| #if SANITIZER_OPENBSD
 | |
|     int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
 | |
|     if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
 | |
| #elif SANITIZER_MAC
 | |
|     if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
 | |
|                               &tscfreqsz, NULL, 0) != -1) {
 | |
| 
 | |
| #else
 | |
|     if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
 | |
|                               NULL, 0) != -1) {
 | |
| #endif
 | |
|         return static_cast<uint64_t>(TSCFrequency);
 | |
|     } else {
 | |
|       Report("Unable to determine CPU frequency for TSC accounting.\n");
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| #elif !SANITIZER_FUCHSIA
 | |
| uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
 | |
|     /* Not supported */
 | |
|     return 0;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| static constexpr uint8_t CallOpCode = 0xe8;
 | |
| static constexpr uint16_t MovR10Seq = 0xba41;
 | |
| static constexpr uint16_t Jmp9Seq = 0x09eb;
 | |
| static constexpr uint16_t Jmp20Seq = 0x14eb;
 | |
| static constexpr uint16_t Jmp15Seq = 0x0feb;
 | |
| static constexpr uint8_t JmpOpCode = 0xe9;
 | |
| static constexpr uint8_t RetOpCode = 0xc3;
 | |
| static constexpr uint16_t NopwSeq = 0x9066;
 | |
| 
 | |
| static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
 | |
| static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
 | |
| 
 | |
| bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
 | |
|                         const XRaySledEntry &Sled,
 | |
|                         void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
 | |
|   // Here we do the dance of replacing the following sled:
 | |
|   //
 | |
|   // xray_sled_n:
 | |
|   //   jmp +9
 | |
|   //   <9 byte nop>
 | |
|   //
 | |
|   // With the following:
 | |
|   //
 | |
|   //   mov r10d, <function id>
 | |
|   //   call <relative 32bit offset to entry trampoline>
 | |
|   //
 | |
|   // We need to do this in the following order:
 | |
|   //
 | |
|   // 1. Put the function id first, 2 bytes from the start of the sled (just
 | |
|   // after the 2-byte jmp instruction).
 | |
|   // 2. Put the call opcode 6 bytes from the start of the sled.
 | |
|   // 3. Put the relative offset 7 bytes from the start of the sled.
 | |
|   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
 | |
|   // opcode and first operand.
 | |
|   //
 | |
|   // Prerequisite is to compute the relative offset to the trampoline's address.
 | |
|   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
 | |
|                              (static_cast<int64_t>(Sled.Address) + 11);
 | |
|   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
 | |
|     Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
 | |
|            Trampoline, reinterpret_cast<void *>(Sled.Address));
 | |
|     return false;
 | |
|   }
 | |
|   if (Enable) {
 | |
|     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
 | |
|     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
 | |
|     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
 | |
|         std::memory_order_release);
 | |
|   } else {
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
 | |
|         std::memory_order_release);
 | |
|     // FIXME: Write out the nops still?
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
 | |
|                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
 | |
|   // Here we do the dance of replacing the following sled:
 | |
|   //
 | |
|   // xray_sled_n:
 | |
|   //   ret
 | |
|   //   <10 byte nop>
 | |
|   //
 | |
|   // With the following:
 | |
|   //
 | |
|   //   mov r10d, <function id>
 | |
|   //   jmp <relative 32bit offset to exit trampoline>
 | |
|   //
 | |
|   // 1. Put the function id first, 2 bytes from the start of the sled (just
 | |
|   // after the 1-byte ret instruction).
 | |
|   // 2. Put the jmp opcode 6 bytes from the start of the sled.
 | |
|   // 3. Put the relative offset 7 bytes from the start of the sled.
 | |
|   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
 | |
|   // opcode and first operand.
 | |
|   //
 | |
|   // Prerequisite is to compute the relative offset fo the
 | |
|   // __xray_FunctionExit function's address.
 | |
|   int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
 | |
|                              (static_cast<int64_t>(Sled.Address) + 11);
 | |
|   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
 | |
|     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
 | |
|            __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
 | |
|     return false;
 | |
|   }
 | |
|   if (Enable) {
 | |
|     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
 | |
|     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
 | |
|     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
 | |
|         std::memory_order_release);
 | |
|   } else {
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
 | |
|         std::memory_order_release);
 | |
|     // FIXME: Write out the nops still?
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
 | |
|                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
 | |
|   // Here we do the dance of replacing the tail call sled with a similar
 | |
|   // sequence as the entry sled, but calls the tail exit sled instead.
 | |
|   int64_t TrampolineOffset =
 | |
|       reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
 | |
|       (static_cast<int64_t>(Sled.Address) + 11);
 | |
|   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
 | |
|     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
 | |
|            __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
 | |
|     return false;
 | |
|   }
 | |
|   if (Enable) {
 | |
|     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
 | |
|     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
 | |
|     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
 | |
|         std::memory_order_release);
 | |
|   } else {
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
 | |
|         std::memory_order_release);
 | |
|     // FIXME: Write out the nops still?
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
 | |
|                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
 | |
|   // Here we do the dance of replacing the following sled:
 | |
|   //
 | |
|   // In Version 0:
 | |
|   //
 | |
|   // xray_sled_n:
 | |
|   //   jmp +20          // 2 bytes
 | |
|   //   ...
 | |
|   //
 | |
|   // With the following:
 | |
|   //
 | |
|   //   nopw             // 2 bytes*
 | |
|   //   ...
 | |
|   //
 | |
|   //
 | |
|   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
 | |
|   //
 | |
|   // ---
 | |
|   //
 | |
|   // In Version 1:
 | |
|   //
 | |
|   //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
 | |
|   //   to a jmp, use 15 bytes instead.
 | |
|   //
 | |
|   if (Enable) {
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
 | |
|         std::memory_order_release);
 | |
|   } else {
 | |
|     switch (Sled.Version) {
 | |
|     case 1:
 | |
|       std::atomic_store_explicit(
 | |
|           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
 | |
|           std::memory_order_release);
 | |
|       break;
 | |
|     case 0:
 | |
|     default:
 | |
|       std::atomic_store_explicit(
 | |
|           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
 | |
|           std::memory_order_release);
 | |
|       break;
 | |
|     }
 | |
|     }
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
 | |
|                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
 | |
|   // Here we do the dance of replacing the following sled:
 | |
|   //
 | |
|   // xray_sled_n:
 | |
|   //   jmp +20          // 2 byte instruction
 | |
|   //   ...
 | |
|   //
 | |
|   // With the following:
 | |
|   //
 | |
|   //   nopw             // 2 bytes
 | |
|   //   ...
 | |
|   //
 | |
|   //
 | |
|   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
 | |
|   // The 20 byte sled stashes three argument registers, calls the trampoline,
 | |
|   // unstashes the registers and returns. If the arguments are already in
 | |
|   // the correct registers, the stashing and unstashing become equivalently
 | |
|   // sized nops.
 | |
|   if (Enable) {
 | |
|     std::atomic_store_explicit(
 | |
|         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
 | |
|         std::memory_order_release);
 | |
|   } else {
 | |
|       std::atomic_store_explicit(
 | |
|           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
 | |
|           std::memory_order_release);
 | |
|   }
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| #if !SANITIZER_FUCHSIA
 | |
| // We determine whether the CPU we're running on has the correct features we
 | |
| // need. In x86_64 this will be rdtscp support.
 | |
| bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
 | |
|   unsigned int EAX, EBX, ECX, EDX;
 | |
| 
 | |
|   // We check whether rdtscp support is enabled. According to the x86_64 manual,
 | |
|   // level should be set at 0x80000001, and we should have a look at bit 27 in
 | |
|   // EDX. That's 0x8000000 (or 1u << 27).
 | |
|   __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
 | |
|     : "0"(0x80000001));
 | |
|   if (!(EDX & (1u << 27))) {
 | |
|     Report("Missing rdtscp support.\n");
 | |
|     return false;
 | |
|   }
 | |
|   // Also check whether we can determine the CPU frequency, since if we cannot,
 | |
|   // we should use the emulated TSC instead.
 | |
|   if (!getTSCFrequency()) {
 | |
|     Report("Unable to determine CPU frequency.\n");
 | |
|     return false;
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| } // namespace __xray
 |