[TSan][Darwin] Avoid crashes due to interpreting non-zero shadow content as a pointer

We would like to use TLS to store the ThreadState object (or at least a
reference to it), but on Darwin accessing TLS via __thread or manually
by using pthread_key_* is problematic, because there are several places
where interceptors are called when TLS is not accessible (early process
startup, thread cleanup, ...).

Previously, we used a "poor man's TLS" implementation, where we used the
shadow memory of the pointer returned by pthread_self() to store a
pointer to the ThreadState object.

The problem with that was that certain operations can populate shadow
bytes unbeknownst to TSan, and we later interpret these non-zero bytes
as the pointer to our ThreadState object and crash when dereferencing
the pointer.

This patch changes the storage location of our reference to the
ThreadState object to "real" TLS.  We make this work by artificially
keeping this reference alive in the pthread_key destructor by resetting
the key value with pthread_setspecific().
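
The core of the trick, as a minimal standalone sketch (the names KeepAlive
and Worker are illustrative, not from the TSan runtime): re-installing a
non-NULL value from inside the key's destructor makes pthreads run another
destructor round, so the slot stays readable during thread teardown until
the implementation stops iterating (after PTHREAD_DESTRUCTOR_ITERATIONS
rounds, as documented on Darwin).

  #include <limits.h>   // PTHREAD_DESTRUCTOR_ITERATIONS
  #include <pthread.h>
  #include <stdio.h>

  static pthread_key_t key;

  static void KeepAlive(void *value) {
    // Re-arm the slot so later destructor iterations (and any late TLS
    // reads during teardown) still find the value.
    pthread_setspecific(key, value);
  }

  static void *Worker(void *) {
    // Stand-in for a per-thread state object (leaked on purpose here).
    pthread_setspecific(key, new int(42));
    return nullptr;
  }

  int main() {
    pthread_key_create(&key, KeepAlive);
    pthread_t t;
    pthread_create(&t, nullptr, Worker, nullptr);
    pthread_join(t, nullptr);
    printf("destructor is re-run at most %d times\n",
           PTHREAD_DESTRUCTOR_ITERATIONS);
    return 0;
  }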

This change also fixes the issue where the ThreadState object is
re-allocated after DestroyThreadState() because intercepted functions
can still get called on the terminating thread after the
THREAD_TERMINATE event.
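
For the "dead" placeholder, a rough sketch under assumed names (State,
InitDeadState, FinalizeThreadState; plain mmap/mprotect stand in for the
sanitizer-internal allocators): the terminating thread's TLS slot is pointed
at a shared, read-only, inert state before the live one is released, so any
interceptor that runs afterwards no longer re-allocates a ThreadState.

  #include <assert.h>
  #include <pthread.h>
  #include <sys/mman.h>

  struct State { bool is_dead; int ignore_interceptors; };

  static pthread_key_t key;        // created at startup, as above
  static State *dead_state;        // shared, immutable placeholder

  static void InitDeadState() {
    void *p = mmap(nullptr, sizeof(State), PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANON, -1, 0);
    assert(p != MAP_FAILED);
    State *s = static_cast<State *>(p);
    s->is_dead = true;
    s->ignore_interceptors = 1;               // events on it become no-ops
    mprotect(s, sizeof(State), PROT_READ);    // catch accidental writes
    dead_state = s;
  }

  static void FinalizeThreadState(State *live) {
    // Swap in the placeholder *before* releasing the live state, so a
    // signal handler or late interceptor never sees a dangling pointer.
    pthread_setspecific(key, dead_state);
    munmap(live, sizeof(State));  // `live` was mmap'ed like the placeholder
  }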

Radar-Id: rdar://problem/72010355

Reviewed By: dvyukov

Differential Revision: https://reviews.llvm.org/D110236

Author: Julian Lettner
Date:   2021-11-30 12:12:14 -08:00
Parent: 618f8dc5e5
Commit: 858eb8fc11
1 changed file with 70 additions and 71 deletions

@@ -25,6 +25,7 @@
 #include "tsan_rtl.h"
 #include "tsan_flags.h"
+#include <limits.h>
 #include <mach/mach.h>
 #include <pthread.h>
 #include <signal.h>
@@ -45,70 +46,83 @@
 namespace __tsan {
 
 #if !SANITIZER_GO
 
-static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) {
-  atomic_uintptr_t *a = (atomic_uintptr_t *)dst;
-  void *val = (void *)atomic_load_relaxed(a);
-  atomic_signal_fence(memory_order_acquire);  // Turns the previous load into
-                                              // acquire wrt signals.
-  if (UNLIKELY(val == nullptr)) {
-    val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE,
-                                MAP_PRIVATE | MAP_ANON, -1, 0);
-    CHECK(val);
-    void *cmp = nullptr;
-    if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val,
-                                        memory_order_acq_rel)) {
-      internal_munmap(val, size);
-      val = cmp;
-    }
-  }
-  return val;
-}
+static char main_thread_state[sizeof(ThreadState)] ALIGNED(
+    SANITIZER_CACHE_LINE_SIZE);
+static ThreadState *dead_thread_state;
+static pthread_key_t thread_state_key;
 
-// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is
-// problematic, because there are several places where interceptors are called
-// when TLVs are not accessible (early process startup, thread cleanup, ...).
-// The following provides a "poor man's TLV" implementation, where we use the
-// shadow memory of the pointer returned by pthread_self() to store a pointer to
-// the ThreadState object. The main thread's ThreadState is stored separately
-// in a static variable, because we need to access it even before the
-// shadow memory is set up.
-static uptr main_thread_identity = 0;
-ALIGNED(64) static char main_thread_state[sizeof(ThreadState)];
-static ThreadState *main_thread_state_loc = (ThreadState *)main_thread_state;
+// We rely on the following documented, but Darwin-specific behavior to keep the
+// reference to the ThreadState object alive in TLS:
+// pthread_key_create man page:
+//   If, after all the destructors have been called for all non-NULL values with
+//   associated destructors, there are still some non-NULL values with
+//   associated destructors, then the process is repeated.  If, after at least
+//   [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for
+//   outstanding non-NULL values, there are still some non-NULL values with
+//   associated destructors, the implementation stops calling destructors.
+static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations");
+static void ThreadStateDestructor(void *thr) {
+  int res = pthread_setspecific(thread_state_key, thr);
+  CHECK_EQ(res, 0);
+}
 
-// We cannot use pthread_self() before libpthread has been initialized.  Our
-// current heuristic for guarding this is checking `main_thread_identity` which
-// is only assigned in `__tsan::InitializePlatform`.
-static ThreadState **cur_thread_location() {
-  if (main_thread_identity == 0)
-    return &main_thread_state_loc;
-  uptr thread_identity = (uptr)pthread_self();
-  if (thread_identity == main_thread_identity)
-    return &main_thread_state_loc;
-  return (ThreadState **)MemToMeta(thread_identity);
+static void InitializeThreadStateStorage() {
+  int res;
+  CHECK_EQ(thread_state_key, 0);
+  res = pthread_key_create(&thread_state_key, ThreadStateDestructor);
+  CHECK_EQ(res, 0);
+  res = pthread_setspecific(thread_state_key, main_thread_state);
+  CHECK_EQ(res, 0);
+
+  auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+  dts->fast_state.SetIgnoreBit();
+  dts->ignore_interceptors = 1;
+  dts->is_dead = true;
+  const_cast<Tid &>(dts->tid) = kInvalidTid;
+  res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ);  // immutable
+  CHECK_EQ(res, 0);
+  dead_thread_state = dts;
 }
 
 ThreadState *cur_thread() {
-  return (ThreadState *)SignalSafeGetOrAllocate(
-      (uptr *)cur_thread_location(), sizeof(ThreadState));
+  // Some interceptors get called before libpthread has been initialized and in
+  // these cases we must avoid calling any pthread APIs.
+  if (UNLIKELY(!thread_state_key)) {
+    return (ThreadState *)main_thread_state;
+  }
+
+  // We only reach this line after InitializeThreadStateStorage() ran, i.e.,
+  // after TSan (and therefore libpthread) have been initialized.
+  ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+  if (UNLIKELY(!thr)) {
+    thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+    int res = pthread_setspecific(thread_state_key, thr);
+    CHECK_EQ(res, 0);
+  }
+
+  return thr;
 }
 
 void set_cur_thread(ThreadState *thr) {
-  *cur_thread_location() = thr;
+  int res = pthread_setspecific(thread_state_key, thr);
+  CHECK_EQ(res, 0);
 }
 
-// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call
-// munmap first and then clear `fake_tls`; if we receive a signal in between,
-// handler will try to access the unmapped ThreadState.
 void cur_thread_finalize() {
-  ThreadState **thr_state_loc = cur_thread_location();
-  if (thr_state_loc == &main_thread_state_loc) {
+  ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+  CHECK(thr);
+  if (thr == (ThreadState *)main_thread_state) {
     // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to
     // exit the main thread. Let's keep the main thread's ThreadState.
     return;
   }
-  internal_munmap(*thr_state_loc, sizeof(ThreadState));
-  *thr_state_loc = nullptr;
+  // Intercepted functions can still get called after cur_thread_finalize()
+  // (called from DestroyThreadState()), so put a fake thread state for "dead"
+  // threads.  An alternative solution would be to release the ThreadState
+  // object from THREAD_DESTROY (which is delivered later and on the parent
+  // thread) instead of THREAD_TERMINATE.
+  int res = pthread_setspecific(thread_state_key, dead_thread_state);
+  CHECK_EQ(res, 0);
+
+  UnmapOrDie(thr, sizeof(ThreadState));
 }
 #endif
@@ -215,11 +229,10 @@ static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
       ThreadStart(thr, tid, GetTid(), ThreadType::Worker);
     }
   } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
-    if (thread == pthread_self()) {
-      ThreadState *thr = cur_thread();
-      if (thr->tctx) {
-        DestroyThreadState();
-      }
+    CHECK_EQ(thread, pthread_self());
+    ThreadState *thr = cur_thread();
+    if (thr->tctx) {
+      DestroyThreadState();
     }
   }
@@ -246,8 +259,7 @@ void InitializePlatform() {
 #if !SANITIZER_GO
   CheckAndProtect();
 
-  CHECK_EQ(main_thread_identity, 0);
-  main_thread_identity = (uptr)pthread_self();
+  InitializeThreadStateStorage();
 
   prev_pthread_introspection_hook =
       pthread_introspection_hook_install(&my_pthread_introspection_hook);
@@ -279,24 +291,11 @@ uptr ExtractLongJmpSp(uptr *env) {
 extern "C" void __tsan_tls_initialization() {}
 
 void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
-  // The pointer to the ThreadState object is stored in the shadow memory
-  // of the tls.
-  uptr tls_end = tls_addr + tls_size;
-  uptr thread_identity = (uptr)pthread_self();
   const uptr pc = StackTrace::GetNextInstructionPc(
       reinterpret_cast<uptr>(__tsan_tls_initialization));
-  if (thread_identity == main_thread_identity) {
-    MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
-  } else {
-    uptr thr_state_start = thread_identity;
-    uptr thr_state_end = thr_state_start + sizeof(uptr);
-    CHECK_GE(thr_state_start, tls_addr);
-    CHECK_LE(thr_state_start, tls_addr + tls_size);
-    CHECK_GE(thr_state_end, tls_addr);
-    CHECK_LE(thr_state_end, tls_addr + tls_size);
-    MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr);
-    MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end);
-  }
+  // Unlike Linux, we only store a pointer to the ThreadState object in TLS;
+  // just mark the entire range as written to.
+  MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
 }
 
 #endif