tsan: mmap shadow stack

We used to mmap the C++ shadow stack as part of the trace region
before ed7f3f5bc9 ("tsan: move shadow stack into ThreadState"),
which moved the shadow stack into TLS. This started causing
timeouts and OOMs on some of our internal tests that repeatedly
create and destroy thousands of threads.
Allocate the C++ shadow stack with mmap and small pages again.
This prevents the observed timeouts and OOMs.
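But we now need to be more careful with interceptors that
run after thread finalization, because FuncEntry/Exit and
TraceAddEvent all need the shadow stack, which is now unmapped
(and the pointer reset to null) when the thread finishes.
To that end, OnUserAlloc/OnUserFree now also check thr->is_inited,
which ThreadFinish clears.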

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D113786
This commit is contained in:
Dmitry Vyukov 2021-11-12 19:28:39 +01:00
parent dd87c5b322
commit b5ff187b7b
4 changed files with 25 additions and 17 deletions

View File

@ -220,7 +220,7 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) {
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p);
ctx->metamap.AllocBlock(thr, pc, p, sz);
if (write && thr->ignore_reads_and_writes == 0)
if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
else
MemoryResetRange(thr, pc, (uptr)p, sz);
@ -230,7 +230,7 @@ void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
CHECK_NE(p, (void*)0);
uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz);
if (write && thr->ignore_reads_and_writes == 0)
if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeFreed(thr, pc, (uptr)p, sz);
}

View File

@ -148,15 +148,19 @@ ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
{
CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
#if !SANITIZER_GO
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kShadowStackSize;
// C/C++ uses fixed size shadow stack.
const int kInitStackSize = kShadowStackSize;
shadow_stack = static_cast<uptr *>(
MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
kInitStackSize * sizeof(uptr));
#else
// Setup dynamic shadow stack.
// Go uses malloc-allocated shadow stack with dynamic size.
const int kInitStackSize = 8;
shadow_stack = (uptr *)Alloc(kInitStackSize * sizeof(uptr));
shadow_stack = static_cast<uptr *>(Alloc(kInitStackSize * sizeof(uptr)));
#endif
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kInitStackSize;
#endif
}
#if !SANITIZER_GO

View File

@ -159,12 +159,8 @@ struct ThreadState {
#if !SANITIZER_GO
IgnoreSet mop_ignore_set;
IgnoreSet sync_ignore_set;
// C/C++ uses fixed size shadow stack.
uptr shadow_stack[kShadowStackSize];
#else
// Go uses malloc-allocated shadow stack with dynamic size.
uptr *shadow_stack;
#endif
uptr *shadow_stack;
uptr *shadow_stack_end;
uptr *shadow_stack_pos;
RawShadow *racy_shadow_addr;
@ -616,6 +612,9 @@ void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
EventType typ, u64 addr) {
if (!kCollectHistory)
return;
// TraceSwitch accesses shadow_stack, but it's called infrequently,
// so we check it here proactively.
DCHECK(thr->shadow_stack);
DCHECK_GE((int)typ, 0);
DCHECK_LE((int)typ, 7);
DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);

View File

@ -227,15 +227,11 @@ void ThreadFinish(ThreadState *thr) {
if (thr->tls_addr && thr->tls_size)
DontNeedShadowFor(thr->tls_addr, thr->tls_size);
thr->is_dead = true;
thr->is_inited = false;
ctx->thread_registry.FinishThread(thr->tid);
}
void ThreadContext::OnFinished() {
#if SANITIZER_GO
Free(thr->shadow_stack);
thr->shadow_stack_pos = nullptr;
thr->shadow_stack_end = nullptr;
#endif
if (!detached) {
thr->fast_state.IncrementEpoch();
// Can't increment epoch w/o writing to the trace as well.
@ -244,6 +240,15 @@ void ThreadContext::OnFinished() {
}
epoch1 = thr->fast_state.epoch();
#if !SANITIZER_GO
UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr));
#else
Free(thr->shadow_stack);
#endif
thr->shadow_stack = nullptr;
thr->shadow_stack_pos = nullptr;
thr->shadow_stack_end = nullptr;
if (common_flags()->detect_deadlocks)
ctx->dd->DestroyLogicalThread(thr->dd_lt);
thr->clock.ResetCached(&thr->proc()->clock_cache);