Whenever reasonable, merge ASAN quarantine batches to save memory.

Summary:
There are cases when the thread local quarantine drains almost-empty
quarantine batches into the global quarantine, where they stay almost
empty; this can create a huge memory overhead (each batch is 4K/8K,
depending on bitness).
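For scale (editor's note, using the ~8K 64-bit batch size enforced by the
COMPILER_CHECK in the diff below): the new lit test joins 2048 short-lived
threads, each quarantining a single small chunk; without merging that is
roughly 2048 nearly empty batches, ~16Mb of batch bookkeeping for about 2Kb
of user memory.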

Reviewers: eugenis

Subscribers: kubabrecka, llvm-commits

Differential Revision: https://reviews.llvm.org/D28068

llvm-svn: 292525
Commit a8ba9c8e66 by Alex Shlyapnikov, 2017-01-19 22:15:54 +00:00
Parent: 7c650777b0
7 changed files with 400 additions and 50 deletions

View File

@@ -70,6 +70,17 @@ struct IntrusiveList {
size_--;
}
void extract(Item *prev, Item *x) {
CHECK(!empty());
CHECK_NE(prev, nullptr);
CHECK_NE(x, nullptr);
CHECK_EQ(prev->next, x);
prev->next = x->next;
if (last_ == x)
last_ = prev;
size_--;
}
Item *front() { return first_; }
const Item *front() const { return first_; }
Item *back() { return last_; }
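A minimal usage sketch of the new extract() (editor's addition, not part of
the diff; ListItem is a hypothetical node type with an intrusive next pointer):

// extract() unlinks x given its immediate predecessor; the CHECKs above
// enforce that prev->next == x before unlinking.
struct ListItem { ListItem *next; };
IntrusiveList<ListItem> l;
ListItem a, b, c;
l.clear();
l.push_back(&a);
l.push_back(&b);
l.push_back(&c);
l.extract(&a, &b);  // a->next now points to c; b itself is not modified
// l.size() == 2, l.front() == &a, l.back() == &c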

View File

@@ -31,6 +31,40 @@ struct QuarantineBatch {
uptr size;
uptr count;
void *batch[kSize];
void init(void *ptr, uptr size) {
count = 1;
batch[0] = ptr;
this->size = size + sizeof(QuarantineBatch); // Account for the batch size.
}
// The total size of quarantined nodes recorded in this batch.
uptr quarantined_size() const {
return size - sizeof(QuarantineBatch);
}
void push_back(void *ptr, uptr size) {
CHECK_LT(count, kSize);
batch[count++] = ptr;
this->size += size;
}
bool can_merge(const QuarantineBatch* const from) const {
return count + from->count <= kSize;
}
void merge(QuarantineBatch* const from) {
CHECK_LE(count + from->count, kSize);
CHECK_GE(size, sizeof(QuarantineBatch));
for (uptr i = 0; i < from->count; ++i)
batch[count + i] = from->batch[i];
count += from->count;
size += from->quarantined_size();
from->count = 0;
from->size = sizeof(QuarantineBatch);
}
};
COMPILER_CHECK(sizeof(QuarantineBatch) <= (1 << 13)); // 8Kb.
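To make the size accounting concrete, a short sketch (editor's addition;
chunk0 and chunk1 are hypothetical pointers to quarantined chunks):

void *chunk0 = reinterpret_cast<void*>(0x1000);  // hypothetical chunk
void *chunk1 = reinterpret_cast<void*>(0x2000);  // hypothetical chunk
QuarantineBatch b;
b.init(chunk0, /*size=*/16);       // b.size == 16 + sizeof(QuarantineBatch)
b.push_back(chunk1, /*size=*/32);  // b.size == 48 + sizeof(QuarantineBatch)
// quarantined_size() excludes the batch overhead: b.quarantined_size() == 48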
@@ -69,7 +103,7 @@ class Quarantine {
if (cache_size) {
c->Enqueue(cb, ptr, size);
} else {
// cache_size == 0 only when size == 0 (see Init).
// GetCacheSize() == 0 only when GetSize() == 0 (see Init).
cb.Recycle(ptr);
}
// Check cache size anyway to accommodate for runtime cache_size change.
@@ -88,6 +122,8 @@ class Quarantine {
void PrintStats() const {
// It assumes that the world is stopped, just like the allocator's PrintStats.
Printf("Quarantine limits: global: %zdMb; thread local: %zdKb\n",
GetSize() >> 20, GetCacheSize() >> 10);
cache_.PrintStats();
}
@@ -108,9 +144,27 @@ class Quarantine {
uptr min_size = atomic_load(&min_size_, memory_order_relaxed);
{
SpinMutexLock l(&cache_mutex_);
// Go over the batches and merge partially filled ones to
// save some memory; otherwise the batches themselves (since the memory
// they use is counted against the quarantine limit) can outweigh the
// user's actual quarantined chunks, which defeats the purpose of the
// quarantine.
uptr cache_size = cache_.Size();
uptr overhead_size = cache_.OverheadSize();
CHECK_GE(cache_size, overhead_size);
// Do the merge only when overhead exceeds this predefined limit (might
// require some tuning). It saves us a merge attempt when the batch list
// is unlikely to contain batches suitable for merging.
const uptr kOverheadThresholdPercents = 100;
if (cache_size > overhead_size &&
overhead_size * (100 + kOverheadThresholdPercents) >
cache_size * kOverheadThresholdPercents) {
cache_.MergeBatches(&tmp);
}
// Extract enough chunks from the quarantine to get below the max
// quarantine size and leave some leeway for the newly quarantined chunks.
while (cache_.Size() > min_size) {
QuarantineBatch *b = cache_.DequeueBatch();
tmp.EnqueueBatch(b);
tmp.EnqueueBatch(cache_.DequeueBatch());
}
}
recycle_mutex_.Unlock();
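An editor's note on the threshold arithmetic above, since the inequality is
easy to misread:

// With kOverheadThresholdPercents == 100 the condition
//   overhead_size * (100 + 100) > cache_size * 100
// reduces to overhead_size > cache_size / 2, i.e. the merge pass runs only
// when batch bookkeeping exceeds the user bytes actually quarantined
// (cache_size == user bytes + overhead_size).
// Example: 1000 single-chunk batches of 8 user bytes each, ~8Kb per batch:
//   overhead_size ~= 8192000 and cache_size ~= 8200000, so the merge runs.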
@@ -145,26 +199,33 @@ class QuarantineCache {
list_.clear();
}
// Total memory used, including internal accounting.
uptr Size() const {
return atomic_load(&size_, memory_order_relaxed);
}
void Enqueue(Callback cb, void *ptr, uptr size) {
if (list_.empty() || list_.back()->count == QuarantineBatch::kSize) {
AllocBatch(cb);
size += sizeof(QuarantineBatch); // Count the batch in Quarantine size.
}
QuarantineBatch *b = list_.back();
CHECK(b);
b->batch[b->count++] = ptr;
b->size += size;
SizeAdd(size);
// Memory used for internal accounting.
uptr OverheadSize() const {
return list_.size() * sizeof(QuarantineBatch);
}
void Transfer(QuarantineCache *c) {
list_.append_back(&c->list_);
SizeAdd(c->Size());
atomic_store(&c->size_, 0, memory_order_relaxed);
void Enqueue(Callback cb, void *ptr, uptr size) {
if (list_.empty() || list_.back()->count == QuarantineBatch::kSize) {
QuarantineBatch *b = (QuarantineBatch *)cb.Allocate(sizeof(*b));
CHECK(b);
b->init(ptr, size);
EnqueueBatch(b);
} else {
list_.back()->push_back(ptr, size);
SizeAdd(size);
}
}
void Transfer(QuarantineCache *from_cache) {
list_.append_back(&from_cache->list_);
SizeAdd(from_cache->Size());
atomic_store(&from_cache->size_, 0, memory_order_relaxed);
}
void EnqueueBatch(QuarantineBatch *b) {
@@ -181,19 +242,51 @@ class QuarantineCache {
return b;
}
void MergeBatches(QuarantineCache *to_deallocate) {
uptr extracted_size = 0;
QuarantineBatch *current = list_.front();
while (current && current->next) {
if (current->can_merge(current->next)) {
QuarantineBatch *extracted = current->next;
// Move all the chunks into the current batch.
current->merge(extracted);
CHECK_EQ(extracted->count, 0);
CHECK_EQ(extracted->size, sizeof(QuarantineBatch));
// Remove the next batch from the list and account for its size.
list_.extract(current, extracted);
extracted_size += extracted->size;
// Add it to deallocation list.
to_deallocate->EnqueueBatch(extracted);
} else {
current = current->next;
}
}
SizeSub(extracted_size);
}
void PrintStats() const {
uptr batch_count = 0;
uptr total_quarantine_bytes = 0;
uptr total_overhead_bytes = 0;
uptr total_bytes = 0;
uptr total_quarantine_chunks = 0;
for (List::ConstIterator it = list_.begin(); it != list_.end(); ++it) {
batch_count++;
total_quarantine_bytes += (*it).size;
total_bytes += (*it).size;
total_overhead_bytes += (*it).size - (*it).quarantined_size();
total_quarantine_chunks += (*it).count;
}
Printf("Global quarantine stats: batches: %zd; bytes: %zd; chunks: %zd "
"(capacity: %zd chunks)\n",
batch_count, total_quarantine_bytes, total_quarantine_chunks,
batch_count * QuarantineBatch::kSize);
uptr quarantine_chunks_capacity = batch_count * QuarantineBatch::kSize;
int chunks_usage_percent = quarantine_chunks_capacity == 0 ?
0 : total_quarantine_chunks * 100 / quarantine_chunks_capacity;
uptr total_quarantined_bytes = total_bytes - total_overhead_bytes;
int memory_overhead_percent = total_quarantined_bytes == 0 ?
0 : total_overhead_bytes * 100 / total_quarantined_bytes;
Printf("Global quarantine stats: batches: %zd; bytes: %zd (user: %zd); "
"chunks: %zd (capacity: %zd); %d%% chunks used; %d%% memory overhead"
"\n",
batch_count, total_bytes, total_quarantined_bytes,
total_quarantine_chunks, quarantine_chunks_capacity,
chunks_usage_percent, memory_overhead_percent);
}
private:
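For intuition, a walk-through of one MergeBatches() pass (editor's addition;
counts are illustrative and assume kSize == 1021, consistent with the 8Kb
COMPILER_CHECK above):

// list: [A: 600 chunks] -> [B: 400] -> [C: 500]
// A.can_merge(B): 600 + 400 <= 1021, so B's chunks move into A and B is
// extracted onto the to_deallocate list:  [A: 1000] -> [C: 500]
// A.can_merge(C): 1000 + 500 > 1021, so the cursor advances; end of list.
// The pass is a single greedy sweep and only merges adjacent batches.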
@@ -208,15 +301,6 @@ class QuarantineCache {
void SizeSub(uptr sub) {
atomic_store(&size_, Size() - sub, memory_order_relaxed);
}
NOINLINE QuarantineBatch* AllocBatch(Callback cb) {
QuarantineBatch *b = (QuarantineBatch *)cb.Allocate(sizeof(*b));
CHECK(b);
b->count = 0;
b->size = 0;
list_.push_back(b);
return b;
}
};
} // namespace __sanitizer

View File

@@ -26,6 +26,7 @@ set(SANITIZER_UNITTESTS
sanitizer_posix_test.cc
sanitizer_printf_test.cc
sanitizer_procmaps_test.cc
sanitizer_quarantine_test.cc
sanitizer_stackdepot_test.cc
sanitizer_stacktrace_printer_test.cc
sanitizer_stacktrace_test.cc

View File

@@ -125,6 +125,22 @@ TEST(SanitizerCommon, IntrusiveList) {
CHECK(l.empty());
l.CheckConsistency();
l.push_back(x);
l.push_back(y);
l.push_back(z);
l.extract(x, y);
CHECK_EQ(l.size(), 2);
CHECK_EQ(l.front(), x);
CHECK_EQ(l.back(), z);
l.CheckConsistency();
l.extract(x, z);
CHECK_EQ(l.size(), 1);
CHECK_EQ(l.front(), x);
CHECK_EQ(l.back(), x);
l.CheckConsistency();
l.pop_front();
CHECK(l.empty());
List l1, l2;
l1.clear();
l2.clear();

View File

@@ -0,0 +1,180 @@
//===-- sanitizer_quarantine_test.cc --------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
//
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_quarantine.h"
#include "gtest/gtest.h"
#include <stdlib.h>
namespace __sanitizer {
struct QuarantineCallback {
void Recycle(void *m) {}
void *Allocate(uptr size) {
return malloc(size);
}
void Deallocate(void *p) {
free(p);
}
};
typedef QuarantineCache<QuarantineCallback> Cache;
static void* kFakePtr = reinterpret_cast<void*>(0xFA83FA83);
static const size_t kBlockSize = 8;
static QuarantineCallback cb;
static void DeallocateCache(Cache *cache) {
while (QuarantineBatch *batch = cache->DequeueBatch())
cb.Deallocate(batch);
}
TEST(SanitizerCommon, QuarantineBatchMerge) {
// Verify the trivial case.
QuarantineBatch into;
into.init(kFakePtr, 4UL);
QuarantineBatch from;
from.init(kFakePtr, 8UL);
into.merge(&from);
ASSERT_EQ(into.count, 2UL);
ASSERT_EQ(into.batch[0], kFakePtr);
ASSERT_EQ(into.batch[1], kFakePtr);
ASSERT_EQ(into.size, 12UL + sizeof(QuarantineBatch));
ASSERT_EQ(into.quarantined_size(), 12UL);
ASSERT_EQ(from.count, 0UL);
ASSERT_EQ(from.size, sizeof(QuarantineBatch));
ASSERT_EQ(from.quarantined_size(), 0UL);
// Merge the batch to the limit.
for (uptr i = 2; i < QuarantineBatch::kSize; ++i)
from.push_back(kFakePtr, 8UL);
ASSERT_TRUE(into.count + from.count == QuarantineBatch::kSize);
ASSERT_TRUE(into.can_merge(&from));
into.merge(&from);
ASSERT_TRUE(into.count == QuarantineBatch::kSize);
// No more space, not even for one element.
from.init(kFakePtr, 8UL);
ASSERT_FALSE(into.can_merge(&from));
}
TEST(SanitizerCommon, QuarantineCacheMergeBatchesEmpty) {
Cache cache;
Cache to_deallocate;
cache.MergeBatches(&to_deallocate);
ASSERT_EQ(to_deallocate.Size(), 0UL);
ASSERT_EQ(to_deallocate.DequeueBatch(), nullptr);
}
TEST(SanitizerCommon, QuarantineCacheMergeBatchesOneBatch) {
Cache cache;
cache.Enqueue(cb, kFakePtr, kBlockSize);
ASSERT_EQ(kBlockSize + sizeof(QuarantineBatch), cache.Size());
Cache to_deallocate;
cache.MergeBatches(&to_deallocate);
// Nothing to merge, nothing to deallocate.
ASSERT_EQ(kBlockSize + sizeof(QuarantineBatch), cache.Size());
ASSERT_EQ(to_deallocate.Size(), 0UL);
ASSERT_EQ(to_deallocate.DequeueBatch(), nullptr);
DeallocateCache(&cache);
}
TEST(SanitizerCommon, QuarantineCacheMergeBatchesSmallBatches) {
// Make a cache with two batches small enough to merge.
Cache from;
from.Enqueue(cb, kFakePtr, kBlockSize);
Cache cache;
cache.Enqueue(cb, kFakePtr, kBlockSize);
cache.Transfer(&from);
ASSERT_EQ(kBlockSize * 2 + sizeof(QuarantineBatch) * 2, cache.Size());
Cache to_deallocate;
cache.MergeBatches(&to_deallocate);
// Batches merged, one batch to deallocate.
ASSERT_EQ(kBlockSize * 2 + sizeof(QuarantineBatch), cache.Size());
ASSERT_EQ(to_deallocate.Size(), sizeof(QuarantineBatch));
DeallocateCache(&cache);
DeallocateCache(&to_deallocate);
}
TEST(SanitizerCommon, QuarantineCacheMergeBatchesTooBigToMerge) {
const uptr kNumBlocks = QuarantineBatch::kSize - 1;
// Make a cache with two batches too big to merge.
Cache from;
Cache cache;
for (uptr i = 0; i < kNumBlocks; ++i) {
from.Enqueue(cb, kFakePtr, kBlockSize);
cache.Enqueue(cb, kFakePtr, kBlockSize);
}
cache.Transfer(&from);
ASSERT_EQ(kBlockSize * kNumBlocks * 2 +
sizeof(QuarantineBatch) * 2, cache.Size());
Cache to_deallocate;
cache.MergeBatches(&to_deallocate);
// Batches cannot be merged.
ASSERT_EQ(kBlockSize * kNumBlocks * 2 +
sizeof(QuarantineBatch) * 2, cache.Size());
ASSERT_EQ(to_deallocate.Size(), 0UL);
DeallocateCache(&cache);
}
TEST(SanitizerCommon, QuarantineCacheMergeBatchesALotOfBatches) {
const uptr kNumBatchesAfterMerge = 3;
const uptr kNumBlocks = QuarantineBatch::kSize * kNumBatchesAfterMerge;
const uptr kNumBatchesBeforeMerge = kNumBlocks;
// Make a cache with many small batches.
Cache cache;
for (uptr i = 0; i < kNumBlocks; ++i) {
Cache from;
from.Enqueue(cb, kFakePtr, kBlockSize);
cache.Transfer(&from);
}
ASSERT_EQ(kBlockSize * kNumBlocks +
sizeof(QuarantineBatch) * kNumBatchesBeforeMerge, cache.Size());
Cache to_deallocate;
cache.MergeBatches(&to_deallocate);
// All blocks should fit into 3 batches.
ASSERT_EQ(kBlockSize * kNumBlocks +
sizeof(QuarantineBatch) * kNumBatchesAfterMerge, cache.Size());
ASSERT_EQ(to_deallocate.Size(),
sizeof(QuarantineBatch) *
(kNumBatchesBeforeMerge - kNumBatchesAfterMerge));
DeallocateCache(&cache);
DeallocateCache(&to_deallocate);
}
} // namespace __sanitizer

View File

@@ -0,0 +1,56 @@
// Test how creating and joining many threads, each making only a few
// allocations, affects the total quarantine (and overall heap) size.
// RUN: %clangxx_asan %s -o %t
// RUN: %env_asan_opts=thread_local_quarantine_size_kb=64:quarantine_size_mb=1:allocator_release_to_os_interval_ms=-1 %run %t 2>&1 | \
// RUN: FileCheck %s --allow-empty --check-prefix=CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sanitizer/allocator_interface.h>
// The thread local quarantine is merged into the global one when a thread
// exits, and this scenario (a few allocations per thread) used to generate a
// huge overhead of practically empty quarantine batches (one per thread).
static const size_t kHeapSizeIncrementLimit = 2 << 20;
static const int kNumThreads = 2048;
// The allocation size is intentionally small: all we want to test is that
// the quarantine batch merging process does not leak the memory used for the
// batches themselves.
// TODO(alekseyshl): Add more comprehensive test verifying quarantine size
// directly (requires quarantine stats exposed in allocator stats and API).
static const int kAllocSize = 1;
void *ThreadFn(void *unused) {
char *temp = new char[kAllocSize];
memset(temp, -1, kAllocSize);
delete [] (temp);
return NULL;
}
int main() {
// Warm up all internal structures.
pthread_t t;
pthread_create(&t, 0, ThreadFn, 0);
pthread_join(t, 0);
size_t heap_size = __sanitizer_get_heap_size();
fprintf(stderr, "Heap size: %zd\n", heap_size);
for (int i = 0; i < kNumThreads; i++) {
pthread_t t;
pthread_create(&t, 0, ThreadFn, 0);
pthread_join(t, 0);
}
size_t new_heap_size = __sanitizer_get_heap_size();
fprintf(stderr, "New heap size: %zd\n", new_heap_size);
if (new_heap_size - heap_size < kHeapSizeIncrementLimit)
fprintf(stderr, "Heap growth is within limits\n");
}
// CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD: Heap growth is within limits
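Editor's note on the numbers: each of the 2048 thread exits used to leave one
nearly empty ~8Kb batch behind, roughly 2048 * 8Kb ~= 16Mb of batch traffic
for about 2Kb of user memory; with merging, heap growth must stay under the
2Mb kHeapSizeIncrementLimit for the test to print the expected line.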

View File

@@ -1,12 +1,10 @@
// Test thread_local_quarantine_size_kb
// RUN: %clangxx_asan %s -o %t
// RUN: %env_asan_opts=thread_local_quarantine_size_kb=256:verbosity=1 %run %t 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-VALUE
// RUN: %env_asan_opts=thread_local_quarantine_size_kb=64:quarantine_size_mb=64 %run %t 2>&1 | \
// RUN: %env_asan_opts=thread_local_quarantine_size_kb=64:quarantine_size_mb=64:verbosity=1 %run %t 2>&1 | \
// RUN: FileCheck %s --allow-empty --check-prefix=CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD
// RUN: %env_asan_opts=thread_local_quarantine_size_kb=0:quarantine_size_mb=0 %run %t 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-QUARANTINE-DISABLED
// RUN: FileCheck %s --check-prefix=CHECK-QUARANTINE-DISABLED-SMALL-OVERHEAD
// RUN: %env_asan_opts=thread_local_quarantine_size_kb=0:quarantine_size_mb=64 not %run %t 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-FOR-PARAMETER-ERROR
@@ -15,29 +13,33 @@
#include <string.h>
#include <sanitizer/allocator_interface.h>
// The idea is allocate a lot of small blocks, totaling 5Mb of user memory
// total, and verify that quarantine does not incur too much memory overhead.
// The idea is to allocate a lot of small blocks, totaling 5Mb of user memory,
// and verify that the quarantine does not incur too much memory overhead.
// There's always an overhead for red zones, shadow memory and such, but
// quarantine accounting should not significantly contribute to that.
// The zero-sized thread local cache is specifically tested since it used to
// generate a huge overhead of almost-empty quarantine batches.
static const size_t kHeapSizeIncrementLimit = 12 << 20;
static const int kNumAllocs = 20000;
static const int kAllocSize = 256;
static const size_t kHeapSizeLimit = 12 << 20;
int main() {
size_t old_heap_size = __sanitizer_get_heap_size();
size_t heap_size = __sanitizer_get_heap_size();
fprintf(stderr, "Heap size: %zd\n", heap_size);
for (int i = 0; i < kNumAllocs; i++) {
char *g = new char[kAllocSize];
memset(g, -1, kAllocSize);
delete [] (g);
char *temp = new char[kAllocSize];
memset(temp, -1, kAllocSize);
delete [] (temp);
}
size_t new_heap_size = __sanitizer_get_heap_size();
fprintf(stderr, "heap size: new: %zd old: %zd\n", new_heap_size,
old_heap_size);
if (new_heap_size - old_heap_size > kHeapSizeLimit)
fprintf(stderr, "Heap size limit exceeded");
fprintf(stderr, "New heap size: %zd\n", new_heap_size);
if (new_heap_size - heap_size < kHeapSizeIncrementLimit)
fprintf(stderr, "Heap growth is within limits\n");
}
// CHECK-VALUE: thread_local_quarantine_size_kb=256K
// CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD-NOT: Heap size limit exceeded
// CHECK-QUARANTINE-DISABLED-NOT: Heap size limit exceeded
// CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD: thread_local_quarantine_size_kb=64K
// CHECK-SMALL-LOCAL-CACHE-SMALL-OVERHEAD: Heap growth is within limits
// CHECK-QUARANTINE-DISABLED-SMALL-OVERHEAD: Heap growth is within limits
// CHECK-FOR-PARAMETER-ERROR: thread_local_quarantine_size_kb can be set to 0 only when quarantine_size_mb is set to 0