forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			88 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			88 lines
		
	
	
		
			2.4 KiB
		
	
	
	
		
			C++
		
	
	
	
| #include <stdio.h>
 | |
| #include <vector>
 | |
| #include <pthread.h>
 | |
| #include <malloc.h>
 | |
| #include <algorithm>
 | |
| 
 | |
| using namespace std;
 | |
| 
 | |
// Number of worker threads started by standalone_malloc_test().
const size_t kNumThreds = 16;
// Exclusive upper bound of the per-thread loop counter in MallocThread()
// (2^23 = 8388608).
const size_t kNumIters = static_cast<size_t>(1) << 23;
 | |
| 
 | |
// Optimization barrier. Passes |arg| as a register input to an empty,
// volatile asm statement that declares a "memory" clobber, so the compiler
// must assume memory may have been read or written at this point.
inline void break_optimization(void *arg) {
  __asm__ __volatile__(
      ""            // empty assembly template
      :             // no output operands
      : "r"(arg)    // input: arg, in a general-purpose register
      : "memory");  // clobber: memory
}
 | |
| 
 | |
| __attribute__((noinline))
 | |
| static void *MallocThread(void *t) {
 | |
|   size_t total_malloced = 0, total_freed = 0;
 | |
|   size_t max_in_use = 0;
 | |
|   size_t tid = reinterpret_cast<size_t>(t);
 | |
|   vector<pair<char *, size_t> > allocated;
 | |
|   allocated.reserve(kNumIters);
 | |
|   for (size_t i = 1; i < kNumIters; i++) {
 | |
|     if ((i % (kNumIters / 4)) == 0 && tid == 0)
 | |
|       fprintf(stderr, "   T[%ld] iter %ld\n", tid, i);
 | |
|     bool allocate = (i % 5) <= 2;  // 60% malloc, 40% free
 | |
|     if (i > kNumIters / 4)
 | |
|       allocate = i % 2;  // then switch to 50% malloc, 50% free
 | |
|     if (allocate) {
 | |
|       size_t size = 1 + (i % 200);
 | |
|       if ((i % 10001) == 0)
 | |
|         size *= 4096;
 | |
|       total_malloced += size;
 | |
|       char *x = new char[size];
 | |
|       x[0] = x[size - 1] = x[size / 2] = 0;
 | |
|       allocated.push_back(make_pair(x, size));
 | |
|       max_in_use = max(max_in_use, total_malloced - total_freed);
 | |
|     } else {
 | |
|       if (allocated.empty()) continue;
 | |
|       size_t slot = i % allocated.size();
 | |
|       char *p = allocated[slot].first;
 | |
|       p[0] = 0;  // emulate last user touch of the block
 | |
|       size_t size = allocated[slot].second;
 | |
|       total_freed += size;
 | |
|       swap(allocated[slot], allocated.back());
 | |
|       allocated.pop_back();
 | |
|       delete [] p;
 | |
|     }
 | |
|   }
 | |
|   if (tid == 0)
 | |
|     fprintf(stderr, "   T[%ld] total_malloced: %ldM in use %ldM max %ldM\n",
 | |
|            tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
 | |
|            max_in_use >> 20);
 | |
|   for (size_t i = 0; i < allocated.size(); i++)
 | |
|     delete [] allocated[i].first;
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| template <int depth>
 | |
| struct DeepStack {
 | |
|   __attribute__((noinline))
 | |
|   static void *run(void *t) {
 | |
|     break_optimization(0);
 | |
|     DeepStack<depth - 1>::run(t);
 | |
|     break_optimization(0);
 | |
|     return 0;
 | |
|   }
 | |
| };
 | |
| 
 | |
| template<>
 | |
| struct DeepStack<0> {
 | |
|   static void *run(void *t) {
 | |
|     MallocThread(t);
 | |
|     return 0;
 | |
|   }
 | |
| };
 | |
| 
 | |
| // Build with -Dstandalone_malloc_test=main to make it a separate program.
 | |
| int standalone_malloc_test() {
 | |
|   pthread_t t[kNumThreds];
 | |
|   for (size_t i = 0; i < kNumThreds; i++)
 | |
|     pthread_create(&t[i], 0, DeepStack<200>::run, reinterpret_cast<void *>(i));
 | |
|   for (size_t i = 0; i < kNumThreds; i++)
 | |
|     pthread_join(t[i], 0);
 | |
|   malloc_stats();
 | |
|   return 0;
 | |
| }
 |