forked from OSchip/llvm-project
				
			
		
			
				
	
	
		
			121 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			121 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			C++
		
	
	
	
| // Mini-benchmark for tsan VTS worst case performance
 | |
| // Idea:
 | |
| // 1) Spawn M + N threads (M >> N)
 | |
| //    We'll call the 'M' threads as 'garbage threads'.
 | |
| // 2) Make sure all threads have created thus no TIDs were reused
 | |
| // 3) Join the garbage threads
 | |
| // 4) Do many sync operations on the remaining N threads
 | |
| //
 | |
| // It turns out that due to O(M+N) VTS complexity the (4) is much slower with
 | |
| // when N is large.
 | |
| //
 | |
| // Some numbers:
 | |
| // a) clang++ native O1 with n_iterations=200kk takes
 | |
| //      5s regardless of M
 | |
| //    clang++ tsanv2 O1 with n_iterations=20kk takes
 | |
| //      23.5s with M=200
 | |
| //      11.5s with M=1
 | |
| //    i.e. tsanv2 is ~23x to ~47x slower than native, depends on M.
 | |
| // b) g++ native O1 with n_iterations=200kk takes
 | |
| //      5.5s regardless of M
 | |
| //    g++ tsanv1 O1 with n_iterations=2kk takes
 | |
| //      39.5s with M=200
 | |
| //      20.5s with M=1
 | |
| //    i.e. tsanv1 is ~370x to ~720x slower than native, depends on M.
 | |
| 
 | |
| #include <assert.h>
 | |
| #include <pthread.h>
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| 
 | |
| class __attribute__((aligned(64))) Mutex {
 | |
|  public:
 | |
|   Mutex()  { pthread_mutex_init(&m_, NULL); }
 | |
|   ~Mutex() { pthread_mutex_destroy(&m_); }
 | |
|   void Lock() { pthread_mutex_lock(&m_); }
 | |
|   void Unlock() { pthread_mutex_unlock(&m_); }
 | |
| 
 | |
|  private:
 | |
|   pthread_mutex_t m_;
 | |
| };
 | |
| 
 | |
| const int kNumMutexes = 1024;
 | |
| Mutex mutexes[kNumMutexes];
 | |
| 
 | |
| int n_threads, n_iterations;
 | |
| 
 | |
| pthread_barrier_t all_threads_ready, main_threads_ready;
 | |
| 
 | |
| void* GarbageThread(void *unused) {
 | |
|   pthread_barrier_wait(&all_threads_ready);
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| void *Thread(void *arg) {
 | |
|   long idx = (long)arg;
 | |
|   pthread_barrier_wait(&all_threads_ready);
 | |
| 
 | |
|   // Wait for the main thread to join the garbage threads.
 | |
|   pthread_barrier_wait(&main_threads_ready);
 | |
| 
 | |
|   printf("Thread %ld go!\n", idx);
 | |
|   int offset = idx * kNumMutexes / n_threads;
 | |
|   for (int i = 0; i < n_iterations; i++) {
 | |
|     mutexes[(offset + i) % kNumMutexes].Lock();
 | |
|     mutexes[(offset + i) % kNumMutexes].Unlock();
 | |
|   }
 | |
|   printf("Thread %ld done\n", idx);
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| int main(int argc, char **argv) {
 | |
|   int n_garbage_threads;
 | |
|   if (argc == 1) {
 | |
|     n_threads = 2;
 | |
|     n_garbage_threads = 200;
 | |
|     n_iterations = 20000000;
 | |
|   } else if (argc == 4) {
 | |
|     n_threads = atoi(argv[1]);
 | |
|     assert(n_threads > 0 && n_threads <= 32);
 | |
|     n_garbage_threads = atoi(argv[2]);
 | |
|     assert(n_garbage_threads > 0 && n_garbage_threads <= 16000);
 | |
|     n_iterations = atoi(argv[3]);
 | |
|   } else {
 | |
|     printf("Usage: %s n_threads n_garbage_threads n_iterations\n", argv[0]);
 | |
|     return 1;
 | |
|   }
 | |
|   printf("%s: n_threads=%d n_garbage_threads=%d n_iterations=%d\n",
 | |
|          __FILE__, n_threads, n_garbage_threads, n_iterations);
 | |
| 
 | |
|   pthread_barrier_init(&all_threads_ready, NULL, n_garbage_threads + n_threads + 1);
 | |
|   pthread_barrier_init(&main_threads_ready, NULL, n_threads + 1);
 | |
| 
 | |
|   pthread_t *t = new pthread_t[n_threads];
 | |
|   {
 | |
|     pthread_t *g_t = new pthread_t[n_garbage_threads];
 | |
|     for (int i = 0; i < n_garbage_threads; i++) {
 | |
|       int status = pthread_create(&g_t[i], 0, GarbageThread, NULL);
 | |
|       assert(status == 0);
 | |
|     }
 | |
|     for (int i = 0; i < n_threads; i++) {
 | |
|       int status = pthread_create(&t[i], 0, Thread, (void*)i);
 | |
|       assert(status == 0);
 | |
|     }
 | |
|     pthread_barrier_wait(&all_threads_ready);
 | |
|     printf("All threads started! Killing the garbage threads.\n");
 | |
|     for (int i = 0; i < n_garbage_threads; i++) {
 | |
|       pthread_join(g_t[i], 0);
 | |
|     }
 | |
|     delete [] g_t;
 | |
|   }
 | |
|   printf("Resuming the main threads.\n");
 | |
|   pthread_barrier_wait(&main_threads_ready);
 | |
| 
 | |
| 
 | |
|   for (int i = 0; i < n_threads; i++) {
 | |
|     pthread_join(t[i], 0);
 | |
|   }
 | |
|   delete [] t;
 | |
|   return 0;
 | |
| }
 |