#include <stdio.h>
#include <pthread.h>
#include <malloc.h>

#include <algorithm>
#include <vector>

using namespace std;

// Number of worker threads started by standalone_malloc_test().
const size_t kNumThreds = 16;
// Number of loop iterations each worker thread executes in MallocThread().
const size_t kNumIters = 1 << 23;

// Compiler barrier: an empty asm statement that takes `arg` as a register
// input and declares a "memory" clobber, so the compiler cannot move memory
// accesses across the call or discard the value passed in.
inline void break_optimization(void *arg) {
  __asm__ __volatile__("" : : "r" (arg) : "memory");
}

// Thread body of the stress test.
//
// `t` carries the thread index, smuggled through the void* argument of
// pthread_create. The loop performs a mix of allocations and frees:
//   * first quarter of the iterations: 60% allocations, 40% frees;
//   * afterwards: allocations on odd iterations, frees on even ones.
// Blocks are 1..200 bytes; every 10001st iteration the size is multiplied by
// 4096. Frees pick a slot derived from the iteration counter, so the freeing
// order is deterministic. Raw new[]/delete[] is intentional: exercising the
// allocator is the purpose of this function.
//
// Only thread 0 prints progress and the final statistics. Always returns 0.
__attribute__((noinline))
static void *MallocThread(void *t) {
  size_t total_malloced = 0, total_freed = 0;
  size_t max_in_use = 0;
  size_t tid = reinterpret_cast<size_t>(t);
  // Live blocks together with their sizes. Capacity is reserved up front so
  // that the bookkeeping vector never reallocates inside the loop.
  vector<pair<char *, size_t> > allocated;
  allocated.reserve(kNumIters);
  for (size_t i = 1; i < kNumIters; i++) {
    // size_t must be printed with %zu; %ld mismatches the argument type.
    if ((i % (kNumIters / 4)) == 0 && tid == 0)
      fprintf(stderr, " T[%zu] iter %zu\n", tid, i);
    bool allocate = (i % 5) <= 2;  // 60% malloc, 40% free
    if (i > kNumIters / 4)
      allocate = i % 2;  // then switch to 50% malloc, 50% free
    if (allocate) {
      size_t size = 1 + (i % 200);
      if ((i % 10001) == 0)
        size *= 4096;  // occasional large block
      total_malloced += size;
      char *x = new char[size];
      // Touch the first, middle and last byte of the new block.
      x[0] = x[size - 1] = x[size / 2] = 0;
      allocated.push_back(make_pair(x, size));
      max_in_use = max(max_in_use, total_malloced - total_freed);
    } else {
      if (allocated.empty()) continue;
      size_t slot = i % allocated.size();
      char *p = allocated[slot].first;
      p[0] = 0;  // emulate last user touch of the block
      size_t size = allocated[slot].second;
      total_freed += size;
      // Remove the slot without shifting: swap it with the last element.
      swap(allocated[slot], allocated.back());
      allocated.pop_back();
      delete [] p;
    }
  }
  if (tid == 0)
    fprintf(stderr, " T[%zu] total_malloced: %zuM in use %zuM max %zuM\n",
            tid, total_malloced >> 20, (total_malloced - total_freed) >> 20,
            max_in_use >> 20);
  // Release whatever is still live at the end of the run.
  for (size_t i = 0; i < allocated.size(); i++)
    delete [] allocated[i].first;
  return 0;
}

// Runs MallocThread underneath `depth` additional non-inlined stack frames.
// Each level recurses into DeepStack<depth - 1>::run; the barrier after the
// recursive call keeps that call out of tail position, so the frames are
// presumably kept on the stack rather than being collapsed by the optimizer.
template <int depth>
struct DeepStack {
  __attribute__((noinline))
  static void *run(void *t) {
    break_optimization(0);
    DeepStack<depth - 1>::run(t);
    break_optimization(0);
    return 0;
  }
};

// Recursion terminator: the innermost frame executes the actual workload.
template<>
struct DeepStack<0> {
  static void *run(void *t) {
    MallocThread(t);
    return 0;
  }
};

// Build with -Dstandalone_malloc_test=main to make it a separate program.
int standalone_malloc_test() { pthread_t t[kNumThreds]; for (size_t i = 0; i < kNumThreds; i++) pthread_create(&t[i], 0, DeepStack<200>::run, reinterpret_cast<void *>(i)); for (size_t i = 0; i < kNumThreds; i++) pthread_join(t[i], 0); malloc_stats(); return 0; }