#define	JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define	UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool		opt_prof = false;
bool		opt_prof_active = true;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_final = true;
bool		opt_prof_leak = false;
bool		opt_prof_accum = false;
char		opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

uint64_t	prof_interval = 0;

/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*ctx_locks;
static unsigned		cum_ctxs; /* Atomic counter. */

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.
 */
static malloc_mutex_t	prof_dump_mtx;
static char		prof_dump_buf[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PROF_DUMP_BUFSIZE
#else
    1
#endif
];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/******************************************************************************/

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
	prof_bt_t *ret;

	cassert(config_prof);

	/*
	 * Create a single allocation that has space for vec immediately
	 * following the prof_bt_t structure.  The backtraces that get stored
	 * in the backtrace caches are copied from stack-allocated temporary
	 * variables, so size is known at creation time.  Making this a
	 * contiguous object improves cache locality.
	 */
	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
	    (bt->len * sizeof(void *)));
	if (ret == NULL)
		return (NULL);
	ret->vec = (void **)((uintptr_t)ret +
	    QUANTUM_CEILING(sizeof(prof_bt_t)));
	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
	ret->len = bt->len;

	return (ret);
}
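/*
 * prof_enter()/prof_leave() bracket operations on the global bt2ctx table.
 * While enq is true, interval- and growth-triggered dumps merely set
 * enq_idump/enq_gdump rather than dumping immediately; prof_leave() performs
 * any dumps that were deferred while the lock was held.
 */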
static inline void
prof_enter(prof_tdata_t *prof_tdata)
{

	cassert(config_prof);

	assert(prof_tdata->enq == false);
	prof_tdata->enq = true;

	malloc_mutex_lock(&bt2ctx_mtx);
}

static inline void
prof_leave(prof_tdata_t *prof_tdata)
{
	bool idump, gdump;

	cassert(config_prof);

	malloc_mutex_unlock(&bt2ctx_mtx);

	assert(prof_tdata->enq);
	prof_tdata->enq = false;
	idump = prof_tdata->enq_idump;
	prof_tdata->enq_idump = false;
	gdump = prof_tdata->enq_gdump;
	prof_tdata->enq_gdump = false;

	if (idump)
		prof_idump();
	if (gdump)
		prof_gdump();
}

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt)
{
	int nframes;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
	if (nframes <= 0)
		return;
	bt->len = nframes;
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
	void *ip;

	cassert(config_prof);

	ip = (void *)_Unwind_GetIP(context);
	if (ip == NULL)
		return (_URC_END_OF_STACK);
	data->bt->vec[data->bt->len] = ip;
	data->bt->len++;
	if (data->bt->len == data->max)
		return (_URC_END_OF_STACK);

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt)
{
	prof_unwind_data_t data = {bt, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt)
{
	/*
	 * __builtin_frame_address() and __builtin_return_address() require
	 * constant arguments, so the stack walk is unrolled one macro
	 * expansion per frame, up to PROF_BT_MAX frames.
	 */
#define	BT_FRAME(i)							\
	if ((i) < PROF_BT_MAX) {					\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		bt->vec[(i)] = p;					\
		bt->len = (i) + 1;					\
	} else								\
		return;

	cassert(config_prof);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt)
{

	cassert(config_prof);
	not_reached();
}
#endif

static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}

static void
prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
{

	ctx->bt = bt;
	ctx->lock = prof_ctx_mutex_choose();
	/*
	 * Set nlimbo to 1, in order to avoid a race condition with
	 * prof_ctx_merge()/prof_ctx_destroy().
	 */
	ctx->nlimbo = 1;
	ql_elm_new(ctx, dump_link);
	memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
	ql_new(&ctx->cnts_ql);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_ctx_merge() in order to
	 * avoid a race between the main body of prof_ctx_merge() and entry
	 * into this function.
	 */
	prof_tdata = prof_tdata_get(false);
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
	prof_enter(prof_tdata);
	malloc_mutex_lock(ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
	    ctx->nlimbo == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			not_reached();
		prof_leave(prof_tdata);
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave(prof_tdata);
	}
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
		/*
		 * Increment ctx->nlimbo in order to keep another thread from
		 * winning the race to destroy ctx while this one has ctx->lock
		 * dropped.  Without this, it would be possible for another
		 * thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->nlimbo++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}
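/*
 * Look up bt in the global bt2ctx table, creating and inserting a new ctx the
 * first time a backtrace is seen.  On success, *p_btkey, *p_ctx, and
 * *p_new_ctx are filled in and false is returned; true indicates allocation
 * failure.  In either case ctx->nlimbo has been raised, so the ctx cannot be
 * destroyed before the caller links a counter into it.
 */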
static bool
prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
    prof_ctx_t **p_ctx, bool *p_new_ctx)
{
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	union {
		prof_bt_t	*p;
		void		*v;
	} btkey;
	bool new_ctx;

	prof_enter(prof_tdata);
	if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
		/* bt has never been seen before.  Insert it. */
		ctx.v = imalloc(sizeof(prof_ctx_t));
		if (ctx.v == NULL) {
			prof_leave(prof_tdata);
			return (true);
		}
		btkey.p = bt_dup(bt);
		if (btkey.v == NULL) {
			prof_leave(prof_tdata);
			idalloc(ctx.v);
			return (true);
		}
		prof_ctx_init(ctx.p, btkey.p);
		if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
			/* OOM. */
			prof_leave(prof_tdata);
			idalloc(btkey.v);
			idalloc(ctx.v);
			return (true);
		}
		new_ctx = true;
	} else {
		/*
		 * Increment nlimbo, in order to avoid a race condition with
		 * prof_ctx_merge()/prof_ctx_destroy().
		 */
		malloc_mutex_lock(ctx.p->lock);
		ctx.p->nlimbo++;
		malloc_mutex_unlock(ctx.p->lock);
		new_ctx = false;
	}
	prof_leave(prof_tdata);

	*p_btkey = btkey.v;
	*p_ctx = ctx.p;
	*p_new_ctx = new_ctx;
	return (false);
}

prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (NULL);

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		void *btkey;
		prof_ctx_t *ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
			return (NULL);

		/* Link a prof_thr_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				not_reached();
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				if (new_ctx)
					prof_ctx_destroy(ctx);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
			if (new_ctx)
				prof_ctx_destroy(ctx);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx->lock);
		ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}

void
prof_sample_threshold_update(prof_tdata_t *prof_tdata)
{
	/*
	 * The body of this function is compiled out unless heap profiling is
	 * enabled, so that it is possible to compile jemalloc with floating
	 * point support completely disabled.  Avoiding floating point code is
	 * important on memory-constrained systems, but it also enables a
	 * workaround for versions of glibc that don't properly save/restore
	 * floating point registers during dynamic lazy symbol loading (which
	 * internally calls into whatever malloc implementation happens to be
	 * integrated into the application).  Note that some compilers (e.g.
	 * gcc 4.8) may use floating point registers for fast memory moves, so
	 * jemalloc must be compiled with such optimizations disabled (e.g.
	 * -mno-sse) in order for the workaround to be complete.
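	 *
	 * Sampling can be thought of as a Bernoulli trial per allocated byte
	 * with success probability 2^-opt_lg_prof_sample; rather than testing
	 * every byte, the number of bytes until the next success is drawn
	 * directly as a geometric variate below.  The resulting
	 * bytes_until_sample value is consumed elsewhere in the allocation
	 * path, and a new threshold is drawn here each time an allocation is
	 * sampled.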
	 */
#ifdef JEMALLOC_PROF
	uint64_t r;
	double u;

	if (!config_prof)
		return;

	if (prof_tdata == NULL)
		prof_tdata = prof_tdata_get(false);

	if (opt_lg_prof_sample == 0) {
		prof_tdata->bytes_until_sample = 0;
		return;
	}

	/*
	 * Compute sample threshold as a geometrically distributed random
	 * variable with mean (2^opt_lg_prof_sample).
	 *
	 *                         __        __
	 *                         |  log(u)  |                     1
	 * prof_tdata->threshold = | -------- |, where p = -------------------
	 *                         | log(1-p) |             opt_lg_prof_sample
	 *                                                 2
	 *
	 * For more information on the math, see:
	 *
	 *   Non-Uniform Random Variate Generation
	 *   Luc Devroye
	 *   Springer-Verlag, New York, 1986
	 *   pp 500
	 *   (http://luc.devroye.org/rnbookindex.html)
	 */
	prng64(r, 53, prof_tdata->prng_state,
	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
	u = (double)r * (1.0/9007199254740992.0L);
	prof_tdata->bytes_until_sample = (uint64_t)(log(u) /
	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
	    + (uint64_t)1U;
#endif
}

#ifdef JEMALLOC_JET
size_t
prof_bt_count(void)
{
	size_t bt_count;
	prof_tdata_t *prof_tdata;

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (0);

	prof_enter(prof_tdata);
	bt_count = ckh_count(&bt2ctx);
	prof_leave(prof_tdata);

	return (bt_count);
}
#endif

#ifdef JEMALLOC_JET
#undef prof_dump_open
#define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
#endif
static int
prof_dump_open(bool propagate_err, const char *filename)
{
	int fd;

	fd = creat(filename, 0644);
	if (fd == -1 && propagate_err == false) {
		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
		    filename);
		if (opt_abort)
			abort();
	}

	return (fd);
}
#ifdef JEMALLOC_JET
#undef prof_dump_open
#define	prof_dump_open JEMALLOC_N(prof_dump_open)
prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
#endif

static bool
prof_dump_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (propagate_err == false) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_dump_close(bool propagate_err)
{
	bool ret;

	assert(prof_dump_fd != -1);
	ret = prof_dump_flush(propagate_err);
	close(prof_dump_fd);
	prof_dump_fd = -1;

	return (ret);
}

static bool
prof_dump_write(bool propagate_err, const char *s)
{
	unsigned i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_dump_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_dump_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_dump_write(propagate_err, buf);

	return (ret);
}
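/*
 * First pass of a dump: snapshot a ctx's counters.  thr_cnt->epoch is
 * incremented by the owning thread before and after each counter update, so
 * spinning until an even epoch that is unchanged across the copy yields a
 * consistent snapshot of cnts without acquiring any per-thread locks
 * (a seqlock-style read).
 */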
static void
prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
    prof_ctx_list_t *ctx_ql)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	/*
	 * Increment nlimbo so that ctx won't go away before dump.
	 * Additionally, link ctx into the dump list so that it is included in
	 * prof_dump()'s second pass.
	 */
	ctx->nlimbo++;
	ql_tail_insert(ctx_ql, ctx, dump_link);

	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}

	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}

static bool
prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
{

	if (opt_lg_prof_sample == 0) {
		if (prof_dump_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
		    cnt_all->curobjs, cnt_all->curbytes,
		    cnt_all->accumobjs, cnt_all->accumbytes))
			return (true);
	} else {
		if (prof_dump_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
		    cnt_all->curobjs, cnt_all->curbytes,
		    cnt_all->accumobjs, cnt_all->accumbytes,
		    ((uint64_t)1U << opt_lg_prof_sample)))
			return (true);
	}

	return (false);
}

static void
prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
{

	ctx->nlimbo--;
	ql_remove(ctx_ql, ctx, dump_link);
}

static void
prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
{

	malloc_mutex_lock(ctx->lock);
	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
	malloc_mutex_unlock(ctx->lock);
}

static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
    prof_ctx_list_t *ctx_ql)
{
	bool ret;
	unsigned i;

	cassert(config_prof);

	/*
	 * Current statistics can sum to 0 as a result of unmerged per thread
	 * statistics.  Additionally, interval- and growth-triggered dumps can
	 * occur between the time a ctx is created and when its statistics are
	 * filled in.  Avoid dumping any ctx that is an artifact of either
	 * implementation detail.
	 */
	malloc_mutex_lock(ctx->lock);
	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
		assert(ctx->cnt_summed.curobjs == 0);
		assert(ctx->cnt_summed.curbytes == 0);
		assert(ctx->cnt_summed.accumobjs == 0);
		assert(ctx->cnt_summed.accumbytes == 0);
		ret = false;
		goto label_return;
	}

	if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
	    " [%"PRIu64": %"PRIu64"] @",
	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
		ret = true;
		goto label_return;
	}

	for (i = 0; i < bt->len; i++) {
		if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
		    (uintptr_t)bt->vec[i])) {
			ret = true;
			goto label_return;
		}
	}

	if (prof_dump_write(propagate_err, "\n")) {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
	malloc_mutex_unlock(ctx->lock);
	return (ret);
}

static bool
prof_dump_maps(bool propagate_err)
{
	bool ret;
	int mfd;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

#ifdef __FreeBSD__
	malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
#else
	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
	    (int)getpid());
#endif
	mfd = open(filename, O_RDONLY);
	if (mfd != -1) {
		ssize_t nread;

		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err) {
			ret = true;
			goto label_return;
		}
		nread = 0;
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_dump_flush(propagate_err) &&
				    propagate_err) {
					ret = true;
					goto label_return;
				}
			}
			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
		} while (nread > 0);
	} else {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	if (mfd != -1)
		close(mfd);
	return (ret);
}

static void
prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
    const char *filename)
{

	if (cnt_all->curbytes != 0) {
		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
		    PRId64" object%s, %zu context%s\n",
		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
		    leak_nctx, (leak_nctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
		    filename);
	}
}
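/*
 * Write a complete heap profile to filename.  The dump proceeds in two
 * passes: the first (under the bt2ctx lock) merges per thread counters into
 * each ctx and links the ctx into ctx_ql with nlimbo raised; the second
 * writes each linked ctx and drops its nlimbo reference.  Returns true on
 * failure.
 */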
static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{
	prof_tdata_t *prof_tdata;
	prof_cnt_t cnt_all;
	size_t tabind;
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	size_t leak_nctx;
	prof_ctx_list_t ctx_ql;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	malloc_mutex_lock(&prof_dump_mtx);

	/* Merge per thread profile stats, and sum them in cnt_all. */
	memset(&cnt_all, 0, sizeof(prof_cnt_t));
	leak_nctx = 0;
	ql_new(&ctx_ql);
	prof_enter(prof_tdata);
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
	prof_leave(prof_tdata);

	/* Create dump file. */
	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
		goto label_open_close_error;

	/* Dump profile header. */
	if (prof_dump_header(propagate_err, &cnt_all))
		goto label_write_error;

	/* Dump per ctx profile stats. */
	while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
		if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
			goto label_write_error;
	}

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err))
		goto label_write_error;

	if (prof_dump_close(propagate_err))
		goto label_open_close_error;

	malloc_mutex_unlock(&prof_dump_mtx);

	if (leakcheck)
		prof_leakcheck(&cnt_all, leak_nctx, filename);

	return (false);
label_write_error:
	prof_dump_close(propagate_err);
label_open_close_error:
	while ((ctx.p = ql_first(&ctx_ql)) != NULL)
		prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
	malloc_mutex_unlock(&prof_dump_mtx);
	return (true);
}

#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
#define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
static void
prof_dump_filename(char *filename, char v, uint64_t vseq)
{

	cassert(config_prof);

	if (vseq != VSEQ_INVALID) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c%"PRIu64".heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c.heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;

	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'f', VSEQ_INVALID);
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, opt_prof_leak);
	}
}

void
prof_idump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}
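/*
 * Dump a profile in response to an explicit request (e.g. via the mallctl
 * interface).  If filename is NULL, a name of the form
 * "<prefix>.<pid>.<seq>.m<mseq>.heap" is generated.  Returns true on failure.
 */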
bool
prof_mdump(const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (opt_prof == false || prof_booted == false)
		return (true);

	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(true, filename, false));
}

void
prof_gdump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

static void
prof_bt_hash(const void *key, size_t r_hash[2])
{
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

prof_tdata_t *
prof_tdata_init(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
	if (prof_tdata == NULL)
		return (NULL);

	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp)) {
		idalloc(prof_tdata);
		return (NULL);
	}
	ql_new(&prof_tdata->lru_ql);

	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
	if (prof_tdata->vec == NULL) {
		ckh_delete(&prof_tdata->bt2cnt);
		idalloc(prof_tdata);
		return (NULL);
	}

	prof_tdata->prng_state = (uint64_t)(uintptr_t)prof_tdata;
	prof_sample_threshold_update(prof_tdata);

	prof_tdata->enq = false;
	prof_tdata->enq_idump = false;
	prof_tdata->enq_gdump = false;

	prof_tdata_tsd_set(&prof_tdata);

	return (prof_tdata);
}

void
prof_tdata_cleanup(void *arg)
{
	prof_thr_cnt_t *cnt;
	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

	cassert(config_prof);

	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
		/*
		 * Another destructor deallocated memory after this destructor
		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
		 * in order to receive another callback.
		 */
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
		 * wouldn't cause re-creation of the prof_tdata.  This time, do
		 * nothing, so that the destructor will not be called again.
		 */
	} else if (prof_tdata != NULL) {
		/*
		 * Delete the hash table.  All of its contents can still be
		 * iterated over via the LRU.
		 */
		ckh_delete(&prof_tdata->bt2cnt);
		/*
		 * Iteratively merge cnt's into the global stats and delete
		 * them.
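		 * Merging the final cnt for a ctx can leave the ctx unused,
		 * in which case prof_ctx_merge() arranges for
		 * prof_ctx_destroy() to tear the ctx down as well.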
		 */
		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) !=
		    NULL) {
			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
			prof_ctx_merge(cnt->ctx, cnt);
			idalloc(cnt);
		}
		idalloc(prof_tdata->vec);
		idalloc(prof_tdata);
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	}
}

void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof must be in its final state before any arenas are
	 * initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && opt_prof == false) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}
}

bool
prof_boot2(void)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2ctx_mtx))
			return (true);
		if (prof_tdata_tsd_boot()) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx))
			return (true);
		if (malloc_mutex_init(&prof_dump_mtx))
			return (true);

		if (atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
		    sizeof(malloc_mutex_t));
		if (ctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&ctx_locks[i]))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

void
prof_prefork(void)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_prefork(&bt2ctx_mtx);
		malloc_mutex_prefork(&prof_dump_seq_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_prefork(&ctx_locks[i]);
	}
}

void
prof_postfork_parent(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(&ctx_locks[i]);
		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(&bt2ctx_mtx);
	}
}

void
prof_postfork_child(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(&ctx_locks[i]);
		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
		malloc_mutex_postfork_child(&bt2ctx_mtx);
	}
}

/******************************************************************************/