// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef BASE_TRACKED_OBJECTS_H_
#define BASE_TRACKED_OBJECTS_H_
#pragma once
#include <map>
#include <string>
#include <vector>
#include "base/base_api.h"
#include "base/synchronization/lock.h"
#include "base/tracked.h"
#include "base/threading/thread_local_storage.h"
// TrackedObjects provides a database of stats about objects (generally Tasks)
// that are tracked. Tracking means their birth, death, duration, birth thread,
// death thread, and birth place are recorded. This data is carefully spread
// across a series of objects so that the counts and times can be rapidly
// updated without (usually) having to lock the data, and hence there is usually
// very little contention caused by the tracking. The data can be viewed via
// the about:tasks URL, with a variety of sorting and filtering choices.
//
// These classes serve as the basis of a profiler of sorts for the Tasks system.
// As a result, design decisions were made to maximize speed, by minimizing
// recurring allocation/deallocation, lock contention and data copying. In the
// "stable" state, which is reached relatively quickly, there is no separate
// marginal allocation cost associated with construction or destruction of
// tracked objects, no locks are generally employed, and probably the largest
// computational cost is associated with obtaining start and stop times for
// instances as they are created and destroyed. The introduction of worker
// threads had a slight impact on this approach, and required use of some locks
// when accessing data from the worker threads.
//
// The following describes the lifecycle of tracking an instance.
//
// First off, when the instance is created, the FROM_HERE macro is expanded
// to specify the birth place (file, line, function) where the instance was
// created. That data is used to create a transient Location instance
// encapsulating the above triple of information. The strings (like __FILE__)
// are passed around by reference, with the assumption that they are static, and
// will never go away. This ensures that the strings can be dealt with as atoms
// with great efficiency (i.e., copying of strings is never needed, and
// comparisons for equality can be based on pointer comparisons).
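//
// As a rough illustration (a sketch only; see the included headers for the
// authoritative FROM_HERE and Location definitions):
//
//   // FROM_HERE expands to (approximately):
//   //   tracked_objects::Location(__FUNCTION__, __FILE__, __LINE__)
//   tracked_objects::Location birth_place = FROM_HERE;
//   // Since __FILE__ and __FUNCTION__ yield static string literals, Location
//   // can store the raw const char* values and compare them by pointer.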
//
// Next, a Births instance is created for use ONLY on the thread where this
// instance was created. That Births instance records (in a base class
// BirthOnThread) references to the static data provided in a Location instance,
// as well as a pointer specifying the thread on which the birth takes place.
// Hence there is at most one Births instance for each Location on each thread.
// The derived Births class contains slots for recording statistics about all
// instances born at the same location. Statistics currently include only the
// count of instances constructed.
// Since the base class BirthOnThread contains only constant data, it can be
// freely accessed by any thread at any time (i.e., only the statistic needs to
// be handled carefully, and it is ONLY read or written by the birth thread).
//
// Having now either constructed or found the Births instance described above, a
// pointer to the Births instance is then embedded in a base class of the
// instance we're tracking (usually a Task). This fact alone is very useful in
// debugging, when there is a question of where an instance came from. In
// addition, the birth time is also embedded in the base class Tracked (see
// tracked.h), and used to later evaluate the lifetime duration.
// As a result of the above embedding, we can (for any tracked instance) find
// out its location of birth, and thread of birth, without using any locks, as
// all that data is constant across the life of the process.
//
// The amount of memory used in the above data structures depends on how many
// threads there are, and how many Locations of construction there are.
// Fortunately, we don't use memory that is the product of those two counts, but
// rather we only need one Births instance for each thread that constructs an
// instance at a Location. In many cases, instances (such as Tasks) are only
// created on one thread, so the memory utilization is actually fairly
// restrained.
//
// Lastly, when an instance is deleted, the final tallies of statistics are
// carefully accumulated. That tallying writes into slots (members) in a
// collection of DeathData instances. For each birth place Location whose
// instances are destroyed on a given thread, there is a DeathData instance to
// record the additional death count, as well as to accumulate the lifetime
// duration of the instance as it is destroyed (dies). By maintaining a single
// place to aggregate this
// addition *only* for the given thread, we avoid the need to lock such
// DeathData instances.
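//
// In essence, the per-death accumulation is (a sketch; see the DeathData
// class below for the actual interface):
//
//   void DeathData::RecordDeath(const base::TimeDelta& duration) {
//     ++count_;                      // One more death for this birth place.
//     life_duration_ += duration;    // Running sum of lifetimes.
//     int64 ms = duration.InMilliseconds();
//     square_duration_ += ms * ms;   // Running sum of squares, for std-dev.
//   }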
//
// With the above lifecycle description complete, the major remaining detail is
// explaining how each thread maintains a list of DeathData instances, and of
// Births instances, and is able to avoid additional (redundant/unnecessary)
// allocations.
//
// Each thread maintains a list of data items specific to that thread in a
// ThreadData instance (for that specific thread only). The two critical items
// are lists of DeathData and Births instances. These lists are maintained in
// STL maps, which are indexed by Location. As noted earlier, we can compare
// locations very efficiently as we consider the underlying data (file,
// function, line) to be atoms, and hence pointer comparison is used rather than
// (slow) string comparisons.
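//
// A simplified sketch of the per-thread lookup that a birth triggers (an
// illustration only; the real implementation is behind ThreadData::TallyABirth,
// declared below, and lives in the .cc file):
//
//   Births* ThreadData::TallyABirth(const Location& location) {
//     BirthMap::iterator it = birth_map_.find(location);
//     if (it != birth_map_.end()) {
//       it->second->RecordBirth();      // Seen this Location before: count it.
//       return it->second;
//     }
//     Births* tracker = new Births(location);
//     tracker->RecordBirth();           // (Assuming the count starts at zero.)
//     base::AutoLock lock(lock_);       // Writers lock so remote snapshot
//     birth_map_[location] = tracker;   // readers see a consistent map.
//     return tracker;
//   }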
//
// To provide a mechanism for iterating over all "known threads," which means
// threads that have recorded a birth or a death, we create a singly linked list
// of ThreadData instances. Each such instance maintains a pointer to the next
// one. A static member of ThreadData provides a pointer to the first_ item on
// this global list, and access to that first_ item requires the use of a lock_.
// When a new ThreadData instance is added to the global list, it is prepended,
// which ensures that any prior acquisition of the list is valid (i.e., the
// holder can iterate over it without fear of it changing, or the necessity of
// using an additional lock). Iterations are actually pretty rare (used
// primarily for cleanup, or snapshotting data for display), so this lock has
// very little global performance impact.
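//
// The prepend itself is tiny (a sketch, using the first_, next_ and list_lock_
// members declared in ThreadData below):
//
//   {
//     base::AutoLock lock(list_lock_);  // Only the head pointer needs a lock.
//     next_ = first_;                   // The new instance points at the old
//     first_ = this;                    // head, so a previously captured head
//   }                                   // still yields a valid (shorter) list.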
//
// The above description tries to define the high performance (run time)
// portions of these classes. After gathering statistics, calls instigated
// by visiting about:tasks will assemble and aggregate data for display. The
// following data structures are used for producing such displays. They are
// not performance critical, and their only major constraint is that they should
// be able to run concurrently with ongoing augmentation of the birth and death
// data.
//
// For a given birth location, information about births is spread across data
// structures that are asynchronously changing on various threads. For display
// purposes, we need to construct Snapshot instances for each combination of
// birth thread, death thread, and location, along with the count of such
// lifetimes. We gather such data into Snapshot instances, so that the
// instances can be sorted and aggregated (and remain frozen during our
// processing). Snapshot instances use pointers to constant portions of the
// birth and death data structures, but have local (frozen) copies of the
// actual statistics (birth count, durations, etc.).
//
// A DataCollector is a thread-safe container object that holds a set of
// Snapshots. It can be passed from thread to thread, and each thread
// contributes to it by adding or updating Snapshot instances, until all the
// Snapshot instances have been accumulated.
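//
// The collection phase then amounts to roughly the following (a sketch using
// only the public methods declared below; the actual driver is presumably the
// ThreadData::WriteHTML path):
//
//   DataCollector collected_data;
//   for (ThreadData* data = ThreadData::first(); data; data = data->next())
//     collected_data.Append(*data);
//   collected_data.AddListOfLivingObjects();  // Births with no death yet.
//   DataCollector::Collection* collection = collected_data.collection();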
//
// After an array of Snapshot instances is collected into a DataCollector, it
// needs to be sorted, and possibly aggregated (example: how many threads are in
// a specific consecutive set of Snapshots? What was the total birth count for
// that set? etc.). Aggregation instances collect running sums over any set of
// Snapshot instances, and are used to print sub-totals in an about:tasks page.
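//
// Sorting and grouping use the Comparator class (a sketch continuing the
// example above; "query" stands for whatever sort specification the
// about:tasks URL supplied):
//
//   Comparator comparator;
//   comparator.ParseQuery(query);
//   comparator.Sort(collected_data.collection());
//   // Comparator::Equivalent() then tells us where one aggregation group ends
//   // and the next begins, so an Aggregation can accumulate sub-totals.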
//
// TODO(jar): I need to store DataCollections, and provide facilities for taking
// the difference between two gathered DataCollections. For now, I'm just
// adding a hack that Reset()'s all counts and stats to zero. This is also
// done in a slightly thread-unsafe fashion, as the resetting is done
// asynchronously relative to ongoing updates, and worse yet, some data fields
// are 64-bit quantities that are not atomically accessed (reset or incremented
// etc.). For basic profiling, this will work "most of the time," and should be
// sufficient... but storing away DataCollections is the "right way" to do this.
//
class MessageLoop;
namespace tracked_objects {
//------------------------------------------------------------------------------
// For a specific thread, and a specific birth place, the collection of all
// death info (with tallies for each death thread, to prevent access conflicts).
class ThreadData;
class BASE_API BirthOnThread {
public:
explicit BirthOnThread(const Location& location);
const Location location() const { return location_; }
const ThreadData* birth_thread() const { return birth_thread_; }
private:
// File/lineno of birth. This defines the essence of the type, as the context
// of the birth (construction) often tells what the item is for. This field
// is const, and hence safe to access from any thread.
const Location location_;
// The thread that records births into this object. Only this thread is
// allowed to access birth_count_ (which changes over time).
const ThreadData* birth_thread_; // The thread this birth took place on.
DISALLOW_COPY_AND_ASSIGN(BirthOnThread);
};
//------------------------------------------------------------------------------
// A class for accumulating counts of births (without bothering with a map<>).
class BASE_API Births: public BirthOnThread {
public:
explicit Births(const Location& location);
int birth_count() const { return birth_count_; }
// When we have a birth, we update the count for this birth place.
void RecordBirth() { ++birth_count_; }
// When a birthplace is changed (updated), we need to decrement the counter
// for the old instance.
void ForgetBirth() { --birth_count_; } // We corrected a birth place.
// Hack to quickly reset all counts to zero.
void Clear() { birth_count_ = 0; }
private:
// The number of births on this thread for our location_.
int birth_count_;
DISALLOW_COPY_AND_ASSIGN(Births);
};
//------------------------------------------------------------------------------
// Basic info summarizing multiple destructions of an object with a single
// birthplace (fixed Location). Used both on specific threads, and also used
// in snapshots when integrating assembled data.
class BASE_API DeathData {
public:
// Default initializer.
DeathData() : count_(0), square_duration_(0) {}
// When deaths have not yet taken place, and we gather data from all the
// threads, we create DeathData stats that tally the number of births without
// a corresponding death.
explicit DeathData(int count) : count_(count), square_duration_(0) {}
void RecordDeath(const base::TimeDelta& duration);
// Metrics accessors.
int count() const { return count_; }
base::TimeDelta life_duration() const { return life_duration_; }
int64 square_duration() const { return square_duration_; }
int AverageMsDuration() const;
double StandardDeviation() const;
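// (The running tallies above suffice for this: presumably the implementation
// computes variance = square_duration_/count_ - (mean duration in ms)^2 and
// takes the square root, so no per-instance samples need to be retained.)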
// Accumulate metrics from other into this.
void AddDeathData(const DeathData& other);
// Simple print of internal state.
void Write(std::string* output) const;
// Reset all tallies to zero.
void Clear();
private:
int count_; // Number of destructions.
base::TimeDelta life_duration_; // Sum of all lifetime durations.
int64 square_duration_; // Sum of squared lifetime durations (ms^2).
};
//------------------------------------------------------------------------------
// A temporary collection of data that can be sorted and summarized. It is
// gathered (carefully) from many threads. Instances are held in arrays and
// processed, filtered, and rendered.
// The source of this data was collected on many threads, and is asynchronously
// changing. The data in this instance is not asynchronously changing.
class BASE_API Snapshot {
public:
// When snapshotting a full life cycle set (birth-to-death), use this:
Snapshot(const BirthOnThread& birth_on_thread, const ThreadData& death_thread,
const DeathData& death_data);
// When snapshotting a birth, with no death yet, use this:
Snapshot(const BirthOnThread& birth_on_thread, int count);
const ThreadData* birth_thread() const { return birth_->birth_thread(); }
const Location location() const { return birth_->location(); }
const BirthOnThread& birth() const { return *birth_; }
const ThreadData* death_thread() const { return death_thread_; }
const DeathData& death_data() const { return death_data_; }
const std::string DeathThreadName() const;
int count() const { return death_data_.count(); }
base::TimeDelta life_duration() const { return death_data_.life_duration(); }
int64 square_duration() const { return death_data_.square_duration(); }
int AverageMsDuration() const { return death_data_.AverageMsDuration(); }
void Write(std::string* output) const;
void Add(const Snapshot& other);
private:
const BirthOnThread* birth_; // Includes Location and birth_thread.
const ThreadData* death_thread_;
DeathData death_data_;
};
//------------------------------------------------------------------------------
// DataCollector is a container class for Snapshot and BirthOnThread count
// items. It protects the gathering under locks, so that it could be called via
// PostTask on any thread, or passed to all the target threads in parallel.
class BASE_API DataCollector {
public:
typedef std::vector<Snapshot> Collection;
// Construct with a count of how many threads should contribute. This helps us
// determine (in the async case) when we are done with all contributions.
DataCollector();
~DataCollector();
// Add all stats from the indicated thread into our arrays. This function is
// mutex protected, and *could* be called from any thread (although the current
// implementation serializes calls to Append).
void Append(const ThreadData& thread_data);
// After the accumulation phase, the following accessor is used to process the
// data.
Collection* collection();
// After collection of death data is complete, we can add entries for all the
// remaining living objects.
void AddListOfLivingObjects();
private:
typedef std::map<const BirthOnThread*, int> BirthCount;
// This instance may be provided to several threads to contribute data. The
// following counter tracks how many more threads will contribute. When it is
// zero, then all asynchronous contributions are complete, and locked access
// is no longer needed.
int count_of_contributing_threads_;
// The array that we collect data into.
Collection collection_;
// The total number of births recorded at each location for which we have not
// seen a death count.
BirthCount global_birth_count_;
base::Lock accumulation_lock_; // Protects access during accumulation phase.
DISALLOW_COPY_AND_ASSIGN(DataCollector);
};
//------------------------------------------------------------------------------
// Aggregation contains summaries (totals and subtotals) of groups of Snapshot
// instances to provide printing of these collections on a single line.
class BASE_API Aggregation: public DeathData {
public:
Aggregation();
~Aggregation();
void AddDeathSnapshot(const Snapshot& snapshot);
void AddBirths(const Births& births);
void AddBirth(const BirthOnThread& birth);
void AddBirthPlace(const Location& location);
void Write(std::string* output) const;
void Clear();
private:
int birth_count_;
std::map<std::string, int> birth_files_;
std::map<Location, int> locations_;
std::map<const ThreadData*, int> birth_threads_;
DeathData death_data_;
std::map<const ThreadData*, int> death_threads_;
DISALLOW_COPY_AND_ASSIGN(Aggregation);
};
//------------------------------------------------------------------------------
// Comparator is a class that supports the comparison of Snapshot instances.
// An instance is actually a list of chained Comparators, which can provide for
// arbitrary ordering. The path portion of an about:tasks URL is translated
// into such a chain, which is then used to order Snapshot instances in a
// vector. It orders them into groups (for aggregation), and can also order
// instances within the groups (for detailed rendering of the instances in an
// aggregation).
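// As a sketch of how such a chain might be assembled directly (the URL-driven
// path goes through ParseQuery/ParseKeyphrase instead, and an empty
// restriction string is assumed here to mean "no filter"):
//
//   Comparator comparator;
//   comparator.SetTiebreaker(Comparator::BIRTH_THREAD, "");  // Primary key.
//   comparator.SetTiebreaker(Comparator::COUNT, "");         // Then by count.
//   comparator.Sort(collection);  // |collection| from a DataCollector, as
//                                 // sketched earlier.
//   // Snapshots for which Equivalent() returns true land in the same group.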
class BASE_API Comparator {
public:
// Selector enum is the token identifier for each parsed keyword, most of
// which specify a sort order.
// Since it is not meaningful to sort more than once on a specific key, we
// use bitfields to accumulate what we have sorted on so far.
enum Selector {
// Sort orders.
NIL = 0,
BIRTH_THREAD = 1,
DEATH_THREAD = 2,
BIRTH_FILE = 4,
BIRTH_FUNCTION = 8,
BIRTH_LINE = 16,
COUNT = 32,
AVERAGE_DURATION = 64,
TOTAL_DURATION = 128,
// Immediate action keywords.
RESET_ALL_DATA = -1,
};
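// For instance, a chain sorting on BIRTH_THREAD and then on COUNT leaves
// (BIRTH_THREAD | COUNT) recorded in combined_selectors_ (see below), which is
// presumably what IsGroupedBy() consults.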
explicit Comparator();
// Reset the comparator to a NIL selector, and recursively delete any
// tiebreaker_ entries. NOTE: We can't do this in a standard destructor, because
// the sort algorithm makes copies of this object, and then deletes them,
// which would cause problems (either we'd make expensive deep copies, or we'd
// do more than one delete on a tiebreaker_).
void Clear();
// The less() operator for sorting the array via std::sort().
bool operator()(const Snapshot& left, const Snapshot& right) const;
void Sort(DataCollector::Collection* collection) const;
// Check to see if the items are sort equivalents (should be aggregated).
bool Equivalent(const Snapshot& left, const Snapshot& right) const;
// Check to see if all required fields are present in the given sample.
bool Acceptable(const Snapshot& sample) const;
// A comparator can be refined by specifying what to do if the selected basis
// for comparison is insufficient to establish an ordering. This call adds
// the indicated attribute as the new "least significant" basis of comparison.
void SetTiebreaker(Selector selector, const std::string& required);
// Indicate if this instance is set up to sort by the given Selector, thereby
// putting that information in the SortGrouping, so it is not needed in each
// printed line.
bool IsGroupedBy(Selector selector) const;
// Using the tiebreakers as set above, we mostly get an ordering, with
// equivalent groups. If those groups are displayed (rather than just being
// aggregated), then the following is used to order them (within the group).
void SetSubgroupTiebreaker(Selector selector);
// Translate a keyword and restriction in URL path to a selector for sorting.
void ParseKeyphrase(const std::string& key_phrase);
// Parse a query in an about:tasks URL to decide on sort ordering.
bool ParseQuery(const std::string& query);
// Output a header line that can be used to indicate what items will be
// collected in the group. It lists all (potentially) tested attributes and
// their values (in the sample item).
bool WriteSortGrouping(const Snapshot& sample, std::string* output) const;
// Output a sample, with SortGroup details not displayed.
void WriteSnapshot(const Snapshot& sample, std::string* output) const;
private:
// The selector directs this instance to compare based on the specified
// members of the tested elements.
Selector selector_;
// For filtering into acceptable and unacceptable snapshot instances, the
// following is required to be a substring of the field indicated by selector_.
std::string required_;
// If this instance can't decide on an ordering, we can consult a tie-breaker
// which may have a different basis of comparison.
Comparator* tiebreaker_;
// We OR together all the selectors we sort on (not counting sub-group
// selectors), so that we can tell if we've decided to group on any given
// criteria.
int combined_selectors_;
// Some tiebreakers are for subgroup ordering, and not for basic ordering (in
// preparation for aggregation). The subgroup tiebreakers are not consulted
// when deciding if two items are in equivalent groups. This flag tells us
// to ignore the tiebreaker when doing Equivalent() testing.
bool use_tiebreaker_for_sort_only_;
};
//------------------------------------------------------------------------------
// For each thread, we have a ThreadData that stores all tracking info generated
// on this thread. This prevents the need for locking as data accumulates.
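// A usage sketch (simplified; in practice these calls are made by the tracking
// machinery around Task construction and destruction, e.g. via the Tracked
// base class in tracked.h):
//
//   Births* births = NULL;
//   if (ThreadData* current = ThreadData::current())
//     births = current->TallyABirth(FROM_HERE);     // On the birth thread.
//   ...
//   // Later, on whatever thread the instance is destroyed on ("lifetime" is
//   // the elapsed TimeDelta since birth, tracked elsewhere):
//   if (births) {
//     if (ThreadData* current = ThreadData::current())
//       current->TallyADeath(*births, lifetime);
//   }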
class BASE_API ThreadData {
public:
typedef std::map<Location, Births*> BirthMap;
typedef std::map<const Births*, DeathData> DeathMap;
ThreadData();
~ThreadData();
// Using Thread Local Store, find the current instance for collecting data.
// If an instance does not exist, construct one (and remember it for use on
// this thread).
// If shutdown has already started, and we don't yet have an instance, then
// return null.
static ThreadData* current();
// For a given about:tasks URL, develop resulting HTML, and append to output.
static void WriteHTML(const std::string& query, std::string* output);
// For a given accumulated array of results, use the comparator to sort and
// subtotal, writing the results to the output.
static void WriteHTMLTotalAndSubtotals(
const DataCollector::Collection& match_array,
const Comparator& comparator, std::string* output);
// In this thread's data, record a new birth.
Births* TallyABirth(const Location& location);
// Find a place to record a death on this thread.
void TallyADeath(const Births& lifetimes, const base::TimeDelta& duration);
// (Thread safe) Get start of list of instances.
static ThreadData* first();
// Iterate through the null terminated list of instances.
ThreadData* next() const { return next_; }
MessageLoop* message_loop() const { return message_loop_; }
const std::string ThreadName() const;
// Using our lock, make a copy of the specified maps. These calls may arrive
// from non-local threads, and are used to quickly scan data from all threads
// in order to build an HTML page for about:tasks.
void SnapshotBirthMap(BirthMap *output) const;
void SnapshotDeathMap(DeathMap *output) const;
// Hack: asynchronously clear all birth count and death tally data values
// in all ThreadData instances. The numerical (zeroing) part is done without
// use of locks or atomic exchanges, and may (for int64 values) produce
// bogus counts VERY rarely.
static void ResetAllThreadData();
// Using our lock to protect the iteration, clear all birth and death data.
void Reset();
// Using the "known list of threads" gathered during births and deaths, the
// following attempts to run the given function once on all such threads.
// Note that the function can only be run on threads which have a message
// loop!
static void RunOnAllThreads(void (*Func)());
// Set internal status_ to either become ACTIVE, or later, to be SHUTDOWN,
// based on the argument being true or false respectively.
// If tracking is not compiled in, this function will return false.
static bool StartTracking(bool status);
static bool IsActive();
#if defined(OS_WIN)
// WARNING: ONLY call this function when all MessageLoops are still intact for
// all registered threads. IF you call it later, you will crash.
// Note: You don't need to call it at all, and you can wait till you are
// single threaded (again) to do the cleanup via
// ShutdownSingleThreadedCleanup().
// Start the teardown (shutdown) process in a multi-thread mode by disabling
// further additions to thread database on all threads. First it makes a
// local (locked) change to prevent any more threads from registering. Then
// it Posts a Task to all registered threads to be sure they are aware that no
// more accumulation can take place.
static void ShutdownMultiThreadTracking();
#endif
// WARNING: ONLY call this function when you are running single threaded
// (again) and all message loops and threads have terminated. Until that
// point some threads may still attempt to write into our data structures.
// Delete recursively all data structures, starting with the list of
// ThreadData instances.
static void ShutdownSingleThreadedCleanup();
private:
// Current allowable states of the tracking system. The states always
// proceed towards SHUTDOWN, and never go backwards.
enum Status {
UNINITIALIZED,
ACTIVE,
SHUTDOWN,
};
#if defined(OS_WIN)
class ThreadSafeDownCounter;
class RunTheStatic;
#endif
// Each registered thread is called to set status_ to SHUTDOWN.
// This is done redundantly on every registered thread because it is not
// protected by a mutex. Running on all threads guarantees we get the
// notification into the memory cache of all possible threads.
static void ShutdownDisablingFurtherTracking();
// We use thread local store to identify which ThreadData to interact with.
static base::ThreadLocalStorage::Slot tls_index_;
// Link to the most recently created instance (starts a null terminated list).
static ThreadData* first_;
// Protection for access to first_.
static base::Lock list_lock_;
// We set status_ to SHUTDOWN when we shut down the tracking service. This
// setting is redundantly established by all participating threads so that we
// are *guaranteed* (without locking) that all threads can "see" the status
// and avoid additional calls into the service.
static Status status_;
// Link to next instance (null terminated list). Used to globally track all
// registered instances (corresponds to all registered threads where we keep
// data).
ThreadData* next_;
// The message loop where tasks needing to access this instance's private data
// should be directed. Since some threads have no message loop, some
// instances have data that can't be (safely) modified externally.
MessageLoop* message_loop_;
// A map used on each thread to keep track of Births on this thread.
// This map should only be accessed on the thread it was constructed on.
// When a snapshot is needed, this structure can be locked in place for the
// duration of the snapshotting activity.
BirthMap birth_map_;
// Similar to birth_map_, this records information about deaths of tracked
// instances (i.e., when a tracked instance was destroyed on this thread).
// It is locked before changing, and hence other threads may access it by
// locking before reading it.
DeathMap death_map_;
// Lock to protect *some* access to BirthMap and DeathMap. The maps are
// regularly read and written on this thread, but may only be read from other
// threads. To support this, we acquire this lock if we are writing from this
// thread, or reading from another thread. For reading from this thread we
// don't need a lock, as there is no potential for a conflict since the
// writing is only done from this thread.
mutable base::Lock lock_;
DISALLOW_COPY_AND_ASSIGN(ThreadData);
};
//------------------------------------------------------------------------------
// Provide a simple way to start global tracking, and to tear down tracking
// when done. Note that construction and destruction of this object must be
// done when running in single-threaded mode (before spawning a lot of threads
// for construction, and after shutting down all the threads for destruction).
// To prevent grabbing thread local store resources time and again if someone
// chooses to try to re-run the browser many times, we maintain global state and
// only allow the tracking system to be started up at most once, and shutdown
// at most once. See bug 31344 for an example.
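// Typical usage is a single long-lived instance; the placement shown here is
// only an illustration (any early, still-single-threaded spot works):
//
//   int main(int argc, char** argv) {
//     tracked_objects::AutoTracking tracking;  // Starts tracking at most once.
//     ...  // Spawn threads, run message loops, and so on.
//   }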
class AutoTracking {
public:
AutoTracking() {
if (state_ != kNeverBeenRun)
return;
ThreadData::StartTracking(true);
state_ = kRunning;
}
~AutoTracking() {
#ifndef NDEBUG
if (state_ != kRunning)
return;
// We don't do cleanup of any sort in Release build because it is a
// complete waste of time. Since Chromium doesn't join all its threads and
// guarantee we're in a single threaded mode, we don't even do cleanup in
// debug mode, as it will generate race-checker warnings.
#endif
}
private:
enum State {
kNeverBeenRun,
kRunning,
kTornDownAndStopped,
};
static State state_;
DISALLOW_COPY_AND_ASSIGN(AutoTracking);
};
} // namespace tracked_objects
#endif // BASE_TRACKED_OBJECTS_H_