// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This file defines a WatchDog thread that monitors the responsiveness of other
// browser threads like UI, IO, DB, FILE and CACHED threads. It also defines
// ThreadWatcher class which performs health check on threads that would like to
// be watched. This file also defines ThreadWatcherList class that has list of
// all active ThreadWatcher objects.
//
// ThreadWatcher class sends ping message to the watched thread and the watched
// thread responds back with a pong message. It uploads response time
// (difference between ping and pong times) as a histogram.
//
// TODO(raman): ThreadWatcher can detect hung threads. If a hung thread is
// detected, we should probably just crash, and allow the crash system to gather
// then stack trace.
//
// Example Usage:
//
// The following is an example for watching responsiveness of IO thread.
// sleep_time specifies how often ping messages have to be sent to IO thread.
// unresponsive_time is the wait time after ping message is sent, to check if
// we have received pong message or not.
//
// base::TimeDelta sleep_time = base::TimeDelta::FromSeconds(5);
// base::TimeDelta unresponsive_time = base::TimeDelta::FromSeconds(10);
// ThreadWatcher::StartWatching(BrowserThread::IO, "IO", sleep_time,
// unresponsive_time);
#ifndef CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
#define CHROME_BROWSER_METRICS_THREAD_WATCHER_H_
#include <map>
#include <string>
#include <vector>
#include "base/basictypes.h"
#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/message_loop.h"
#include "base/metrics/histogram.h"
#include "base/synchronization/lock.h"
#include "base/task.h"
#include "base/threading/thread.h"
#include "base/time.h"
#include "content/browser/browser_thread.h"
#include "content/common/notification_observer.h"
#include "content/common/notification_registrar.h"
class CustomThreadWatcher;
class ThreadWatcherList;
// This class performs health check on threads that would like to be watched.
class ThreadWatcher {
public:
// This method starts performing health check on the given thread_id. It will
// create ThreadWatcher object for the given thread_id, thread_name,
// sleep_time and unresponsive_time. sleep_time_ is the wait time between ping
// messages. unresponsive_time_ is the wait time after ping message is sent,
// to check if we have received pong message or not. It will register that
// ThreadWatcher object and activate the thread watching of the given
// thread_id.
static void StartWatching(const BrowserThread::ID& thread_id,
const std::string& thread_name,
const base::TimeDelta& sleep_time,
const base::TimeDelta& unresponsive_time);
// Return the thread_id of the thread being watched.
BrowserThread::ID thread_id() const { return thread_id_; }
// Return the name of the thread being watched.
std::string thread_name() const { return thread_name_; }
// Return the sleep time between ping messages to be sent to the thread.
base::TimeDelta sleep_time() const { return sleep_time_; }
// Return the the wait time to check the responsiveness of the thread.
base::TimeDelta unresponsive_time() const { return unresponsive_time_; }
// Returns true if we are montioring the thread.
bool active() const { return active_; }
// Returns ping_time_ (used by unit tests).
base::TimeTicks ping_time() const { return ping_time_; }
// Returns ping_sequence_number_ (used by unit tests).
uint64 ping_sequence_number() const { return ping_sequence_number_; }
protected:
// Construct a ThreadWatcher for the given thread_id. sleep_time_ is the
// wait time between ping messages. unresponsive_time_ is the wait time after
// ping message is sent, to check if we have received pong message or not.
ThreadWatcher(const BrowserThread::ID& thread_id,
const std::string& thread_name,
const base::TimeDelta& sleep_time,
const base::TimeDelta& unresponsive_time);
virtual ~ThreadWatcher();
// This method activates the thread watching which starts ping/pong messaging.
virtual void ActivateThreadWatching();
// This method de-activates the thread watching and revokes all tasks.
virtual void DeActivateThreadWatching();
// This will ensure that the watching is actively taking place, and awaken
// (i.e., post a PostPingMessage) if the watcher has stopped pinging due to
// lack of user activity. It will also reset ping_count_ to kPingCount.
virtual void WakeUp();
// This method records when ping message was sent and it will Post a task
// (OnPingMessage) to the watched thread that does nothing but respond with
// OnPongMessage. It also posts a task (OnCheckResponsiveness) to check
// responsiveness of monitored thread that would be called after waiting
// unresponsive_time_.
// This method is accessible on WatchDogThread.
virtual void PostPingMessage();
// This method handles a Pong Message from watched thread. It will track the
// response time (pong time minus ping time) via histograms. It posts a
// PostPingMessage task that would be called after waiting sleep_time_. It
// increments ping_sequence_number_ by 1.
// This method is accessible on WatchDogThread.
virtual void OnPongMessage(uint64 ping_sequence_number);
// This method will determine if the watched thread is responsive or not. If
// the latest ping_sequence_number_ is not same as the ping_sequence_number
// that is passed in, then we can assume that watched thread has responded
// with a pong message.
// This method is accessible on WatchDogThread.
virtual bool OnCheckResponsiveness(uint64 ping_sequence_number);
private:
friend class ThreadWatcherList;
// Allow tests to access our innards for testing purposes.
FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadResponding);
FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNotResponding);
FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsResponding);
FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, MultipleThreadsNotResponding);
// Post constructor initialization.
void Initialize();
// Watched thread does nothing except post callback_task to the WATCHDOG
// Thread. This method is called on watched thread.
static void OnPingMessage(const BrowserThread::ID& thread_id,
Task* callback_task);
// This is the number of ping messages to be sent when the user is idle.
// ping_count_ will be initialized to kPingCount whenever user becomes active.
static const int kPingCount;
// The thread_id of the thread being watched. Only one instance can exist for
// the given thread_id of the thread being watched.
const BrowserThread::ID thread_id_;
// The name of the thread being watched.
const std::string thread_name_;
// It is the sleep time between between the receipt of a pong message back,
// and the sending of another ping message.
const base::TimeDelta sleep_time_;
// It is the duration from sending a ping message, until we check status to be
// sure a pong message has been returned.
const base::TimeDelta unresponsive_time_;
// This is the last time when ping message was sent.
base::TimeTicks ping_time_;
// This is the sequence number of the next ping for which there is no pong. If
// the instance is sleeping, then it will be the sequence number for the next
// ping.
uint64 ping_sequence_number_;
// This is set to true if thread watcher is watching.
bool active_;
// The counter tracks least number of ping messages that will be sent to
// watched thread before the ping-pong mechanism will go into an extended
// sleep. If this value is zero, then the mechanism is in an extended sleep,
// and awaiting some observed user action before continuing.
int ping_count_;
// Histogram that keeps track of response times for the watched thread.
base::Histogram* histogram_;
// We use this factory to create callback tasks for ThreadWatcher object. We
// use this during ping-pong messaging between WatchDog thread and watched
// thread.
ScopedRunnableMethodFactory<ThreadWatcher> method_factory_;
DISALLOW_COPY_AND_ASSIGN(ThreadWatcher);
};
// Class with a list of all active thread watchers. A thread watcher is active
// if it has been registered, which includes determing the histogram name. This
// class provides utility functions to start and stop watching all browser
// threads. Only one instance of this class exists.
class ThreadWatcherList : public NotificationObserver {
public:
// A map from BrowserThread to the actual instances.
typedef std::map<BrowserThread::ID, ThreadWatcher*> RegistrationList;
// This singleton holds the global list of registered ThreadWatchers.
ThreadWatcherList();
// Destructor deletes all registered ThreadWatcher instances.
virtual ~ThreadWatcherList();
// Register() stores a pointer to the given ThreadWatcher in a global map.
static void Register(ThreadWatcher* watcher);
// This method returns true if the ThreadWatcher object is registerd.
static bool IsRegistered(const BrowserThread::ID thread_id);
// This method posts a task on WatchDogThread to start watching all browser
// threads.
// This method is accessible on UI thread.
static void StartWatchingAll();
// This method posts a task on WatchDogThread to RevokeAll tasks and to
// deactive thread watching of other threads and tell NotificationService to
// stop calling Observe.
// This method is accessible on UI thread.
static void StopWatchingAll();
// RemoveAll NotificationTypes that are being observed.
// This method is accessible on UI thread.
static void RemoveNotifications();
private:
// Allow tests to access our innards for testing purposes.
FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, Registration);
// Delete all thread watcher objects and remove them from global map.
// This method is accessible on WatchDogThread.
void DeleteAll();
// This will ensure that the watching is actively taking place. It will wakeup
// all thread watchers every 2 seconds. This is the implementation of
// NotificationObserver. When a matching notification is posted to the
// notification service, this method is called.
// This method is accessible on UI thread.
virtual void Observe(NotificationType type,
const NotificationSource& source,
const NotificationDetails& details);
// This will ensure that the watching is actively taking place, and awaken
// all thread watchers that are registered.
// This method is accessible on WatchDogThread.
virtual void WakeUpAll();
// The Find() method can be used to test to see if a given ThreadWatcher was
// already registered, or to retrieve a pointer to it from the global map.
static ThreadWatcher* Find(const BrowserThread::ID& thread_id);
// Helper function should be called only while holding lock_.
ThreadWatcher* PreLockedFind(const BrowserThread::ID& thread_id);
static ThreadWatcherList* global_; // The singleton of this class.
// Lock for access to registered_.
base::Lock lock_;
// Map of all registered watched threads, from thread_id to ThreadWatcher.
RegistrationList registered_;
// The registrar that holds NotificationTypes to be observed.
NotificationRegistrar registrar_;
// This is the last time when woke all thread watchers up.
base::TimeTicks last_wakeup_time_;
DISALLOW_COPY_AND_ASSIGN(ThreadWatcherList);
};
// Class for WatchDogThread and in its Init method, we start watching UI, IO,
// DB, FILE, CACHED threads.
class WatchDogThread : public base::Thread {
public:
// Constructor.
WatchDogThread();
// Destroys the thread and stops the thread.
virtual ~WatchDogThread();
// Callable on any thread. Returns whether you're currently on a
// watchdog_thread_.
static bool CurrentlyOnWatchDogThread();
// These are the same methods in message_loop.h, but are guaranteed to either
// get posted to the MessageLoop if it's still alive, or be deleted otherwise.
// They return true iff the watchdog thread existed and the task was posted.
// Note that even if the task is posted, there's no guarantee that it will
// run, since the target thread may already have a Quit message in its queue.
static bool PostTask(const tracked_objects::Location& from_here, Task* task);
static bool PostDelayedTask(const tracked_objects::Location& from_here,
Task* task,
int64 delay_ms);
protected:
virtual void Init();
virtual void CleanUp();
virtual void CleanUpAfterMessageLoopDestruction();
private:
static bool PostTaskHelper(
const tracked_objects::Location& from_here,
Task* task,
int64 delay_ms);
// This lock protects watchdog_thread_.
static base::Lock lock_;
static WatchDogThread* watchdog_thread_; // The singleton of this class.
DISALLOW_COPY_AND_ASSIGN(WatchDogThread);
};
// DISABLE_RUNNABLE_METHOD_REFCOUNT is a convenience macro for disabling
// refcounting of ThreadWatcher and ThreadWatcherList classes.
DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcher);
DISABLE_RUNNABLE_METHOD_REFCOUNT(ThreadWatcherList);
#endif // CHROME_BROWSER_METRICS_THREAD_WATCHER_H_