// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/common/metrics_helpers.h"
#if defined(USE_SYSTEM_LIBBZ2)
#include <bzlib.h>
#else
#include "third_party/bzip2/bzlib.h"
#endif
#include "base/base64.h"
#include "base/time.h"
#include "base/basictypes.h"
#include "base/file_util.h"
#include "base/md5.h"
#include "base/perftimer.h"
#include "base/string_number_conversions.h"
#include "base/sys_info.h"
#include "base/utf_string_conversions.h"
#include "base/third_party/nspr/prtime.h"
#include "chrome/common/logging_chrome.h"
#include "googleurl/src/gurl.h"
#include "libxml/xmlwriter.h"
#define OPEN_ELEMENT_FOR_SCOPE(name) ScopedElement scoped_element(this, name)
using base::Histogram;
using base::StatisticsRecorder;
using base::Time;
using base::TimeDelta;
// http://blogs.msdn.com/oldnewthing/archive/2004/10/25/247180.aspx
#if defined(OS_WIN)
extern "C" IMAGE_DOS_HEADER __ImageBase;
#endif
namespace {
// libxml take xmlChar*, which is unsigned char*
inline const unsigned char* UnsignedChar(const char* input) {
return reinterpret_cast<const unsigned char*>(input);
}
} // namespace
class MetricsLogBase::XmlWrapper {
public:
XmlWrapper()
: doc_(NULL),
buffer_(NULL),
writer_(NULL) {
buffer_ = xmlBufferCreate();
CHECK(buffer_);
#if defined(OS_CHROMEOS)
writer_ = xmlNewTextWriterDoc(&doc_, /* compression */ 0);
#else
writer_ = xmlNewTextWriterMemory(buffer_, /* compression */ 0);
#endif // OS_CHROMEOS
DCHECK(writer_);
int result = xmlTextWriterSetIndent(writer_, 2);
DCHECK_EQ(0, result);
}
~XmlWrapper() {
FreeDocWriter();
if (buffer_) {
xmlBufferFree(buffer_);
buffer_ = NULL;
}
}
void FreeDocWriter() {
if (writer_) {
xmlFreeTextWriter(writer_);
writer_ = NULL;
}
if (doc_) {
xmlFreeDoc(doc_);
doc_ = NULL;
}
}
xmlDocPtr doc() const { return doc_; }
xmlTextWriterPtr writer() const { return writer_; }
xmlBufferPtr buffer() const { return buffer_; }
private:
xmlDocPtr doc_;
xmlBufferPtr buffer_;
xmlTextWriterPtr writer_;
};
// static
std::string MetricsLogBase::version_extension_;
MetricsLogBase::MetricsLogBase(const std::string& client_id, int session_id,
const std::string& version_string)
: start_time_(Time::Now()),
client_id_(client_id),
session_id_(base::IntToString(session_id)),
locked_(false),
xml_wrapper_(new XmlWrapper),
num_events_(0) {
StartElement("log");
WriteAttribute("clientid", client_id_);
WriteInt64Attribute("buildtime", GetBuildTime());
WriteAttribute("appversion", version_string);
}
MetricsLogBase::~MetricsLogBase() {
delete xml_wrapper_;
}
void MetricsLogBase::CloseLog() {
DCHECK(!locked_);
locked_ = true;
int result = xmlTextWriterEndDocument(xml_wrapper_->writer());
DCHECK_GE(result, 0);
result = xmlTextWriterFlush(xml_wrapper_->writer());
DCHECK_GE(result, 0);
#if defined(OS_CHROMEOS)
xmlNodePtr root = xmlDocGetRootElement(xml_wrapper_->doc());
if (!hardware_class_.empty()) {
// The hardware class is determined after the first ongoing log is
// constructed, so this adds the root element's "hardwareclass"
// attribute when the log is closed instead.
xmlNewProp(root, UnsignedChar("hardwareclass"),
UnsignedChar(hardware_class_.c_str()));
}
// Flattens the XML tree into a character buffer.
PerfTimer dump_timer;
result = xmlNodeDump(xml_wrapper_->buffer(), xml_wrapper_->doc(),
root, /* level */ 0, /* format */ 1);
DCHECK_GE(result, 0);
UMA_HISTOGRAM_TIMES("UMA.XMLNodeDumpTime", dump_timer.Elapsed());
PerfTimer free_timer;
xml_wrapper_->FreeDocWriter();
UMA_HISTOGRAM_TIMES("UMA.XMLWriterDestructionTime", free_timer.Elapsed());
#endif // OS_CHROMEOS
}
int MetricsLogBase::GetEncodedLogSize() {
DCHECK(locked_);
CHECK(xml_wrapper_);
CHECK(xml_wrapper_->buffer());
return xml_wrapper_->buffer()->use;
}
bool MetricsLogBase::GetEncodedLog(char* buffer, int buffer_size) {
DCHECK(locked_);
if (buffer_size < GetEncodedLogSize())
return false;
memcpy(buffer, xml_wrapper_->buffer()->content, GetEncodedLogSize());
return true;
}
std::string MetricsLogBase::GetEncodedLogString() {
DCHECK(locked_);
return std::string(reinterpret_cast<char*>(xml_wrapper_->buffer()->content));
}
int MetricsLogBase::GetElapsedSeconds() {
return static_cast<int>((Time::Now() - start_time_).InSeconds());
}
std::string MetricsLogBase::CreateHash(const std::string& value) {
MD5Context ctx;
MD5Init(&ctx);
MD5Update(&ctx, value.data(), value.length());
MD5Digest digest;
MD5Final(&digest, &ctx);
uint64 reverse_uint64;
// UMA only uses first 8 chars of hash. We use the above uint64 instead
// of a unsigned char[8] so that we don't run into strict aliasing issues
// in the LOG statement below when trying to interpret reverse as a uint64.
unsigned char* reverse = reinterpret_cast<unsigned char *>(&reverse_uint64);
DCHECK(arraysize(digest.a) >= sizeof(reverse_uint64));
for (size_t i = 0; i < sizeof(reverse_uint64); ++i)
reverse[i] = digest.a[sizeof(reverse_uint64) - i - 1];
// The following log is VERY helpful when folks add some named histogram into
// the code, but forgot to update the descriptive list of histograms. When
// that happens, all we get to see (server side) is a hash of the histogram
// name. We can then use this logging to find out what histogram name was
// being hashed to a given MD5 value by just running the version of Chromium
// in question with --enable-logging.
VLOG(1) << "Metrics: Hash numeric [" << value
<< "]=[" << reverse_uint64 << "]";
return std::string(reinterpret_cast<char*>(digest.a), arraysize(digest.a));
}
std::string MetricsLogBase::CreateBase64Hash(const std::string& string) {
std::string encoded_digest;
if (base::Base64Encode(CreateHash(string), &encoded_digest)) {
DVLOG(1) << "Metrics: Hash [" << encoded_digest << "]=[" << string << "]";
return encoded_digest;
}
return std::string();
}
void MetricsLogBase::RecordUserAction(const char* key) {
DCHECK(!locked_);
std::string command_hash = CreateBase64Hash(key);
if (command_hash.empty()) {
NOTREACHED() << "Unable generate encoded hash of command: " << key;
return;
}
OPEN_ELEMENT_FOR_SCOPE("uielement");
WriteAttribute("action", "command");
WriteAttribute("targetidhash", command_hash);
// TODO(jhughes): Properly track windows.
WriteIntAttribute("window", 0);
WriteCommonEventAttributes();
++num_events_;
}
void MetricsLogBase::RecordLoadEvent(int window_id,
const GURL& url,
PageTransition::Type origin,
int session_index,
TimeDelta load_time) {
DCHECK(!locked_);
OPEN_ELEMENT_FOR_SCOPE("document");
WriteAttribute("action", "load");
WriteIntAttribute("docid", session_index);
WriteIntAttribute("window", window_id);
WriteAttribute("loadtime", base::Int64ToString(load_time.InMilliseconds()));
std::string origin_string;
switch (PageTransition::StripQualifier(origin)) {
// TODO(jhughes): Some of these mappings aren't right... we need to add
// some values to the server's enum.
case PageTransition::LINK:
case PageTransition::MANUAL_SUBFRAME:
origin_string = "link";
break;
case PageTransition::TYPED:
origin_string = "typed";
break;
case PageTransition::AUTO_BOOKMARK:
origin_string = "bookmark";
break;
case PageTransition::AUTO_SUBFRAME:
case PageTransition::RELOAD:
origin_string = "refresh";
break;
case PageTransition::GENERATED:
case PageTransition::KEYWORD:
origin_string = "global-history";
break;
case PageTransition::START_PAGE:
origin_string = "start-page";
break;
case PageTransition::FORM_SUBMIT:
origin_string = "form-submit";
break;
default:
NOTREACHED() << "Received an unknown page transition type: " <<
PageTransition::StripQualifier(origin);
}
if (!origin_string.empty())
WriteAttribute("origin", origin_string);
WriteCommonEventAttributes();
++num_events_;
}
void MetricsLogBase::RecordWindowEvent(WindowEventType type,
int window_id,
int parent_id) {
DCHECK(!locked_);
OPEN_ELEMENT_FOR_SCOPE("window");
WriteAttribute("action", WindowEventTypeToString(type));
WriteAttribute("windowid", base::IntToString(window_id));
if (parent_id >= 0)
WriteAttribute("parent", base::IntToString(parent_id));
WriteCommonEventAttributes();
++num_events_;
}
std::string MetricsLogBase::GetCurrentTimeString() {
return base::Uint64ToString(Time::Now().ToTimeT());
}
// These are the attributes that are common to every event.
void MetricsLogBase::WriteCommonEventAttributes() {
WriteAttribute("session", session_id_);
WriteAttribute("time", GetCurrentTimeString());
}
void MetricsLogBase::WriteAttribute(const std::string& name,
const std::string& value) {
DCHECK(!locked_);
DCHECK(!name.empty());
int result = xmlTextWriterWriteAttribute(xml_wrapper_->writer(),
UnsignedChar(name.c_str()),
UnsignedChar(value.c_str()));
DCHECK_GE(result, 0);
}
void MetricsLogBase::WriteIntAttribute(const std::string& name, int value) {
WriteAttribute(name, base::IntToString(value));
}
void MetricsLogBase::WriteInt64Attribute(const std::string& name, int64 value) {
WriteAttribute(name, base::Int64ToString(value));
}
// static
const char* MetricsLogBase::WindowEventTypeToString(WindowEventType type) {
switch (type) {
case WINDOW_CREATE: return "create";
case WINDOW_OPEN: return "open";
case WINDOW_CLOSE: return "close";
case WINDOW_DESTROY: return "destroy";
default:
NOTREACHED();
return "unknown"; // Can't return NULL as this is used in a required
// attribute.
}
}
void MetricsLogBase::StartElement(const char* name) {
DCHECK(!locked_);
DCHECK(name);
int result = xmlTextWriterStartElement(xml_wrapper_->writer(),
UnsignedChar(name));
DCHECK_GE(result, 0);
}
void MetricsLogBase::EndElement() {
DCHECK(!locked_);
int result = xmlTextWriterEndElement(xml_wrapper_->writer());
DCHECK_GE(result, 0);
}
// static
int64 MetricsLogBase::GetBuildTime() {
static int64 integral_build_time = 0;
if (!integral_build_time) {
Time time;
const char* kDateTime = __DATE__ " " __TIME__ " GMT";
bool result = Time::FromString(ASCIIToWide(kDateTime).c_str(), &time);
DCHECK(result);
integral_build_time = static_cast<int64>(time.ToTimeT());
}
return integral_build_time;
}
MetricsLog* MetricsLogBase::AsMetricsLog() {
return NULL;
}
// TODO(JAR): A The following should really be part of the histogram class.
// Internal state is being needlessly exposed, and it would be hard to reuse
// this code. If we moved this into the Histogram class, then we could use
// the same infrastructure for logging StatsCounters, RatesCounters, etc.
void MetricsLogBase::RecordHistogramDelta(
const Histogram& histogram,
const Histogram::SampleSet& snapshot) {
DCHECK(!locked_);
DCHECK_NE(0, snapshot.TotalCount());
snapshot.CheckSize(histogram);
// We will ignore the MAX_INT/infinite value in the last element of range[].
OPEN_ELEMENT_FOR_SCOPE("histogram");
WriteAttribute("name", CreateBase64Hash(histogram.histogram_name()));
WriteInt64Attribute("sum", snapshot.sum());
// TODO(jar): Remove sumsquares when protobuffer accepts this as optional.
WriteInt64Attribute("sumsquares", 0);
for (size_t i = 0; i < histogram.bucket_count(); i++) {
if (snapshot.counts(i)) {
OPEN_ELEMENT_FOR_SCOPE("histogrambucket");
WriteIntAttribute("min", histogram.ranges(i));
WriteIntAttribute("max", histogram.ranges(i + 1));
WriteIntAttribute("count", snapshot.counts(i));
}
}
}
// MetricsServiceBase
MetricsServiceBase::MetricsServiceBase()
: pending_log_(NULL),
compressed_log_(),
current_log_(NULL) {
}
MetricsServiceBase::~MetricsServiceBase() {
if (pending_log_) {
delete pending_log_;
pending_log_ = NULL;
}
if (current_log_) {
delete current_log_;
current_log_ = NULL;
}
}
// This implementation is based on the Firefox MetricsService implementation.
bool MetricsServiceBase::Bzip2Compress(const std::string& input,
std::string* output) {
bz_stream stream = {0};
// As long as our input is smaller than the bzip2 block size, we should get
// the best compression. For example, if your input was 250k, using a block
// size of 300k or 500k should result in the same compression ratio. Since
// our data should be under 100k, using the minimum block size of 100k should
// allocate less temporary memory, but result in the same compression ratio.
int result = BZ2_bzCompressInit(&stream,
1, // 100k (min) block size
0, // quiet
0); // default "work factor"
if (result != BZ_OK) { // out of memory?
return false;
}
output->clear();
stream.next_in = const_cast<char*>(input.data());
stream.avail_in = static_cast<int>(input.size());
// NOTE: we don't need a BZ_RUN phase since our input buffer contains
// the entire input
do {
output->resize(output->size() + 1024);
stream.next_out = &((*output)[stream.total_out_lo32]);
stream.avail_out = static_cast<int>(output->size()) - stream.total_out_lo32;
result = BZ2_bzCompress(&stream, BZ_FINISH);
} while (result == BZ_FINISH_OK);
if (result != BZ_STREAM_END) // unknown failure?
return false;
result = BZ2_bzCompressEnd(&stream);
DCHECK(result == BZ_OK);
output->resize(stream.total_out_lo32);
return true;
}
void MetricsServiceBase::DiscardPendingLog() {
if (pending_log_) { // Shutdown might have deleted it!
delete pending_log_;
pending_log_ = NULL;
}
compressed_log_.clear();
}
void MetricsServiceBase::RecordCurrentHistograms() {
DCHECK(current_log_);
TransmitAllHistograms(base::Histogram::kNoFlags, true);
}
void MetricsServiceBase::TransmitHistogramDelta(
const base::Histogram& histogram,
const base::Histogram::SampleSet& snapshot) {
current_log_->RecordHistogramDelta(histogram, snapshot);
}
void MetricsServiceBase::InconsistencyDetected(int problem) {
UMA_HISTOGRAM_ENUMERATION("Histogram.InconsistenciesBrowser",
problem, Histogram::NEVER_EXCEEDED_VALUE);
}
void MetricsServiceBase::UniqueInconsistencyDetected(int problem) {
UMA_HISTOGRAM_ENUMERATION("Histogram.InconsistenciesBrowserUnique",
problem, Histogram::NEVER_EXCEEDED_VALUE);
}
void MetricsServiceBase::SnapshotProblemResolved(int amount) {
UMA_HISTOGRAM_COUNTS("Histogram.InconsistentSnapshotBrowser",
std::abs(amount));
}
HistogramSender::HistogramSender() {}
HistogramSender::~HistogramSender() {}
void HistogramSender::TransmitAllHistograms(Histogram::Flags flag_to_set,
bool send_only_uma) {
StatisticsRecorder::Histograms histograms;
StatisticsRecorder::GetHistograms(&histograms);
for (StatisticsRecorder::Histograms::const_iterator it = histograms.begin();
histograms.end() != it;
++it) {
(*it)->SetFlags(flag_to_set);
if (send_only_uma &&
0 == ((*it)->flags() & Histogram::kUmaTargetedHistogramFlag))
continue;
TransmitHistogram(**it);
}
}
void HistogramSender::TransmitHistogram(const Histogram& histogram) {
// Get up-to-date snapshot of sample stats.
Histogram::SampleSet snapshot;
histogram.SnapshotSample(&snapshot);
const std::string& histogram_name = histogram.histogram_name();
int corruption = histogram.FindCorruption(snapshot);
// Crash if we detect that our histograms have been overwritten. This may be
// a fair distance from the memory smasher, but we hope to correlate these
// crashes with other events, such as plugins, or usage patterns, etc.
if (Histogram::BUCKET_ORDER_ERROR & corruption) {
// The checksum should have caught this, so crash separately if it didn't.
CHECK_NE(0, Histogram::RANGE_CHECKSUM_ERROR & corruption);
CHECK(false); // Crash for the bucket order corruption.
}
// Checksum corruption might not have caused order corruption.
CHECK_EQ(0, Histogram::RANGE_CHECKSUM_ERROR & corruption);
if (corruption) {
NOTREACHED();
InconsistencyDetected(corruption);
// Don't send corrupt data to metrics survices.
if (NULL == inconsistencies_.get())
inconsistencies_.reset(new ProblemMap);
int old_corruption = (*inconsistencies_)[histogram_name];
if (old_corruption == (corruption | old_corruption))
return; // We've already seen this corruption for this histogram.
(*inconsistencies_)[histogram_name] |= corruption;
UniqueInconsistencyDetected(corruption);
return;
}
// Find the already sent stats, or create an empty set. Remove from our
// snapshot anything that we've already sent.
LoggedSampleMap::iterator it = logged_samples_.find(histogram_name);
Histogram::SampleSet* already_logged;
if (logged_samples_.end() == it) {
// Add new entry
already_logged = &logged_samples_[histogram.histogram_name()];
already_logged->Resize(histogram); // Complete initialization.
} else {
already_logged = &(it->second);
int64 discrepancy(already_logged->TotalCount() -
already_logged->redundant_count());
if (discrepancy) {
NOTREACHED(); // Already_logged has become corrupt.
int problem = static_cast<int>(discrepancy);
if (problem != discrepancy)
problem = INT_MAX;
SnapshotProblemResolved(problem);
// With no valid baseline, we'll act like we've sent everything in our
// snapshot.
already_logged->Subtract(*already_logged);
already_logged->Add(snapshot);
}
// Deduct any stats we've already logged from our snapshot.
snapshot.Subtract(*already_logged);
}
// Snapshot now contains only a delta to what we've already_logged.
if (snapshot.redundant_count() > 0) {
TransmitHistogramDelta(histogram, snapshot);
// Add new data into our running total.
already_logged->Add(snapshot);
}
}