// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Histogram is an object that aggregates statistics, and can summarize them in
// various forms, including ASCII graphical, HTML, and numerically (as a
// vector of numbers corresponding to each of the aggregating buckets).

// It supports calls to accumulate either time intervals (which are processed
// as integral number of milliseconds), or arbitrary integral units.

// For Histogram(exponential histogram), LinearHistogram and CustomHistogram,
// the minimum for a declared range is 1 (instead of 0), while the maximum is
// (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
// with ranges exceeding those limits (e.g. 0 as minimal or
// HistogramBase::kSampleType_MAX as maximal), but those excesses will be
// silently clamped to those limits (for backwards compatibility with existing
// code). Best practice is to not exceed the limits.

// Each use of a histogram with the same name will reference the same underlying
// data, so it is safe to record to the same histogram from multiple locations
// in the code. It is a runtime error if all uses of the same histogram do not
// agree exactly in type, bucket size and range.

// For Histogram and LinearHistogram, the maximum for a declared range should
// always be larger (not equal) than minimal range. Zero and
// HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
// so the smallest legal bucket_count is 3. However CustomHistogram can have
// bucket count as 2 (when you give a custom ranges vector containing only 1
// range).
// For these 3 kinds of histograms, the max bucket count is always
// (Histogram::kBucketCount_MAX - 1).

// The buckets layout of class Histogram is exponential. For example, buckets
// might contain (sequentially) the count of values in the following intervals:
// [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
// That bucket allocation would actually result from construction of a histogram
// for values between 1 and 64, with 8 buckets, such as:
// Histogram count("some name", 1, 64, 8);
// Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
// are also counted by the constructor in the user supplied "bucket_count"
// argument.
// The above example has an exponential ratio of 2 (doubling the bucket width
// in each consecutive bucket.  The Histogram class automatically calculates
// the smallest ratio that it can use to construct the number of buckets
// selected in the constructor.  An another example, if you had 50 buckets,
// and millisecond time values from 1 to 10000, then the ratio between
// consecutive bucket widths will be approximately somewhere around the 50th
// root of 10000.  This approach provides very fine grain (narrow) buckets
// at the low end of the histogram scale, but allows the histogram to cover a
// gigantic range with the addition of very few buckets.

// Usually we use macros to define and use a histogram, which are defined in
// base/metrics/histogram_macros.h. Note: Callers should include that header
// directly if they only access the histogram APIs through macros.
//
// Macros use a pattern involving a function static variable, that is a pointer
// to a histogram.  This static is explicitly initialized on any thread
// that detects a uninitialized (NULL) pointer.  The potentially racy
// initialization is not a problem as it is always set to point to the same
// value (i.e., the FactoryGet always returns the same value).  FactoryGet
// is also completely thread safe, which results in a completely thread safe,
// and relatively fast, set of counters.  To avoid races at shutdown, the static
// pointer is NOT deleted, and we leak the histograms at process termination.

#ifndef BASE_METRICS_HISTOGRAM_H_
#define BASE_METRICS_HISTOGRAM_H_

#include <stddef.h>
#include <stdint.h>

#include <map>
#include <string>
#include <vector>

#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/memory/scoped_ptr.h"
#include "base/metrics/bucket_ranges.h"
#include "base/metrics/histogram_base.h"
// TODO(asvitkine): Migrate callers to to include this directly and remove this.
#include "base/metrics/histogram_macros.h"
#include "base/metrics/histogram_samples.h"
#include "base/time/time.h"

namespace base {

class BooleanHistogram;
class CustomHistogram;
class Histogram;
class LinearHistogram;
class Pickle;
class PickleIterator;
class SampleVector;

class BASE_EXPORT Histogram : public HistogramBase {
 public:
  // Initialize maximum number of buckets in histograms as 16,384.
  static const size_t kBucketCount_MAX;

  typedef std::vector<Count> Counts;

  //----------------------------------------------------------------------------
  // For a valid histogram, input should follow these restrictions:
  // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
  //              normalized up to 1)
  // maximum > minimum
  // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
  // Additionally,
  // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
  // more buckets than the range of numbers; having more buckets than 1 per
  // value in the range would be nonsensical.
  static HistogramBase* FactoryGet(const std::string& name,
                                   Sample minimum,
                                   Sample maximum,
                                   size_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const std::string& name,
                                       base::TimeDelta minimum,
                                       base::TimeDelta maximum,
                                       size_t bucket_count,
                                       int32_t flags);

  // Overloads of the above two functions that take a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name,
                                   Sample minimum,
                                   Sample maximum,
                                   size_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const char* name,
                                       base::TimeDelta minimum,
                                       base::TimeDelta maximum,
                                       size_t bucket_count,
                                       int32_t flags);

  static void InitializeBucketRanges(Sample minimum,
                                     Sample maximum,
                                     BucketRanges* ranges);

  // This constant if for FindCorruption. Since snapshots of histograms are
  // taken asynchronously relative to sampling, and our counting code currently
  // does not prevent race conditions, it is pretty likely that we'll catch a
  // redundant count that doesn't match the sample count.  We allow for a
  // certain amount of slop before flagging this as an inconsistency. Even with
  // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
  // so we'll eventually get the data, if it was not the result of a corruption.
  static const int kCommonRaceBasedCountMismatch;

  // Check to see if bucket ranges, counts and tallies in the snapshot are
  // consistent with the bucket ranges and checksums in our histogram.  This can
  // produce a false-alarm if a race occurred in the reading of the data during
  // a SnapShot process, but should otherwise be false at all times (unless we
  // have memory over-writes, or DRAM failures).
  int FindCorruption(const HistogramSamples& samples) const override;

  //----------------------------------------------------------------------------
  // Accessors for factory construction, serialization and testing.
  //----------------------------------------------------------------------------
  Sample declared_min() const { return declared_min_; }
  Sample declared_max() const { return declared_max_; }
  virtual Sample ranges(size_t i) const;
  virtual size_t bucket_count() const;
  const BucketRanges* bucket_ranges() const { return bucket_ranges_; }

  // This function validates histogram construction arguments. It returns false
  // if some of the arguments are totally bad.
  // Note. Currently it allow some bad input, e.g. 0 as minimum, but silently
  // converts it to good input: 1.
  // TODO(kaiwang): Be more restrict and return false for any bad input, and
  // make this a readonly validating function.
  static bool InspectConstructionArguments(const std::string& name,
                                           Sample* minimum,
                                           Sample* maximum,
                                           size_t* bucket_count);

  // HistogramBase implementation:
  uint64_t name_hash() const override;
  HistogramType GetHistogramType() const override;
  bool HasConstructionArguments(Sample expected_minimum,
                                Sample expected_maximum,
                                size_t expected_bucket_count) const override;
  void Add(Sample value) override;
  void AddCount(Sample value, int count) override;
  scoped_ptr<HistogramSamples> SnapshotSamples() const override;
  void AddSamples(const HistogramSamples& samples) override;
  bool AddSamplesFromPickle(base::PickleIterator* iter) override;
  void WriteHTMLGraph(std::string* output) const override;
  void WriteAscii(std::string* output) const override;

 protected:
  // |ranges| should contain the underflow and overflow buckets. See top
  // comments for example.
  Histogram(const std::string& name,
            Sample minimum,
            Sample maximum,
            const BucketRanges* ranges);

  ~Histogram() override;

  // HistogramBase implementation:
  bool SerializeInfoImpl(base::Pickle* pickle) const override;

  // Method to override to skip the display of the i'th bucket if it's empty.
  virtual bool PrintEmptyBucket(size_t index) const;

  // Get normalized size, relative to the ranges(i).
  virtual double GetBucketSize(Count current, size_t i) const;

  // Return a string description of what goes in a given bucket.
  // Most commonly this is the numeric value, but in derived classes it may
  // be a name (or string description) given to the bucket.
  virtual const std::string GetAsciiBucketRange(size_t it) const;

 private:
  // Allow tests to corrupt our innards for testing purposes.
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptBucketBounds);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, NameMatchTest);
  FRIEND_TEST_ALL_PREFIXES(HistogramTest, AddCountTest);

  friend class StatisticsRecorder;  // To allow it to delete duplicates.
  friend class StatisticsRecorderTest;

  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  // Implementation of SnapshotSamples function.
  scoped_ptr<SampleVector> SnapshotSampleVector() const;

  //----------------------------------------------------------------------------
  // Helpers for emitting Ascii graphic.  Each method appends data to output.

  void WriteAsciiImpl(bool graph_it,
                      const std::string& newline,
                      std::string* output) const;

  // Find out how large (graphically) the largest bucket will appear to be.
  double GetPeakBucketSize(const SampleVector& samples) const;

  // Write a common header message describing this histogram.
  void WriteAsciiHeader(const SampleVector& samples,
                        Count sample_count,
                        std::string* output) const;

  // Write information about previous, current, and next buckets.
  // Information such as cumulative percentage, etc.
  void WriteAsciiBucketContext(const int64_t past,
                               const Count current,
                               const int64_t remaining,
                               const size_t i,
                               std::string* output) const;

  // WriteJSON calls these.
  void GetParameters(DictionaryValue* params) const override;

  void GetCountAndBucketData(Count* count,
                             int64_t* sum,
                             ListValue* buckets) const override;

  // Does not own this object. Should get from StatisticsRecorder.
  const BucketRanges* bucket_ranges_;

  Sample declared_min_;  // Less than this goes into the first bucket.
  Sample declared_max_;  // Over this goes into the last bucket.

  // Finally, provide the state that changes with the addition of each new
  // sample.
  scoped_ptr<SampleVector> samples_;

  DISALLOW_COPY_AND_ASSIGN(Histogram);
};

//------------------------------------------------------------------------------

// LinearHistogram is a more traditional histogram, with evenly spaced
// buckets.
class BASE_EXPORT LinearHistogram : public Histogram {
 public:
  ~LinearHistogram() override;

  /* minimum should start from 1. 0 is as minimum is invalid. 0 is an implicit
     default underflow bucket. */
  static HistogramBase* FactoryGet(const std::string& name,
                                   Sample minimum,
                                   Sample maximum,
                                   size_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const std::string& name,
                                       TimeDelta minimum,
                                       TimeDelta maximum,
                                       size_t bucket_count,
                                       int32_t flags);

  // Overloads of the above two functions that take a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name,
                                   Sample minimum,
                                   Sample maximum,
                                   size_t bucket_count,
                                   int32_t flags);
  static HistogramBase* FactoryTimeGet(const char* name,
                                       TimeDelta minimum,
                                       TimeDelta maximum,
                                       size_t bucket_count,
                                       int32_t flags);

  struct DescriptionPair {
    Sample sample;
    const char* description;  // Null means end of a list of pairs.
  };

  // Create a LinearHistogram and store a list of number/text values for use in
  // writing the histogram graph.
  // |descriptions| can be NULL, which means no special descriptions to set. If
  // it's not NULL, the last element in the array must has a NULL in its
  // "description" field.
  static HistogramBase* FactoryGetWithRangeDescription(
      const std::string& name,
      Sample minimum,
      Sample maximum,
      size_t bucket_count,
      int32_t flags,
      const DescriptionPair descriptions[]);

  static void InitializeBucketRanges(Sample minimum,
                                     Sample maximum,
                                     BucketRanges* ranges);

  // Overridden from Histogram:
  HistogramType GetHistogramType() const override;

 protected:
  LinearHistogram(const std::string& name,
                  Sample minimum,
                  Sample maximum,
                  const BucketRanges* ranges);

  double GetBucketSize(Count current, size_t i) const override;

  // If we have a description for a bucket, then return that.  Otherwise
  // let parent class provide a (numeric) description.
  const std::string GetAsciiBucketRange(size_t i) const override;

  // Skip printing of name for numeric range if we have a name (and if this is
  // an empty bucket).
  bool PrintEmptyBucket(size_t index) const override;

 private:
  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  // For some ranges, we store a printable description of a bucket range.
  // If there is no description, then GetAsciiBucketRange() uses parent class
  // to provide a description.
  typedef std::map<Sample, std::string> BucketDescriptionMap;
  BucketDescriptionMap bucket_description_;

  DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
};

//------------------------------------------------------------------------------

// BooleanHistogram is a histogram for booleans.
class BASE_EXPORT BooleanHistogram : public LinearHistogram {
 public:
  static HistogramBase* FactoryGet(const std::string& name, int32_t flags);

  // Overload of the above function that takes a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name, int32_t flags);

  HistogramType GetHistogramType() const override;

 private:
  BooleanHistogram(const std::string& name, const BucketRanges* ranges);

  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
};

//------------------------------------------------------------------------------

// CustomHistogram is a histogram for a set of custom integers.
class BASE_EXPORT CustomHistogram : public Histogram {
 public:
  // |custom_ranges| contains a vector of limits on ranges. Each limit should be
  // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
  // compatibility). The limits can be unordered or contain duplication, but
  // client should not depend on this.
  static HistogramBase* FactoryGet(const std::string& name,
                                   const std::vector<Sample>& custom_ranges,
                                   int32_t flags);

  // Overload of the above function that takes a const char* |name| param,
  // to avoid code bloat from the std::string constructor being inlined into
  // call sites.
  static HistogramBase* FactoryGet(const char* name,
                                   const std::vector<Sample>& custom_ranges,
                                   int32_t flags);

  // Overridden from Histogram:
  HistogramType GetHistogramType() const override;

  // Helper method for transforming an array of valid enumeration values
  // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
  // This function ensures that a guard bucket exists right after any
  // valid sample value (unless the next higher sample is also a valid value),
  // so that invalid samples never fall into the same bucket as valid samples.
  // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
  static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
                                                 size_t num_values);
 protected:
  CustomHistogram(const std::string& name,
                  const BucketRanges* ranges);

  // HistogramBase implementation:
  bool SerializeInfoImpl(base::Pickle* pickle) const override;

  double GetBucketSize(Count current, size_t i) const override;

 private:
  friend BASE_EXPORT HistogramBase* DeserializeHistogramInfo(
      base::PickleIterator* iter);
  static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);

  static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
  static BucketRanges* CreateBucketRangesFromCustomRanges(
      const std::vector<Sample>& custom_ranges);

  DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
};

}  // namespace base

#endif  // BASE_METRICS_HISTOGRAM_H_