# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Histogram generation tools."""
from collections import defaultdict
import format_utils
class Histogram(object):
"""A histogram generating object.
This object serves the sole purpose of formatting (key, val) pairs as an
ASCII histogram, including bars and percentage markers, and taking care of
label alignment, scaling, etc. In addition to the standard __init__
interface, two static methods are provided for conveniently converting data
in different formats into a histogram. Histogram generation is exported via
its __str__ method, and looks as follows:
Yes |################ | 5 (83.3%)
No |### | 1 (16.6%)
TODO(garnold) we may want to add actual methods for adding data or tweaking
the output layout and formatting. For now, though, this is fine.
"""
def __init__(self, data, scale=20, formatter=None):
"""Initialize a histogram object.
Args:
data: list of (key, count) pairs constituting the histogram
scale: number of characters used to indicate 100%
formatter: function used for formatting raw histogram values
"""
self.data = data
self.scale = scale
self.formatter = formatter or str
self.max_key_len = max([len(str(key)) for key, count in self.data])
self.total = sum([count for key, count in self.data])
@staticmethod
def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
"""Takes a dictionary of counts and returns a histogram object.
This simply converts a mapping from names to counts into a list of (key,
count) pairs, optionally translating keys into name strings, then
generating and returning a histogram for them. This is a useful convenience
call for clients that update a dictionary of counters as they (say) scan a
data stream.
Args:
count_dict: dictionary mapping keys to occurrence counts
scale: number of characters used to indicate 100%
formatter: function used for formatting raw histogram values
key_names: dictionary mapping keys to name strings
Returns:
A histogram object based on the given data.
"""
namer = None
if key_names:
namer = lambda key: key_names[key]
else:
namer = lambda key: key
hist = [(namer(key), count) for key, count in count_dict.items()]
return Histogram(hist, scale, formatter)
@staticmethod
def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
"""Takes a list of (possibly recurring) keys and returns a histogram object.
This converts the list into a dictionary of counters, then uses
FromCountDict() to generate the actual histogram. For example:
['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...
Args:
key_list: list of (possibly recurring) keys
scale: number of characters used to indicate 100%
formatter: function used for formatting raw histogram values
key_names: dictionary mapping keys to name strings
Returns:
A histogram object based on the given data.
"""
count_dict = defaultdict(int) # Unset items default to zero
for key in key_list:
count_dict[key] += 1
return Histogram.FromCountDict(count_dict, scale, formatter, key_names)
def __str__(self):
hist_lines = []
hist_bar = '|'
for key, count in self.data:
if self.total:
bar_len = count * self.scale / self.total
hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
line = '%s %s %s' % (
str(key).ljust(self.max_key_len),
hist_bar,
self.formatter(count))
percent_str = format_utils.NumToPercent(count, self.total)
if percent_str:
line += ' (%s)' % percent_str
hist_lines.append(line)
return '\n'.join(hist_lines)
def GetKeys(self):
"""Returns the keys of the histogram."""
return [key for key, _ in self.data]