#!/usr/bin/env python
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# A script to accumulate values from the 'dmprof cat' command into CSV, JSON,
# or a human-readable tree.
#
# Usage:
# ./accumulate.py -f <format> -t <template-name> < input.json > output
#
# <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
# accumulate.py dumps a file similar to the output of "dmprof csv|json". If
# "tree" is given, accumulate.py dumps a human-readable breakdown tree.
#
# <template-name> is a label in templates.json.
import datetime
import json
import logging
import optparse
import sys
from lib.ordered_dict import OrderedDict
# Module-level logger for this script; main() attaches a stream handler to it.
LOGGER = logging.getLogger('dmprof-accumulate')
def visit_in_template(template, snapshot, depth):
    """Visits all categories via a given template.

    This function is not used.  It's a sample function to traverse a template.
    Every rule name found in the template's breakdown is written to standard
    output on its own line, indented by |depth| spaces; rules that the template
    breaks down further are visited recursively one level deeper.

    Args:
        template: A sequence whose first three items are (world name,
            breakdown name, dict mapping rule name to sub-template).
        snapshot: A mapping indexed as snapshot[world]['breakdown'][breakdown]
            that yields the rules of that breakdown.
        depth: The current nesting depth (number of indent spaces).
    """
    world = template[0]
    breakdown = template[1]
    rules = template[2]

    # Only the rule names are needed, so iterate the mapping's keys directly.
    # write() is used instead of the print statement so that the function
    # behaves the same on Python 2 and Python 3.
    for rule in snapshot[world]['breakdown'][breakdown]:
        sys.stdout.write((' ' * depth) + rule + '\n')
        if rule in rules:
            visit_in_template(rules[rule], snapshot, depth + 1)
def accumulate(template, snapshot, units_dict, target_units):
    """Accumulates units in a JSON |snapshot| with applying a given |template|.

    Args:
        template: A template tree included in a dmprof cat JSON file.  Its
            first three items are (world name, breakdown name, dict mapping
            rule name to sub-template).
        snapshot: A snapshot in a dmprof cat JSON file, indexed as
            snapshot[world]['breakdown'][breakdown].
        units_dict: A dict of units in worlds: units_dict[world][unit_id] is
            the size of that unit.
        target_units: A set of unit ids which are a target of this
            accumulation.

    Returns:
        A tuple (category_tree, total, remainder_units):
        category_tree is an OrderedDict mapping each non-hidden rule either to
        its subtotal (leaf) or to a nested category tree; a nested tree may
        carry a None key holding the size not explained by its sub-rules.
        total is the sum of the subtotals of all non-hidden rules.
        remainder_units is the set of target units matched by no non-hidden
        rule.
    """
    world = template[0]
    breakdown = template[1]
    rules = template[2]

    remainder_units = target_units.copy()
    category_tree = OrderedDict()
    total = 0

    # items() and stream.write() are used (rather than iteritems() and the
    # print statement) so the function runs on both Python 2 and Python 3.
    for rule, match in snapshot[world]['breakdown'][breakdown].items():
        if match.get('hidden'):
            continue

        matched_units = set(match['units']).intersection(target_units)
        subtotal = sum(units_dict[world][unit_id] for unit_id in matched_units)
        total += subtotal
        remainder_units = remainder_units.difference(matched_units)

        if rule not in rules:
            # A category matched with |rule| is a leaf of the breakdown tree.
            # It is NOT broken down more.
            category_tree[rule] = subtotal
            continue

        # A category matched with |rule| is broken down more.
        subtemplate = rules[rule]
        subworld = subtemplate[0]
        subbreakdown = subtemplate[1]

        if subworld == world:
            # Break down in the same world: consider units.
            subtree, accounted_total, subremainder_units = accumulate(
                subtemplate, snapshot, units_dict, matched_units)
            category_tree[rule] = subtree

            subremainder_total = 0
            if subremainder_units:
                subremainder_total = sum(
                    units_dict[world][unit_id]
                    for unit_id in subremainder_units)
                subtree[None] = subremainder_total

            if subtotal != accounted_total + subremainder_total:
                sys.stderr.write(
                    'WARNING: Sum of %s:%s is different from %s by %d bytes.'
                    % (subworld, subbreakdown, rule,
                       subtotal - (accounted_total + subremainder_total))
                    + '\n')
        else:
            # Break down in a different world: consider only the total size,
            # because unit ids are not comparable across worlds.
            subtree, accounted_total, _ = accumulate(
                subtemplate, snapshot, units_dict, set(units_dict[subworld]))
            category_tree[rule] = subtree

            if subtotal >= accounted_total:
                subtree[None] = subtotal - accounted_total
            else:
                sys.stderr.write(
                    'WARNING: Sum of %s:%s is larger than %s by %d bytes.'
                    % (subworld, subbreakdown, rule,
                       accounted_total - subtotal)
                    + '\n')
                sys.stderr.write(
                    'WARNING: Assuming remainder of %s is 0.' % rule + '\n')
                subtree[None] = 0

    return category_tree, total, remainder_units
def flatten(category_tree, header=''):
    """Flattens a category tree into a flat list.

    Args:
        category_tree: A (possibly nested) mapping from rule name to either a
            value or another category tree.
        header: The label prefix of the enclosing category, or '' at the top.

    Returns:
        A list of (label, value) pairs in traversal order.  A label is the
        '>'-joined path of rule names from the top of the tree; a falsy rule
        name (such as the None key used for remainders) is spelled
        'remaining'.
    """
    result = []
    # items() works on Python 2 and Python 3 alike, unlike iteritems().
    for rule, sub in category_tree.items():
        label = rule if rule else 'remaining'
        flattened_rule = header + '>' + label if header else label
        if isinstance(sub, (dict, OrderedDict)):
            result.extend(flatten(sub, flattened_rule))
        else:
            result.append((flattened_rule, sub))
    return result
def print_category_tree(category_tree, output, depth=0):
    """Prints a category tree in a human-readable format.

    Each entry is written to |output| on its own line as |depth| spaces of
    indentation, one separating space, and then either 'label:' (for a nested
    tree, whose entries follow one level deeper) or 'label: value'.  A None
    label is printed as 'None'.

    Args:
        category_tree: A (possibly nested) mapping from label to either an
            integer value or another category tree.
        output: A file-like object with a write() method.
        depth: The nesting depth of |category_tree|.
    """
    indent = ' ' * depth
    for label in category_tree:
        value = category_tree[label]
        # The single space after the indent reproduces the separator that the
        # Python 2 print statement inserted between the indent and the text,
        # while write() keeps this runnable on Python 3 as well.
        if isinstance(value, (dict, OrderedDict)):
            output.write('%s %s:\n' % (indent, label))
            print_category_tree(value, output, depth + 1)
        else:
            output.write('%s %s: %d\n' % (indent, label, value))
def flatten_all_category_trees(category_trees):
    """Flattens every category tree and collects the labels seen in any.

    Args:
        category_trees: An iterable of category trees accepted by flatten().

    Returns:
        A tuple (labels, table): labels is the set of all flattened labels,
        and table is a list with one OrderedDict per tree mapping flattened
        label to its value.
    """
    all_labels = set()
    table = []
    for tree in category_trees:
        row = OrderedDict()
        for pair in flatten(tree):
            row[pair[0]] = pair[1]
        all_labels.update(row)
        table.append(row)
    return all_labels, table
def output_csv(output, category_trees, data, first_time, output_exponent):
    """Writes the accumulated category trees to |output| as CSV.

    The header row is 'second' followed by all flattened labels in sorted
    order.  Each following row holds the snapshot time relative to
    |first_time| and, per label, the value scaled by |output_exponent|
    ('0' when the snapshot has no such label).

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
        data: The parsed input; data['snapshots'][i]['time'] is the time of
            the i-th snapshot.
        first_time: The time subtracted from every snapshot time.
        output_exponent: 'K' divides values by 1024, 'M' by 1024 * 1024
            (case-insensitive); anything else leaves values unscaled.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)
    sorted_flattened_labels = sorted(flattened_labels)

    # The divisor does not depend on the row or label, so pick it once.
    # None means "bytes": the value is emitted untouched, so integers stay
    # integers regardless of the interpreter's division semantics.
    exponent = output_exponent.upper()
    if exponent == 'K':
        divisor = 1024.0
    elif exponent == 'M':
        divisor = 1024.0 * 1024.0
    else:
        divisor = None

    output.write(','.join(['second'] + sorted_flattened_labels) + '\n')
    for index, row in enumerate(flattened_table):
        values = [str(data['snapshots'][index]['time'] - first_time)]
        for label in sorted_flattened_labels:
            if label not in row:
                values.append('0')
            elif divisor is None:
                values.append(str(row[label]))
            else:
                values.append(str(row[label] / divisor))
        output.write(','.join(values) + '\n')
def output_json(output, category_trees, data, first_time, template_label):
    """Writes the accumulated category trees to |output| as JSON.

    The document has 'version' set to 'JSON_DEEP_2' and a single policy named
    |template_label| holding the sorted flattened labels ('legends') and one
    entry per snapshot.  Each entry carries the flattened values plus 'second'
    (time relative to |first_time|) and 'dump_time' (formatted local time).

    Args:
        output: A file-like object that json.dump() can write to.
        category_trees: A list of category trees, one per snapshot.
        data: The parsed input; data['snapshots'][i]['time'] is the time of
            the i-th snapshot.
        first_time: The time subtracted from every snapshot time.
        template_label: The name under which the result is stored in
            'policies'.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)

    json_snapshots = []
    for index, row in enumerate(flattened_table):
        dump_time = data['snapshots'][index]['time']
        entry = row.copy()
        entry['second'] = dump_time - first_time
        entry['dump_time'] = datetime.datetime.fromtimestamp(
            dump_time).strftime('%Y-%m-%d %H:%M:%S')
        json_snapshots.append(entry)

    policy = {
        'legends': sorted(flattened_labels),
        'snapshots': json_snapshots
    }
    json_root = {
        'version': 'JSON_DEEP_2',
        'policies': {template_label: policy}
    }
    json.dump(json_root, output, indent=2, sort_keys=True)
def output_tree(output, category_trees):
    """Writes every category tree to |output| as a human-readable tree.

    Each snapshot is introduced by a '< Snapshot #N >' line (N counts from 0),
    followed by its tree printed at depth 1 and a blank line.

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
    """
    # write() is used instead of the print statement so that the function
    # behaves the same on Python 2 and Python 3.
    for index, category_tree in enumerate(category_trees):
        output.write('< Snapshot #%d >' % index + '\n')
        print_category_tree(category_tree, output, 1)
        output.write('\n')
def do_main(cat_input, output, template_label, output_format, output_exponent):
    """Does the main work: accumulate for every snapshot and print a result.

    Args:
        cat_input: A file-like object to read the 'dmprof cat' JSON from.
        output: A file-like object to write the result to.
        template_label: The name of the template to apply.  If it is empty or
            None, the input's 'default_template' is used.
        output_format: One of 'csv', 'json' and 'tree'.
        output_exponent: One of 'B', 'K' and 'M' (case-insensitive).

    Raises:
        NotImplementedError: |output_format| or |output_exponent| is not one
            of the supported values.

    If the template is not found in the input, an error is logged and the
    function returns without writing anything.
    """
    if output_format not in ['csv', 'json', 'tree']:
        raise NotImplementedError('The output format \"%s\" is not implemented.' %
                                  output_format)
    if output_exponent.upper() not in ['B', 'K', 'M']:
        raise NotImplementedError('The exponent \"%s\" is not implemented.' %
                                  output_exponent)

    data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

    templates = data['templates']
    if not template_label:
        template_label = data['default_template']
    if template_label not in templates:
        LOGGER.error('A template \'%s\' is not found.' % template_label)
        return
    template = templates[template_label]

    category_trees = []
    first_time = None
    for snapshot in data['snapshots']:
        # Compare against None rather than testing truthiness: a first
        # snapshot whose time is 0 must still become the time origin.
        if first_time is None:
            first_time = snapshot['time']

        # Build {world name: {unit id (int): size}}; the size is the first
        # item of each unit's size list.  items() keeps this runnable on both
        # Python 2 and Python 3.
        units = {}
        for world_name, world in snapshot['worlds'].items():
            units[world_name] = dict(
                (int(unit_id), sizes[0])
                for unit_id, sizes in world['units'].items())

        category_tree, _, _ = accumulate(
            template, snapshot['worlds'], units, set(units[template[0]]))
        category_trees.append(category_tree)

    if output_format == 'csv':
        output_csv(output, category_trees, data, first_time, output_exponent)
    elif output_format == 'json':
        output_json(output, category_trees, data, first_time, template_label)
    elif output_format == 'tree':
        output_tree(output, category_trees)
def main():
    """Parses the command line and runs do_main() on stdin and stdout.

    Attaches an INFO-level stream handler with a message-only format to
    LOGGER, then reads the -t/--template, -f/--format (default 'csv') and
    -e/--exponent (default 'M') options.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.setLevel(logging.DEBUG)
    LOGGER.addHandler(stream_handler)

    option_parser = optparse.OptionParser()
    option_parser.add_option(
        '-t', '--template', dest='template', metavar='TEMPLATE',
        help='Apply TEMPLATE to list up.')
    option_parser.add_option(
        '-f', '--format', dest='format', default='csv',
        help='Specify the output format: csv, json or tree.')
    option_parser.add_option(
        '-e', '--exponent', dest='exponent', default='M',
        help='Specify B (bytes), K (kilobytes) or M (megabytes).')

    # Positional arguments (including the program name) are ignored.
    options = option_parser.parse_args(sys.argv)[0]

    do_main(sys.stdin, sys.stdout,
            options.template, options.format, options.exponent)


# Run only when executed as a script; main()'s return value is passed to
# sys.exit().
if __name__ == '__main__':
    sys.exit(main())