from parser.aggregate import aggregate_times, nan_to_zero, LAYER_TOTAL
from parser.naming import layers, names, phases, make_tag, subphases
from parser.naming import LAYER_APPLICATION, LAYER_CPU
from parser.naming import (PHASE_INITIALIZATION, PHASE_PREPARATION, PHASE_COMPILATION,
PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION, PHASE_RESULTS,
PHASE_TERMINATION, PHASE_OVERALL, PHASE_WARMUP,
PHASE_BENCHMARK)
import json
import math
import sys
def _nan_to_none(obj):
    """Return a copy of obj with every float NaN replaced by None.

    Recurses into dicts, lists and tuples (tuples come back as lists, which is
    how json serializes them anyway); any other value is returned unchanged.
    """
    if isinstance(obj, float) and math.isnan(obj):
        return None
    if isinstance(obj, dict):
        return {key: _nan_to_none(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [_nan_to_none(item) for item in obj]
    return obj


def print_stats(tracker_map, print_detail=True, total_times=False, per_execution=False,
                json_output=False, starting_mark='', sep=''):
    """ Prints statistics for a single Overall phase as text or json.

    For text output:
      By default prints the self-time for each layer, prints total times instead if
      given total_times=True.
      By default prints stats for all phases, prints only the Execution and its
      subphases (as per-execution times) if per_execution=True.
      If per_execution=True and the trace contains separate Warmup and Benchmark
      phases, prints only the Benchmark phase.

    For json output:
      The json output is internal to NNAPI and is not guaranteed stable or
      extensively defined. It does however contain a version field so that
      backwards-compatible tools can be created on top of it.
      The json output includes both the statistics themselves produced by
      aggregate_times as well as the values used to create the text output
      so that those are easily available.
      Look at the end of the function for the fields included in the json.

    Args:
      tracker_map: mapping of trackers (iterated as pid -> tracker). Every
        tracker must report is_complete(); otherwise a message is written to
        stderr and nothing is printed.
      print_detail: text output only; after the summary table, call
        print_stats() and then print() on every tracker.
      total_times: use total times instead of self-times.
      per_execution: print only Execution and its subphases, divided by the
        execution count (n/a if that count is zero).
      json_output: print json instead of the text table.
      starting_mark: printed on its own line before the text table; stored in
        the "starting_mark" field of the json output.
      sep: if non-empty, printed before any other output.

    Returns:
      None. All results are written to stdout.
    """
    PHASE_EXECUTION_LESS_IO_AND_RESULTS = "PEO"
    phases_to_pick = phases + [PHASE_INPUTS_AND_OUTPUTS, PHASE_RESULTS]

    # Refuse to print partial statistics.
    for tracker in tracker_map.values():
        if not tracker.is_complete():
            sys.stderr.write("Incomplete trace, not able to print all statistics\n")
            return

    if sep:
        print(sep)

    # Select template and statistics to use
    times, self_times, has_warmup_and_benchmark, execution_counts = aggregate_times(tracker_map)
    template = TEMPLATE_EXECUTION_ONLY if per_execution else TEMPLATE_ALL_PHASES
    if total_times:
        template = template.replace("self-times", "total time")
        times_to_use = times
    else:
        times_to_use = self_times
    if has_warmup_and_benchmark and per_execution:
        template = template.replace("Execution", "Benchmark")
        # NOTE: this overwrites the Execution entries of the aggregated
        # statistics in place, so the json output reflects the Benchmark
        # numbers as well.
        for phase in [PHASE_EXECUTION] + subphases[PHASE_EXECUTION]:
            for layer in layers + [LAYER_TOTAL]:
                times_to_use[phase][layer] = times_to_use[PHASE_BENCHMARK][phase][layer]

    # Rewrite template shorthand (":fl" must be expanded before ":f", which is
    # a prefix of it)
    template = template.replace(":fl", ":>11.2f")
    template = template.replace(":f", ":>9.2f")

    # Gather template inputs from statistics
    values = {}
    has_cpu = False
    for layer in layers:
        for phase in phases_to_pick:
            t = times_to_use[phase][layer]
            values[make_tag(layer, phase)] = t
            if layer == LAYER_CPU:
                has_cpu = (has_cpu or t > 0.0)

    # Calculate layer totals and PHASE_EXECUTION_LESS_IO_AND_RESULTS
    for phase in phases_to_pick:
        values[make_tag(LAYER_TOTAL, phase)] = times_to_use[phase][LAYER_TOTAL]
    for layer in layers + [LAYER_TOTAL]:
        values[make_tag(layer, PHASE_EXECUTION_LESS_IO_AND_RESULTS)] = (
            values[make_tag(layer, PHASE_EXECUTION)] -
            values[make_tag(layer, PHASE_INPUTS_AND_OUTPUTS)] -
            values[make_tag(layer, PHASE_RESULTS)])
        values[make_tag(layer, PHASE_OVERALL)] = times_to_use[PHASE_OVERALL][layer]

    # Calculate layer execution percentages
    execution_total = values[make_tag(LAYER_TOTAL, PHASE_EXECUTION)]
    for layer in layers:
        if execution_total > 0.0:
            values[make_tag(layer, "PEp")] = (
                values[make_tag(layer, PHASE_EXECUTION)] * 100.0 / execution_total)
        else:
            values[make_tag(layer, "PEp")] = math.nan

    # Make output numbers per-execution if desired
    if per_execution:
        if has_warmup_and_benchmark:
            divide_by = execution_counts[PHASE_BENCHMARK]
        else:
            divide_by = execution_counts[PHASE_OVERALL]
        per_execution_phases = [PHASE_INPUTS_AND_OUTPUTS, PHASE_EXECUTION_LESS_IO_AND_RESULTS,
                                PHASE_RESULTS, PHASE_EXECUTION]
        for layer in layers + [LAYER_TOTAL]:
            for phase in per_execution_phases:
                tag = make_tag(layer, phase)
                # No executions recorded: report n/a rather than dividing by zero.
                values[tag] = values[tag] / divide_by if divide_by != 0 else math.nan

    # Generate and print output
    if not json_output:
        # Apply template and prettify numbers
        output = template.format(**values)
        output = output.replace(" 0.00%", " -")
        output = output.replace(" 0.00", " -")
        output = output.replace(" nan", " n/a")

        # Print output
        print(starting_mark)
        for line in output.splitlines():
            # Hide the CPU row when no CPU time was recorded.
            if line.startswith("CPU") and not has_cpu:
                continue
            print(line)

        if print_detail:
            for tracker in tracker_map.values():
                tracker.print_stats()
            for tracker in tracker_map.values():
                tracker.print()
    else:
        output = dict(times=times, self_times=self_times, execution_counts=execution_counts,
                      template_inputs=values, version=1, starting_mark=starting_mark)
        # JSON doesn't recognize NaN. Convert the values themselves to null
        # instead of patching the serialized text, so that strings which merely
        # contain "NaN" (e.g. starting_mark) are left intact.
        print(json.dumps(_nan_to_none(output), indent=2, sort_keys=True))
def reset_trackers(tracker_map):
    """Call reset() on every tracker stored in tracker_map.

    Trackers are visited in the mapping's iteration order. Returns None.
    """
    for current in tracker_map.values():
        current.reset()
# Text template for the summary table covering all phases; print_stats uses it
# when per_execution is False.
# Placeholders are "<layer>_<phase>" tags filled in through str.format. Their
# format specs are shorthand: print_stats rewrites ":fl" to ":>11.2f" and ":f"
# to ":>9.2f" before formatting, and replaces "self-times" with "total time"
# when total_times is requested. The line starting with "CPU" is skipped when
# no CPU time was recorded.
TEMPLATE_ALL_PHASES = """
===========================================================================================================================================
NNAPI timing summary (self-times, ms wall-clock) Execution
----------------------------------------------------
Initialization Preparation Compilation I/O Compute Results Ex. total Termination Total
-------------- ----------- ----------- ----------- ------------ ----------- ----------- ----------- ----------
Application {LA_PI:f} {LA_PP:f} {LA_PC:f} {LA_PIO:fl} {LA_PEO:fl} {LA_PR:f} {LA_PE:f} {LA_PT:f} {LA_PO:f}*
Runtime {LR_PI:f} {LR_PP:f} {LR_PC:f} {LR_PIO:fl} {LR_PEO:fl} {LR_PR:f} {LR_PE:f} {LR_PT:f} {LR_PO:f}
IPC {LI_PI:f} {LI_PP:f} {LI_PC:f} {LI_PIO:fl} {LI_PEO:fl} {LI_PR:f} {LI_PE:f} {LI_PT:f} {LI_PO:f}
Driver {LD_PI:f} {LD_PP:f} {LD_PC:f} {LD_PIO:fl} {LD_PEO:fl} {LD_PR:f} {LD_PE:f} {LD_PT:f} {LD_PO:f}
CPU {LC_PI:f} {LC_PP:f} {LC_PC:f} {LC_PIO:fl} {LC_PEO:fl} {LC_PR:f} {LC_PE:f} {LC_PT:f} {LC_PO:f}
Total {LT_PI:f}* {LT_PP:f}* {LT_PC:f}* {LT_PIO:fl}* {LT_PEO:fl}* {LT_PR:f}* {LT_PE:f}* {LT_PT:f}* {LT_PO:f}*
===========================================================================================================================================
* This total ignores missing (n/a) values and thus is not necessarily consistent with the rest of the numbers
"""
# Text template for the execution-only summary table; print_stats uses it when
# per_execution is True. Besides the I/O, Compute, Results and Total columns it
# has a per-layer percentage column fed by the "<layer>_PEp" values.
# As with the all-phases template, ":fl" / ":f" are shorthand format specs that
# print_stats expands before calling str.format. When the trace has separate
# Warmup and Benchmark phases, print_stats replaces "Execution" with
# "Benchmark" in this text.
TEMPLATE_EXECUTION_ONLY = """
================================================================================
NNAPI timing summary (self-times, ms wall-clock) Execution
------------------------------------------------------------------
I/O Compute Results Total Percentage
----------- ------------ ----------- ----------- -----------
Application {LA_PIO:fl} {LA_PEO:fl} {LA_PR:f} {LA_PE:f} {LA_PEp:fl}%
Runtime {LR_PIO:fl} {LR_PEO:fl} {LR_PR:f} {LR_PE:f} {LR_PEp:fl}%
IPC {LI_PIO:fl} {LI_PEO:fl} {LI_PR:f} {LI_PE:f} {LI_PEp:fl}%
Driver {LD_PIO:fl} {LD_PEO:fl} {LD_PR:f} {LD_PE:f} {LD_PEp:fl}%
CPU {LC_PIO:fl} {LC_PEO:fl} {LC_PR:f} {LC_PE:f} {LC_PEp:fl}%
Total {LT_PIO:fl}* {LT_PEO:fl}* {LT_PR:f}* {LT_PE:f} 100%
================================================================================
* This total ignores missing (n/a) values and thus is not necessarily consistent
with the rest of the numbers
"""