# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2015, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
""" Latency Analysis Module """
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab as pl
import re
from collections import namedtuple
from analysis_module import AnalysisModule
from devlib.utils.misc import memoized
from trappy.utils import listify
# Identification data of a Task: its PID, the names associated to that PID and
# a printable label
TaskData = namedtuple('TaskData', 'pid names label')
# Cumulative Distribution Function data: the CDF series, the threshold it has
# been evaluated against and the fraction of samples above/below it
CDF = namedtuple('CDF', 'df threshold above below')
class LatencyAnalysis(AnalysisModule):
"""
Support for plotting Latency Analysis data
:param trace: input Trace object
:type trace: :mod:`libs.utils.Trace`
"""
def __init__(self, trace):
    # All the initialization is delegated to the AnalysisModule base class
    super(LatencyAnalysis, self).__init__(trace)
###############################################################################
# DataFrame Getter Methods
###############################################################################
@memoized
def _dfg_latency_df(self, task):
    """
    DataFrame of task's wakeup/suspend events

    The returned DataFrame index is the time, in seconds, an event related
    to `task` happened.
    The DataFrame has these columns:
    - target_cpu: the CPU where the task has been scheduled
                  reported only for wakeup events
    - __cpu: the `__cpu` field of the sched_switch event
             reported only for sched_switch events
    - curr_state: the current task state:
        A letter which corresponds to the standard events reported by the
        prev_state field of a sched_switch event.
        The exceptions are 'A', which is used to represent active tasks,
        i.e. tasks RUNNING on a CPU, and 'W', which is set on each wakeup
        event
    - next_state: the next status for the task
    - t_start: the time when the current status started, it matches Time
    - t_delta: the interval of time after which the task will switch to the
               next_state

    NOTE: this getter is decorated with @memoized; callers are expected to
    work on a copy if they need to modify the returned DataFrame.

    :param task: the task to report wakeup latencies for
    :type task: int or str

    :returns: the DataFrame described above, or None if the required events
              are not in the trace or the task cannot be resolved
    """
    # Both events are required: wakeups mark when the task becomes
    # runnable, switches mark when it enters/leaves a CPU
    for event in ('sched_wakeup', 'sched_switch'):
        if not self._trace.hasEvents(event):
            self._log.warning('Events [%s] not found, '
                              'cannot compute task latencies!', event)
            return None

    # Get task data
    td = self._getTaskData(task)
    if not td:
        return None

    wk_df = self._dfg_trace_event('sched_wakeup')
    sw_df = self._dfg_trace_event('sched_switch')

    # Filter Task's WAKEUP events
    task_wakeup = wk_df[wk_df.pid == td.pid][['target_cpu', 'pid']]

    # Filter Task's START events.
    # Take an explicit copy: the selection is modified below and must not
    # alias the trace's sched_switch DataFrame.
    task_events = (sw_df.prev_pid == td.pid) | (sw_df.next_pid == td.pid)
    task_switches_df = sw_df[task_events][
        ['__cpu', 'prev_pid', 'next_pid', 'prev_state']].copy()

    # Unset prev_state for switch_in events, i.e.
    # we don't care about the status of a task we are replacing.
    # A plain loop over the two columns is used instead of a row-wise
    # apply() so that an empty selection is handled as well.
    curr_state = pd.Series(
        [self._taskState(prev_state) if prev_pid == td.pid else np.nan
         for prev_pid, prev_state in zip(task_switches_df['prev_pid'],
                                         task_switches_df['prev_state'])],
        index=task_switches_df.index, dtype=object)

    # Rename prev_state
    task_switches_df = task_switches_df.rename(
        columns={'prev_state': 'curr_state'})

    # Fill in Running status
    # We've just set curr_state (a.k.a prev_state) to nan where td.pid was
    # switching in, so set the state to 'A' ("active") in those places.
    task_switches_df['curr_state'] = curr_state.fillna(value='A').values

    # Join Wakeup and SchedSwitch events
    task_latency_df = task_wakeup.join(task_switches_df, how='outer',
                                       lsuffix='_wkp', rsuffix='_slp')
    # Remove not required columns
    task_latency_df = task_latency_df[
        ['target_cpu', '__cpu', 'curr_state']].copy()
    # Set Wakeup state on each Wakeup event
    task_latency_df['curr_state'] = \
        task_latency_df['curr_state'].fillna(value='W')

    # Sanity check for all task states to be mapped to a char
    unmapped = [value for value in task_switches_df['curr_state'].unique()
                if not isinstance(value, str)]
    for value in unmapped:
        self._log.warning('The [sched_switch] events contain "prev_state" value [%s]',
                          value)
    if unmapped:
        verb = 'is' if len(unmapped) == 1 else 'are'
        self._log.warning('   which %s not currently mapped into a task state.',
                          verb)
        self._log.warning('Check mappings in:')
        self._log.warning('   %s::%s _taskState()',
                          __file__, self.__class__.__name__)

    # Forward annotate task state
    task_latency_df['next_state'] = task_latency_df.curr_state.shift(-1)

    # Forward account for previous state duration
    task_latency_df['t_start'] = task_latency_df.index
    task_latency_df['t_delta'] = (
        task_latency_df['t_start'].shift(-1)
        - task_latency_df['t_start']
    )

    return task_latency_df
# Select Wakeup latency
def _dfg_latency_wakeup_df(self, task):
    """
    DataFrame of task's wakeup latencies

    The returned DataFrame index is the time, in seconds, `task` waken-up.
    The DataFrame has just one column:
    - wakeup_latency: the time the task waited before getting a CPU

    :param task: the task to report wakeup latencies for
    :type task: int or str

    :returns: the DataFrame described above, or None if the task events are
              not available
    """
    events_df = self._dfg_latency_df(task)
    if events_df is None:
        return None

    # A wakeup latency is the duration of a 'W' (wakeup) state which is
    # directly followed by an 'A' (active) state
    is_wakeup = events_df.curr_state == 'W'
    then_active = events_df.next_state == 'A'
    latencies_df = events_df[is_wakeup & then_active][['t_delta']]

    return latencies_df.rename(columns={'t_delta': 'wakeup_latency'})
# Select Preemption latency
def _dfg_latency_preemption_df(self, task):
    """
    DataFrame of task's preemption latencies

    The returned DataFrame index is the time, in seconds, `task` has been
    preempted.
    The DataFrame has just one column:
    - preempt_latency: the time the task waited before getting again a CPU

    :param task: the task to report preemption latencies for
    :type task: int or str

    :returns: the DataFrame described above, or None if the task events are
              not available
    """
    events_df = self._dfg_latency_df(task)
    if events_df is None:
        return None

    # A preemption latency is the duration of a runnable state (the task
    # has been switched out while still RUNNING) which is directly followed
    # by an 'A' (active) state
    was_preempted = events_df.curr_state.isin([0, 'R', 'R+'])
    then_active = events_df.next_state == 'A'
    latencies_df = events_df[was_preempted & then_active][['t_delta']]

    return latencies_df.rename(columns={'t_delta': 'preempt_latency'})
@memoized
def _dfg_activations_df(self, task):
    """
    DataFrame of task's wakeup intervals

    The returned DataFrame index is the time, in seconds, `task` has
    waken-up.
    The DataFrame has just one column:
    - activation_interval: the time between the wakeup event and the next
                           wakeup event of the task (NaN for the last
                           wakeup, which has no successor)

    :param task: the task to report activation intervals for
    :type task: int or str

    :returns: the DataFrame described above, or None if the task events are
              not available
    """
    latency_df = self._dfg_latency_df(task)
    if latency_df is None:
        # Missing events or unknown task: nothing to report
        return None

    # Select all wakeup events, on a copy since a column is added below
    wkp_df = latency_df[latency_df.curr_state == 'W'].copy()

    # Compute delta between successive wakeup events.
    # The former implementation additionally shifted this column forward
    # and then backward by one row: the two shifts cancel each other out,
    # thus the reported values are unchanged.
    wkp_df['activation_interval'] = (
        wkp_df['t_start'].shift(-1) - wkp_df['t_start'])

    # Return the activation period each time the task wakeups
    return wkp_df[['activation_interval']]
@memoized
def _dfg_runtimes_df(self, task):
    """
    DataFrame of task's runtime each time the task blocks

    The returned DataFrame contains one row for each event of `task` whose
    next state is a sleep ('S') or an exit ('x'), i.e. each time the task
    completes an activation.
    The DataFrame has just one column:
    - running_time: the time the task spent RUNNING since its last wakeup

    :param task: the task to report runtimes for
    :type task: int or str

    :returns: the DataFrame described above, or None if the task events are
              not available
    """
    latency_df = self._dfg_latency_df(task)
    if latency_df is None:
        # Missing events or unknown task: nothing to report
        return None

    # Work on a private copy: the latency DataFrame is memoized and shared
    # with the other getters, adding a column to it would corrupt it
    run_df = latency_df.copy()

    # Add up RUNNING intervals of each activation.
    # The accumulation is stateful, thus rows are visited exactly once and
    # in order with an explicit loop.
    runtime = 0
    spurious_wkp = False
    running_times = []
    rows = zip(run_df['curr_state'], run_df['next_state'],
               run_df['t_delta'], run_df['t_start'])
    for curr_state, next_state, t_delta, t_start in rows:

        if curr_state == 'W':
            if spurious_wkp:
                # Wakeup seen while the task was already on a CPU (see
                # below): keep accumulating instead of starting over
                runtime += t_delta
                spurious_wkp = False
            else:
                # A new activation starts
                runtime = 0

        elif curr_state == 'A':
            if next_state in ('R', 'R+', 'S', 'x', 'D'):
                runtime += t_delta
            elif next_state == 'W':
                # This is required to capture strange trace sequences
                # where a switch_in event is followed by a wakeup_event.
                # This sequence is not expected, but we found it in some
                # traces.
                # Possible reasons could be:
                # - misplaced sched_wakeup events
                # - trace buffer artifacts
                # TO BE BETTER investigated in kernel space.
                # For the time being, we account this interval as RUNNING
                # time, which is what kernelshark does.
                runtime += t_delta
                spurious_wkp = True
            elif next_state != 'n':
                self._log.warning("Unexpected next state: %s @ %f",
                                  next_state, t_start)
                # Report 0 for this row without touching the accumulator
                running_times.append(0)
                continue

        # Any other current state (e.g. sleeping or runnable) does not
        # contribute to the runtime: report the value accumulated so far
        running_times.append(runtime)

    run_df['running_time'] = running_times

    # Return RUNTIME computed for each activation,
    # each time the task blocks or terminate
    return run_df[run_df.next_state.isin(['S', 'x'])][['running_time']]
###############################################################################
# Plotting Methods
###############################################################################
def plotLatency(self, task, kind='all', tag=None, threshold_ms=1, bins=64):
    """
    Generate a set of plots to report the WAKEUP and PREEMPT latencies the
    specified task has been subject to. A WAKEUP latency is the time from
    when a task becomes RUNNABLE till the first time it gets a CPU.
    A PREEMPT latency is the time from when a RUNNING task is suspended
    because the CPU is assigned to another task till when the task
    enters the CPU again.

    The generated figure is also saved as a PNG into the trace plots
    directory.

    :param task: the task to report latencies for
    :type task: int or str

    :param kind: the kind of latencies to report: 'all', 'wakeup' and/or
                 'preempt'
    :type kind: str

    :param tag: a string to add to the plot title
    :type tag: str

    :param threshold_ms: the minimum acceptable [ms] value to report
                         graphically in the generated plots
    :type threshold_ms: int or float

    :param bins: number of bins to be used for the latencies histogram
    :type bins: int

    :returns: a DataFrame with statistics on plotted latencies, or None if
              there is nothing to plot
    """
    if not self._trace.hasEvents('sched_switch'):
        self._log.warning('Event [sched_switch] not found, '
                          'plot DISABLED!')
        return
    if not self._trace.hasEvents('sched_wakeup'):
        self._log.warning('Event [sched_wakeup] not found, '
                          'plot DISABLED!')
        return

    # Get task data
    td = self._getTaskData(task)
    if not td:
        return None

    # Load wakeup latencies (if required)
    wkp_df = None
    if 'all' in kind or 'wakeup' in kind:
        wkp_df = self._dfg_latency_wakeup_df(td.pid)
    if wkp_df is not None:
        wkp_df = wkp_df.rename(columns={'wakeup_latency': 'latency'})
        self._log.info('Found: %5d WAKEUP latencies', len(wkp_df))

    # Load preempt latencies (if required)
    prt_df = None
    if 'all' in kind or 'preempt' in kind:
        prt_df = self._dfg_latency_preemption_df(td.pid)
    if prt_df is not None:
        prt_df = prt_df.rename(columns={'preempt_latency': 'latency'})
        self._log.info('Found: %5d PREEMPT latencies', len(prt_df))

    # Join the available data frames.
    # Only one of them is loaded when a single kind of latency has been
    # requested, thus missing ones must be skipped.
    available = [data for data in (wkp_df, prt_df) if data is not None]
    df = pd.concat(available) if available else None
    if df is None or df.empty:
        self._log.warning('No Latency info for task [%s]', td.label)
        return
    ymax = 1.1 * df.latency.max()
    self._log.info('Total: %5d latency events', len(df))

    # Build the series for the CDF
    threshold_s = threshold_ms / 1000.
    cdf = self._getCDF(df.latency, threshold_s)
    self._log.info('%.1f %% samples below %s [ms] threshold',
                   100. * cdf.below, threshold_ms)

    # Setup plots
    gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])
    plt.figure(figsize=(16, 8))

    plot_title = "[{}]: {} latencies".format(td.label, kind.upper())
    if tag:
        plot_title = "{} [{}]".format(plot_title, tag)
    plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)

    # Latency events duration over time
    axes = plt.subplot(gs[0, 0:2])
    axes.set_title(plot_title)
    series = ((wkp_df, 'wakeup', 'b+'), (prt_df, 'preempt', 'r+'))
    for data, column, style in series:
        if data is None or data.empty:
            # Nothing to plot for this kind of latency
            continue
        try:
            data.rename(columns={'latency': column}).plot(
                style=style, logy=True, ax=axes)
        except Exception as e:
            # Plotting is best-effort: a failure on one series must not
            # prevent the generation of the other plots, but it is
            # reported instead of being silently ignored
            self._log.warning('Failed to plot %s latencies: %s',
                              column.upper(), e)
    axes.axhline(threshold_s, linestyle='--', color='g')
    self._trace.analysis.status.plotOverutilized(axes)
    axes.legend(loc='lower center', ncol=2)
    axes.set_xlim(self._trace.x_min, self._trace.x_max)

    # Cumulative distribution of latencies samples
    axes = plt.subplot(gs[1, 0])
    cdf.df.plot(ax=axes, legend=False, xlim=(0, None),
                title='Latencies CDF ({:.1f}% within {} [ms] threshold)'
                .format(100. * cdf.below, threshold_ms))
    axes.axvspan(0, threshold_s, facecolor='g', alpha=0.5)
    axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')

    # Histogram of all latencies
    axes = plt.subplot(gs[1, 1])
    df.latency.plot(kind='hist', bins=bins, ax=axes,
                    xlim=(0, ymax), legend=False,
                    title='Latency histogram ({} bins, {} [ms] green threshold)'
                    .format(bins, threshold_ms))
    axes.axvspan(0, threshold_s, facecolor='g', alpha=0.5)

    # Save generated plots into datadir
    task_name = re.sub(r'[ :/]', '_', td.label)
    figname = '{}/{}task_latencies_{}_{}.png'\
              .format(self._trace.plots_dir, self._trace.plots_prefix,
                      td.pid, task_name)
    pl.savefig(figname, bbox_inches='tight')

    # Return statistics, extended with a row reporting the threshold and
    # labelled with the percentage of samples below it
    stats_df = df.describe(percentiles=[0.95, 0.99])
    label = '{:.1f}%'.format(100. * cdf.below)
    threshold_df = pd.DataFrame(
        [cdf.threshold], columns=['latency'], index=[label])
    return pd.concat([stats_df, threshold_df])
def plotLatencyBands(self, task, axes=None):
    """
    Draw a plot that shows intervals of time when the execution of a
    RUNNABLE task has been delayed. The plot reports:
      WAKEUP     latencies as RED colored bands
      PREEMPTION latencies as BLUE colored bands

    The optional axes parameter allows to plot the signal on an existing
    graph.

    :param task: the task to report latencies for
    :type task: int or str

    :param axes: axes on which to plot the signal
    :type axes: :mod:`matplotlib.axes.Axes`
    """
    if not self._trace.hasEvents('sched_switch'):
        self._log.warning('Event [sched_switch] not found, '
                          'plot DISABLED!')
        return
    if not self._trace.hasEvents('sched_wakeup'):
        self._log.warning('Event [sched_wakeup] not found, '
                          'plot DISABLED!')
        return

    # Get task PID
    td = self._getTaskData(task)
    if not td:
        return None

    wkl_df = self._dfg_latency_wakeup_df(td.pid)
    prt_df = self._dfg_latency_preemption_df(td.pid)

    if wkl_df is None and prt_df is None:
        self._log.warning('No task with name [%s]', td.label)
        return

    # If not axis provided: generate a standalone plot
    if axes is None:
        gs = gridspec.GridSpec(1, 1)
        plt.figure(figsize=(16, 2))
        axes = plt.subplot(gs[0, 0])
        axes.set_title('Latencies on [{}] '
                       '(red: WAKEUP, blue: PREEMPT)'
                       .format(td.label))
        axes.set_xlim(self._trace.x_min, self._trace.x_max)
        axes.set_yticklabels([])
        axes.set_xlabel('Time [s]')
        axes.grid(True)

    # Draw WAKEUP latencies first, then PREEMPTION latencies
    bands = ((wkl_df, 'wakeup_latency', 'r'),
             (prt_df, 'preempt_latency', 'b'))
    for data, column, color in bands:
        if data is None:
            continue
        # Skip undefined durations, which cannot be represented by a band
        durations = data[column].dropna()
        try:
            # Pair timestamps and durations by position: a lookup by label
            # would return multiple values for duplicated timestamps
            for start, duration in zip(durations.index, durations.values):
                axes.axvspan(start, start + duration,
                             facecolor=color, alpha=0.1)
            axes.set_xlim(self._trace.x_min, self._trace.x_max)
        except Exception as e:
            # Drawing is best-effort, but failures are reported instead of
            # being silently ignored
            self._log.warning('Failed to plot [%s] bands: %s', column, e)
def plotActivations(self, task, tag=None, threshold_ms=16, bins=64):
    """
    Plots "activation intervals" for the specified task

    An "activation interval" is time incurring between two consecutive
    wakeups of a task. A set of plots is generated to report:

    - Activations interval at wakeup time: every time a task wakeups a
      point is plotted to represent the activation interval reported for
      that wakeup.

    - Activations interval cumulative function: reports the cumulative
      function of the activation intervals.

    - Activations intervals histogram: reports a histogram, using the
      specified number of bins (64 by default), of the activation intervals.

    All plots are parameterized based on the value of threshold_ms, which
    can be used to filter activations intervals bigger than 2 times this
    value.
    Such a threshold is useful to filter out from the plots outliers thus
    focusing the analysis in the most critical periodicity under analysis.
    The number and percentage of discarded samples is reported in output.
    A default threshold of 16 [ms] is used, which is useful for example
    to analyze a 60Hz rendering pipelines.

    A PNG of the generated plots is generated and saved in the trace plots
    directory.

    :param task: the task to report latencies for
    :type task: int or str

    :param tag: a string to add to the plot title
    :type tag: str

    :param threshold_ms: the minimum acceptable [ms] value to report
                         graphically in the generated plots
    :type threshold_ms: int or float

    :param bins: number of bins to be used for the intervals histogram
    :type bins: int

    :returns: a DataFrame with statistics on plotted activation intervals,
              or None if there is nothing to plot
    """
    if not self._trace.hasEvents('sched_switch'):
        self._log.warning('Event [sched_switch] not found, '
                          'plot DISABLED!')
        return
    if not self._trace.hasEvents('sched_wakeup'):
        self._log.warning('Event [sched_wakeup] not found, '
                          'plot DISABLED!')
        return

    # Get task data
    td = self._getTaskData(task)
    if not td:
        return None

    # Load activation data
    wkp_df = self._dfg_activations_df(td.pid)
    if wkp_df is None:
        return None
    self._log.info('Found: %5d activations for [%s]',
                   len(wkp_df), td.label)

    # Disregard data above two time the specified threshold
    threshold_s = threshold_ms / 1000.
    y_max = 2 * threshold_s
    len_tot = len(wkp_df)
    wkp_df = wkp_df[wkp_df.activation_interval <= y_max]
    len_plt = len(wkp_df)
    if len_plt < len_tot:
        len_dif = len_tot - len_plt
        len_pct = 100. * len_dif / len_tot
        self._log.warning('Discarding %d activation intervals (above 2 x threshold_ms, '
                          '%.1f%% of the overall activations)',
                          len_dif, len_pct)
    if wkp_df.empty:
        # Neither the CDF nor the plots can be built without samples
        self._log.warning('No activation intervals to plot for task [%s]',
                          td.label)
        return None
    ymax = 1.1 * wkp_df.activation_interval.max()

    # Build the series for the CDF
    cdf = self._getCDF(wkp_df.activation_interval, threshold_s)
    self._log.info('%.1f %% samples below %s [ms] threshold',
                   100. * cdf.below, threshold_ms)

    # Setup plots
    gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])
    plt.figure(figsize=(16, 8))

    plot_title = "[{}]: activaton intervals (@ wakeup time)".format(td.label)
    if tag:
        plot_title = "{} [{}]".format(plot_title, tag)
    plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)

    # Activations intervals over time
    axes = plt.subplot(gs[0, 0:2])
    axes.set_title(plot_title)
    wkp_df.plot(style='g+', logy=False, ax=axes)

    axes.axhline(threshold_s, linestyle='--', color='g')
    self._trace.analysis.status.plotOverutilized(axes)
    axes.legend(loc='lower center', ncol=2)
    axes.set_xlim(self._trace.x_min, self._trace.x_max)

    # Cumulative distribution of all activations intervals
    axes = plt.subplot(gs[1, 0])
    cdf.df.plot(ax=axes, legend=False, xlim=(0, None),
                title='Activations CDF ({:.1f}% within {} [ms] threshold)'
                .format(100. * cdf.below, threshold_ms))
    axes.axvspan(0, threshold_s, facecolor='g', alpha=0.5)
    axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')

    # Histogram of all activations intervals
    axes = plt.subplot(gs[1, 1])
    wkp_df.plot(kind='hist', bins=bins, ax=axes,
                xlim=(0, ymax), legend=False,
                title='Activation intervals histogram ({} bins, {} [ms] green threshold)'
                .format(bins, threshold_ms))
    axes.axvspan(0, threshold_s, facecolor='g', alpha=0.5)

    # Save generated plots into datadir
    task_name = re.sub(r'[ :/]', '_', td.label)
    figname = '{}/{}task_activations_{}_{}.png'\
              .format(self._trace.plots_dir, self._trace.plots_prefix,
                      td.pid, task_name)
    pl.savefig(figname, bbox_inches='tight')

    # Return statistics, extended with a row reporting the threshold and
    # labelled with the percentage of samples below it
    stats_df = wkp_df.describe(percentiles=[0.95, 0.99])
    label = '{:.1f}%'.format(100. * cdf.below)
    threshold_df = pd.DataFrame(
        [cdf.threshold], columns=['activation_interval'], index=[label])
    return pd.concat([stats_df, threshold_df])
def plotRuntimes(self, task, tag=None, threshold_ms=8, bins=64):
    """
    Plots "running times" for the specified task

    A "running time" is the sum of all the time intervals a task executed
    in between a wakeup and the next sleep (or exit).
    A set of plots is generated to report:

    - Running times at block time: every time a task blocks a
      point is plotted to represent the cumulative time the task has been
      running since its last wakeup

    - Running time cumulative function: reports the cumulative
      function of the running times.

    - Running times histogram: reports a histogram, using the specified
      number of bins (64 by default), of the running times.

    All plots are parameterized based on the value of threshold_ms, which
    can be used to filter running times bigger than 2 times this value.
    Such a threshold is useful to filter out from the plots outliers thus
    focusing the analysis in the most critical periodicity under analysis.
    The number and percentage of discarded samples is reported in output.
    A default threshold of 8 [ms] is used.

    A PNG of the generated plots is generated and saved in the trace plots
    directory.

    :param task: the task to report latencies for
    :type task: int or str

    :param tag: a string to add to the plot title
    :type tag: str

    :param threshold_ms: the minimum acceptable [ms] value to report
                         graphically in the generated plots
    :type threshold_ms: int or float

    :param bins: number of bins to be used for the runtime's histogram
    :type bins: int

    :returns: a DataFrame with statistics on plotted running times, or None
              if there is nothing to plot
    """
    if not self._trace.hasEvents('sched_switch'):
        self._log.warning('Event [sched_switch] not found, '
                          'plot DISABLED!')
        return
    if not self._trace.hasEvents('sched_wakeup'):
        self._log.warning('Event [sched_wakeup] not found, '
                          'plot DISABLED!')
        return

    # Get task data
    td = self._getTaskData(task)
    if not td:
        return None

    # Load runtime data
    run_df = self._dfg_runtimes_df(td.pid)
    if run_df is None:
        return None
    self._log.info('Found: %5d activations for [%s]',
                   len(run_df), td.label)

    # Disregard data above two time the specified threshold
    threshold_s = threshold_ms / 1000.
    y_max = 2 * threshold_s
    len_tot = len(run_df)
    run_df = run_df[run_df.running_time <= y_max]
    len_plt = len(run_df)
    if len_plt < len_tot:
        len_dif = len_tot - len_plt
        len_pct = 100. * len_dif / len_tot
        self._log.warning('Discarding %d running times (above 2 x threshold_ms, '
                          '%.1f%% of the overall activations)',
                          len_dif, len_pct)
    if run_df.empty:
        # Neither the CDF nor the plots can be built without samples
        self._log.warning('No running times to plot for task [%s]',
                          td.label)
        return None
    ymax = 1.1 * run_df.running_time.max()

    # Build the series for the CDF
    cdf = self._getCDF(run_df.running_time, threshold_s)
    self._log.info('%.1f %% samples below %s [ms] threshold',
                   100. * cdf.below, threshold_ms)

    # Setup plots
    gs = gridspec.GridSpec(2, 2, height_ratios=[2, 1], width_ratios=[1, 1])
    plt.figure(figsize=(16, 8))

    plot_title = "[{}]: running times (@ block time)".format(td.label)
    if tag:
        plot_title = "{} [{}]".format(plot_title, tag)
    plot_title = "{}, threshold @ {} [ms]".format(plot_title, threshold_ms)

    # Running time over time
    axes = plt.subplot(gs[0, 0:2])
    axes.set_title(plot_title)
    run_df.plot(style='g+', logy=False, ax=axes)

    axes.axhline(threshold_s, linestyle='--', color='g')
    self._trace.analysis.status.plotOverutilized(axes)
    axes.legend(loc='lower center', ncol=2)
    axes.set_xlim(self._trace.x_min, self._trace.x_max)

    # Cumulative distribution of all running times
    axes = plt.subplot(gs[1, 0])
    cdf.df.plot(ax=axes, legend=False, xlim=(0, None),
                title='Runtime CDF ({:.1f}% within {} [ms] threshold)'
                .format(100. * cdf.below, threshold_ms))
    axes.axvspan(0, threshold_s, facecolor='g', alpha=0.5)
    axes.axhline(y=cdf.below, linewidth=1, color='r', linestyle='--')

    # Histogram of all running times
    axes = plt.subplot(gs[1, 1])
    run_df.plot(kind='hist', bins=bins, ax=axes,
                xlim=(0, ymax), legend=False,
                title='Runtime histogram ({} bins, {} [ms] green threshold)'
                .format(bins, threshold_ms))
    axes.axvspan(0, threshold_s, facecolor='g', alpha=0.5)

    # Save generated plots into datadir
    task_name = re.sub(r'[ :/]', '_', td.label)
    figname = '{}/{}task_runtimes_{}_{}.png'\
              .format(self._trace.plots_dir, self._trace.plots_prefix,
                      td.pid, task_name)
    pl.savefig(figname, bbox_inches='tight')

    # Return statistics, extended with a row reporting the threshold and
    # labelled with the percentage of samples below it
    stats_df = run_df.describe(percentiles=[0.95, 0.99])
    label = '{:.1f}%'.format(100. * cdf.below)
    threshold_df = pd.DataFrame(
        [cdf.threshold], columns=['running_time'], index=[label])
    return pd.concat([stats_df, threshold_df])
###############################################################################
# Utility Methods
###############################################################################
@memoized
def _getTaskData(self, task):
    """
    Resolve a task, specified either by name or by PID, into its TaskData

    When multiple PIDs are associated to the specified name, a warning is
    logged and only the first PID reported by the trace is considered.

    :param task: the name or the PID of the task to look for
    :type task: int or str

    :returns: a TaskData tuple (pid, names, label), or None if the task
              cannot be found in the trace

    :raises ValueError: if `task` is neither an int nor a str
    """
    if isinstance(task, str):
        # Task specified by name: get task PID
        task_pids = self._trace.getTaskByName(task)
        if len(task_pids) == 0:
            self._log.warning('No tasks found with name [%s]', task)
            return None

        task_pid = task_pids[0]
        if len(task_pids) > 1:
            self._log.warning('Multiple PIDs for task named [%s]', task)
            for pid in task_pids:
                self._log.warning('  %5d :  %s', pid,
                                  ','.join(self._trace.getTaskByPid(pid)))
            self._log.warning('Returning stats only for PID: %d',
                              task_pid)
        task_names = self._trace.getTaskByPid(task_pid)

    elif isinstance(task, int):
        # Task specified by PID: get task name
        task_pid = task
        task_names = self._trace.getTaskByPid(task_pid)
        if len(task_names) == 0:
            # The lookup has been done by PID, report it as such
            self._log.warning('No tasks found with PID [%s]', task)
            return None

    else:
        raise ValueError("Task must be either an int or str")

    task_label = "{}: {}".format(task_pid, ', '.join(task_names))
    return TaskData(task_pid, task_names, task_label)
@memoized
def _taskState(self, state):
    """
    Translate a sched_switch "prev_state" value into a task state string

    The numeric state is decoded as a bitmask of task state flags, each one
    represented by a letter. A value with no flag set is reported as "R".
    Multiple flags are joined by '|', unless the bit right above the
    biggest known flag is set, in which case the letters are just followed
    by a '+' (e.g. "R+").

    :param state: the state value, as reported by the sched_switch event
    :type state: int or str

    :returns: the string representing the state, or `state` itself if it
              cannot be converted to an integer (e.g. it is already a
              symbol)
    """
    try:
        state = int(state)
    except ValueError:
        # State already converted to symbol
        return state

    # Tasks STATE flags (Linux 3.18)
    TASK_STATES = {
        0: "R",     # TASK_RUNNING
        1: "S",     # TASK_INTERRUPTIBLE
        2: "D",     # TASK_UNINTERRUPTIBLE
        4: "T",     # __TASK_STOPPED
        8: "t",     # __TASK_TRACED
        16: "X",    # EXIT_DEAD
        32: "Z",    # EXIT_ZOMBIE
        64: "x",    # TASK_DEAD
        128: "K",   # TASK_WAKEKILL
        256: "W",   # TASK_WAKING
        512: "P",   # TASK_PARKED
        1024: "N",  # TASK_NOLOAD
    }

    kver = self._trace.platform['kernel']['parts']
    if kver is None:
        kver = (3, 18)
    # Normalize to a tuple: the version parts could be provided as a list
    # (presumably when loaded from a JSON platform description), which
    # cannot be reliably compared with a tuple
    kver = tuple(kver)
    self._log.info('Parsing sched_switch states assuming kernel v%d.%d',
                   kver[0], kver[1])
    if kver >= (4, 8):
        TASK_STATES[2048] = "n"  # TASK_NEW
    # The bit right above the biggest known flag
    TASK_MAX_STATE = 2 * max(TASK_STATES)

    res = "R"
    if state & (TASK_MAX_STATE - 1) != 0:
        res = ""
    # Visit flags in ascending order, so that the generated string does not
    # depend on the dictionary iteration order
    for key in sorted(TASK_STATES):
        if key & state:
            res += TASK_STATES[key]
    if state & TASK_MAX_STATE:
        res += "+"
    else:
        res = '|'.join(res)
    return res
def _getCDF(self, data, threshold):
    """
    Build the "Cumulative Distribution Function" (CDF) for the given data

    Undefined (NaN) samples are not considered.

    :param data: the samples to build the CDF for
    :type data: :mod:`pandas.Series`

    :param threshold: the value to compare the samples against, expressed
                      in the same unit of the samples
    :type threshold: int or float

    :returns: a CDF tuple with:
              - df: a Series of cumulative fractions, in [0, 1], indexed by
                    the sorted samples values
              - threshold: the specified threshold
              - above: the fraction of samples above the threshold
              - below: the fraction of samples not above the threshold
              Both fractions are NaN if there are no samples.
    """
    # Build the series of sorted values
    ser = data.dropna().sort_values()
    if ser.empty:
        # No samples: the distribution is not defined
        undefined = float('nan')
        return CDF(pd.Series([], dtype=float), threshold,
                   undefined, undefined)

    if len(ser) < 1000:
        # Append again the last (and largest) value.
        # This step is important especially for small sample sizes
        # in order to get an unbiased CDF
        ser = pd.concat([ser, pd.Series([ser.iloc[-1]])])
    df = pd.Series(np.linspace(0., 1., len(ser)), index=ser)

    # Compute percentage of samples above/below the specified threshold.
    # The selection is empty when all the samples are above the threshold,
    # in which case no sample is below it.
    not_above = df.loc[:threshold]
    below = float(not_above.max()) if len(not_above) else 0.0
    above = 1 - below
    return CDF(df, threshold, above, below)
# vim :set tabstop=4 shiftwidth=4 expandtab