# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2015, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

""" Frequency Analysis Module """

import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import operator

from trappy.utils import listify
from devlib.utils.misc import memoized

from analysis_module import AnalysisModule
from trace import ResidencyTime, ResidencyData
from bart.common.Utils import area_under_curve

import numpy as np


class FrequencyAnalysis(AnalysisModule):
    """
    Support for plotting Frequency Analysis data

    :param trace: input Trace object
    :type trace: :mod:`libs.utils.Trace`
    """

    def __init__(self, trace):
        super(FrequencyAnalysis, self).__init__(trace)

###############################################################################
# DataFrame Getter Methods
###############################################################################

    def _dfg_cpu_frequency_residency(self, cpu, total=True):
        """
        Get per-CPU frequency residency, i.e. amount of time CPU `cpu` spent
        at each frequency.

        :param cpu: CPU ID
        :type cpu: int

        :param total: if True returns the "total" time, otherwise the "active"
            time is returned
        :type total: bool

        :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency
            at each frequency.

        :raises: TypeError
        """
        if not isinstance(cpu, int):
            raise TypeError('Input CPU parameter must be an integer')

        residency = self._getFrequencyResidency(cpu)
        if not residency:
            return None
        if total:
            return residency.total
        return residency.active

    def _dfg_cluster_frequency_residency(self, cluster, total=True):
        """
        Get per-Cluster frequency residency, i.e. amount of time CLUSTER
        `cluster` spent at each frequency.

        :param cluster: this can be either a list of CPU IDs belonging to a
            cluster or the cluster name as specified in the platform
            description
        :type cluster: str or list(int)

        :param total: if True returns the "total" time, otherwise the "active"
            time is returned
        :type total: bool

        :returns: :mod:`pandas.DataFrame` - "total" or "active" time residency
            at each frequency.

        :raises: KeyError
        """
        if isinstance(cluster, str):
            try:
                residency = self._getFrequencyResidency(
                    self._platform['clusters'][cluster.lower()]
                )
            except KeyError:
                self._log.warning(
                    'Platform descriptor has no cluster named [%s], '
                    'plot disabled!', cluster
                )
                return None
        else:
            residency = self._getFrequencyResidency(cluster)

        if not residency:
            return None
        if total:
            return residency.total
        return residency.active

###############################################################################
# Plotting Methods
###############################################################################

    def plotPeripheralClock(self, title='Peripheral Frequency', clk='unknown'):
        """
        Produce graph plotting the frequency of a particular peripheral clock

        :param title: The title for the chart
        :type title: str

        :param clk: The clk name to chart
        :type clk: str

        :raises: KeyError
        """
        if not self._trace.hasEvents('clock_set_rate'):
            self._log.warning('Events [clock_set_rate] not found, plot DISABLED!')
            return

        rate_df = self._dfg_trace_event('clock_set_rate')
        enable_df = self._dfg_trace_event('clock_enable')
        disable_df = self._dfg_trace_event('clock_disable')
        pd.options.mode.chained_assignment = None

        rate_df = rate_df[rate_df.clk_name == clk]
        rate_df['clock_setting'] = rate_df['rate']/1e6
        rate_df['state'] = -1
        pd.set_option('display.expand_frame_repr', False)

        if not enable_df.empty:
            enable_df = enable_df[enable_df.clk_name == clk]
            enable_df['clock_setting'] = 1
        if not disable_df.empty:
            disable_df = disable_df[disable_df.clk_name == clk]
            disable_df['clock_setting'] = 0

        freq = pd.concat([rate_df, enable_df, disable_df])
        freq.sort_index(inplace=True)
        if freq.empty:
            self._log.warning('No events for clock ' + clk + ' found in trace')
            return

        # Duplicate the last event at the end of the trace window so that the
        # step plot extends up to x_max
        last = freq.tail(n=1)
        last.index = [self._trace.x_max]
        freq = pd.concat([freq, last])

        freq['timestamp'] = freq.index
        freq['frequency'] = 0

        max_freq = 0
        last_state = 1
        last_freq = rate_df.iloc[0]['clock_setting']
        for index, row in freq.iterrows():
            if row.state == -1:
                last_freq = row.clock_setting
                if max_freq < last_freq:
                    max_freq = last_freq
                if last_state == 0:
                    freq.loc[index, 'frequency'] = 0
                else:
                    freq.loc[index, 'frequency'] = last_freq
            if row.state == 1:
                freq.loc[index, 'frequency'] = last_freq
                last_state = row.state
            if row.state == 0:
                freq.loc[index, 'frequency'] = 0
                last_state = row.state

        gs = gridspec.GridSpec(5, 1)
        freq_axis = plt.subplot(gs[:4, 0])
        state_axis = plt.subplot(gs[4:, 0])
        plt.suptitle(title, y=.97, fontsize=16, horizontalalignment='center')
        gs.update(hspace=1.7)

        # Plot frequency information
        freq_axis.set_title("Clock frequency for " + clk)
        freq_axis.set_ylim(0, max_freq * 1.1)
        if len(freq) > 0:
            freq['frequency'].plot(style=['b-'], ax=freq_axis,
                                   drawstyle='steps-post', alpha=0.4)
        else:
            self._log.warning('NO frequency events to plot')
        freq_axis.set_xlim(self._trace.x_min, self._trace.x_max)
        freq_axis.set_ylabel('MHz')
        freq_axis.set_xlabel('')
        freq_axis.grid(True)

        # Figure out when the clock is on and off
        onoff = freq.loc[freq.state != -1, :]
        first_onoff = self._trace.x_max
        if len(onoff):
            # Edge-detect the state changes
            onoff['statechange'] = onoff['state'].diff()
            onoff = onoff[onoff.statechange != 0]

            # Compute delta as the time period between state changes
            onoff['delta'] = (onoff['timestamp'] -
                              onoff['timestamp'].shift()).fillna(0).shift(-1)
            onoff.iloc[-1, onoff.columns.get_loc('delta')] = \
                self._trace.x_max - onoff.iloc[-1].timestamp

            # Plot state ON as green and OFF as red
            enable_events = onoff[onoff.state == 1]
            disable_events = onoff[onoff.state == 0]
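            # Note: each enable/disable row below is rendered as a horizontal
            # bar at y=0 spanning [timestamp, timestamp + delta]. For example,
            # a (hypothetical) row with timestamp=1.0, delta=0.5 and state=1
            # becomes a green "clock on" bar covering [1.0, 1.5].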
            state_axis.hlines([0] * len(enable_events),
                              enable_events['timestamp'],
                              enable_events['timestamp'] + enable_events['delta'],
                              linewidth=10.0, label='clock on', color='green')
            state_axis.hlines([0] * len(disable_events),
                              disable_events['timestamp'],
                              disable_events['timestamp'] + disable_events['delta'],
                              linewidth=10.0, label='clock off', color='red')
            first_onoff = onoff.iloc[0].timestamp

        # Plot the time period where the clock state was unknown from the trace
        state_axis.hlines(0, 0, first_onoff, linewidth=1.0,
                          label='indeterminate clock state', linestyle='--')
        state_axis.set_yticks([])
        state_axis.set_xlabel('seconds')
        state_axis.set_xlim(self._trace.x_min, self._trace.x_max)
        state_axis.legend(bbox_to_anchor=(0., 1.02, 1., 0.102), loc=3,
                          ncol=3, mode='expand')

        figname = '{}/{}{}.png'\
            .format(self._trace.plots_dir, self._trace.plots_prefix, clk)
        pl.savefig(figname, bbox_inches='tight')

    def plotClusterFrequencies(self, title='Clusters Frequencies'):
        """
        Plot frequency trend for all clusters. If sched_overutilized events are
        available, the plots will also show the intervals of time where the
        cluster was overutilized.

        :param title: user-defined plot title
        :type title: str
        """
        if not self._trace.hasEvents('cpu_frequency'):
            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
            return
        df = self._dfg_trace_event('cpu_frequency')

        pd.options.mode.chained_assignment = None

        # Extract LITTLE and big clusters frequencies
        # and scale them to [MHz]
        if self._little_cpus:
            lfreq = df[df.cpu == self._little_cpus[-1]]
            lfreq['frequency'] = lfreq['frequency']/1e3
        else:
            lfreq = []
        if self._big_cpus:
            bfreq = df[df.cpu == self._big_cpus[-1]]
            bfreq['frequency'] = bfreq['frequency']/1e3
        else:
            bfreq = []

        # Compute AVG frequency for LITTLE cluster
        avg_lfreq = 0
        if len(lfreq) > 0:
            lfreq['timestamp'] = lfreq.index
            lfreq['delta'] = (lfreq['timestamp'] -
                              lfreq['timestamp'].shift()).fillna(0).shift(-1)
            lfreq['cfreq'] = (lfreq['frequency'] * lfreq['delta']).fillna(0)
            timespan = lfreq.iloc[-1].timestamp - lfreq.iloc[0].timestamp
            avg_lfreq = lfreq['cfreq'].sum()/timespan

        # Compute AVG frequency for big cluster
        avg_bfreq = 0
        if len(bfreq) > 0:
            bfreq['timestamp'] = bfreq.index
            bfreq['delta'] = (bfreq['timestamp'] -
                              bfreq['timestamp'].shift()).fillna(0).shift(-1)
            bfreq['cfreq'] = (bfreq['frequency'] * bfreq['delta']).fillna(0)
            timespan = bfreq.iloc[-1].timestamp - bfreq.iloc[0].timestamp
            avg_bfreq = bfreq['cfreq'].sum()/timespan

        pd.options.mode.chained_assignment = 'warn'

        # Setup a dual cluster plot
        fig, pltaxes = plt.subplots(2, 1, figsize=(16, 8))
        plt.suptitle(title, y=.97, fontsize=16, horizontalalignment='center')

        # Plot Cluster frequencies
        axes = pltaxes[0]
        axes.set_title('big Cluster')
        if avg_bfreq > 0:
            axes.axhline(avg_bfreq, color='r', linestyle='--', linewidth=2)
        axes.set_ylim(
                (self._platform['freqs']['big'][0] - 100000)/1e3,
                (self._platform['freqs']['big'][-1] + 100000)/1e3
        )
        if len(bfreq) > 0:
            bfreq['frequency'].plot(style=['r-'], ax=axes,
                                    drawstyle='steps-post', alpha=0.4)
        else:
            self._log.warning('NO big CPUs frequency events to plot')
        axes.set_xlim(self._trace.x_min, self._trace.x_max)
        axes.set_ylabel('MHz')
        axes.grid(True)
        axes.set_xticklabels([])
        axes.set_xlabel('')
        self._trace.analysis.status.plotOverutilized(axes)

        axes = pltaxes[1]
        axes.set_title('LITTLE Cluster')
        if avg_lfreq > 0:
            axes.axhline(avg_lfreq, color='b', linestyle='--', linewidth=2)
        axes.set_ylim(
                (self._platform['freqs']['little'][0] - 100000)/1e3,
                (self._platform['freqs']['little'][-1] + 100000)/1e3
        )
        if len(lfreq) > 0:
            lfreq['frequency'].plot(style=['b-'], ax=axes,
                                    drawstyle='steps-post', alpha=0.4)
        else:
            self._log.warning('NO LITTLE CPUs frequency events to plot')
        axes.set_xlim(self._trace.x_min, self._trace.x_max)
        axes.set_ylabel('MHz')
        axes.grid(True)
        self._trace.analysis.status.plotOverutilized(axes)

        # Save generated plots into datadir
        figname = '{}/{}cluster_freqs.png'\
            .format(self._trace.plots_dir, self._trace.plots_prefix)
        pl.savefig(figname, bbox_inches='tight')

        self._log.info('LITTLE cluster average frequency: %.3f GHz',
                       avg_lfreq/1e3)
        self._log.info('big cluster average frequency: %.3f GHz',
                       avg_bfreq/1e3)

        return (avg_lfreq/1e3, avg_bfreq/1e3)

    def plotCPUFrequencies(self, cpus=None):
        """
        Plot frequency for the specified CPUs (or all if not specified).
        If sched_overutilized events are available, the plots will also show
        the intervals of time where the system was overutilized.

        The generated plots are also saved as PNG images under the folder
        specified by the `plots_dir` parameter of :class:`Trace`.

        :param cpus: the list of CPUs to plot, if None a plot is generated
            for each available CPU
        :type cpus: int or list(int)

        :return: a dictionary of average frequency for each CPU.
        """
        if not self._trace.hasEvents('cpu_frequency'):
            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
            return
        df = self._dfg_trace_event('cpu_frequency')

        if cpus is None:
            # Generate plots only for available CPUs
            cpus = range(df.cpu.max()+1)
        else:
            # Generate plots only for the specified CPUs
            cpus = listify(cpus)

        chained_assignment = pd.options.mode.chained_assignment
        pd.options.mode.chained_assignment = None

        freq = {}
        for cpu_id in listify(cpus):
            # Extract CPUs' frequencies and scale them to [MHz]
            _df = df[df.cpu == cpu_id]
            if _df.empty:
                self._log.warning('No [cpu_frequency] events for CPU%d, '
                                  'plot DISABLED!', cpu_id)
                continue
            _df['frequency'] = _df.frequency / 1e3

            # Compute AVG frequency for this CPU
            avg_freq = 0
            if len(_df) > 1:
                timespan = _df.index[-1] - _df.index[0]
                avg_freq = area_under_curve(_df['frequency'],
                                            method='rect') / timespan

            # Store DF for plotting
            freq[cpu_id] = {
                'df': _df,
                'avg': avg_freq,
            }

        pd.options.mode.chained_assignment = chained_assignment

        plots_count = len(freq)
        if not plots_count:
            return

        # Setup CPUs plots
        fig, pltaxes = plt.subplots(len(freq), 1,
                                    figsize=(16, 4 * plots_count))

        avg_freqs = {}
        for plot_idx, cpu_id in enumerate(freq):

            # CPU frequencies and average value
            _df = freq[cpu_id]['df']
            _avg = freq[cpu_id]['avg']

            # Plot average frequency
            try:
                axes = pltaxes[plot_idx]
            except TypeError:
                axes = pltaxes
            axes.set_title('CPU{:2d} Frequency'.format(cpu_id))
            axes.axhline(_avg, color='r', linestyle='--', linewidth=2)

            # Set plot limit based on CPU min/max frequencies
            if 'clusters' in self._platform:
                for cluster, cluster_cpus in self._platform['clusters'].iteritems():
                    if cpu_id not in cluster_cpus:
                        continue
                    freqs = self._platform['freqs'][cluster]
                    break
            else:
                freqs = df['frequency'].unique()
            axes.set_ylim((min(freqs) - 100000) / 1e3,
                          (max(freqs) + 100000) / 1e3)

            # Plot CPU frequency transitions
            _df['frequency'].plot(style=['r-'], ax=axes,
                                  drawstyle='steps-post', alpha=0.4)

            # Plot overutilized regions (if signal available)
            self._trace.analysis.status.plotOverutilized(axes)

            # Finalize plot
            axes.set_xlim(self._trace.x_min, self._trace.x_max)
            axes.set_ylabel('MHz')
            axes.grid(True)
            if plot_idx + 1 < plots_count:
                axes.set_xticklabels([])
                axes.set_xlabel('')

            avg_freqs[cpu_id] = _avg/1e3
            self._log.info('CPU%02d average frequency: %.3f GHz',
                           cpu_id, avg_freqs[cpu_id])

        # Save generated plots into datadir
        figname = '{}/{}cpus_freqs.png'\
            .format(self._trace.plots_dir, self._trace.plots_prefix)
        pl.savefig(figname, bbox_inches='tight')

        return avg_freqs

    def plotCPUFrequencyResidency(self, cpus=None, pct=False, active=False):
        """
        Plot per-CPU frequency residency. big CPUs are plotted first and then
        LITTLEs.

        Requires the following trace events:
            - cpu_frequency
            - cpu_idle

        :param cpus: list of CPU IDs. By default plot all CPUs
        :type cpus: list(int) or int

        :param pct: plot residencies in percentage
        :type pct: bool

        :param active: for percentage plot specify whether to plot active or
            total time. Default is TOTAL time
        :type active: bool
        """
        if not self._trace.hasEvents('cpu_frequency'):
            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
            return
        if not self._trace.hasEvents('cpu_idle'):
            self._log.warning('Events [cpu_idle] not found, plot DISABLED!')
            return

        if cpus is None:
            # Generate plots only for available CPUs
            cpufreq_data = self._dfg_trace_event('cpu_frequency')
            _cpus = range(cpufreq_data.cpu.max()+1)
        else:
            _cpus = listify(cpus)

        # Split between big and LITTLE CPUs ordered from higher to lower ID
        _cpus.reverse()
        big_cpus = [c for c in _cpus if c in self._big_cpus]
        little_cpus = [c for c in _cpus if c in self._little_cpus]
        _cpus = big_cpus + little_cpus

        # Precompute active and total time for each CPU
        residencies = []
        xmax = 0.0
        for cpu in _cpus:
            res = self._getFrequencyResidency(cpu)
            residencies.append(ResidencyData('CPU{}'.format(cpu), res))

            max_time = res.total.max().values[0]
            if xmax < max_time:
                xmax = max_time

        self._plotFrequencyResidency(residencies, 'cpu', xmax, pct, active)

    def plotClusterFrequencyResidency(self, clusters=None,
                                      pct=False, active=False):
        """
        Plot the frequency residency in a given cluster, i.e. the amount of
        time cluster `cluster` spent at frequency `f_i`. By default, both
        'big' and 'LITTLE' clusters data are plotted.

        Requires the following trace events:
            - cpu_frequency
            - cpu_idle

        :param clusters: name of the clusters to be plotted (all of them by
            default)
        :type clusters: str or list(str)

        :param pct: plot residencies in percentage
        :type pct: bool

        :param active: for percentage plot specify whether to plot active or
            total time. Default is TOTAL time
        :type active: bool
        """
        if not self._trace.hasEvents('cpu_frequency'):
            self._log.warning('Events [cpu_frequency] not found, plot DISABLED!')
            return
        if not self._trace.hasEvents('cpu_idle'):
            self._log.warning('Events [cpu_idle] not found, plot DISABLED!')
            return
        if 'clusters' not in self._platform:
            self._log.warning('No platform cluster info. Plot DISABLED!')
            return

        # Assumption: all CPUs in a cluster run at the same frequency, i.e.
        # the frequency is scaled per-cluster not per-CPU.
        # Hence, we can limit the cluster frequencies data to a single CPU
        if not self._trace.freq_coherency:
            self._log.warning('Cluster frequency is not coherent, plot DISABLED!')
            return

        # Sanitize clusters
        if clusters is None:
            _clusters = self._platform['clusters'].keys()
        else:
            _clusters = listify(clusters)

        # Precompute active and total time for each cluster
        residencies = []
        xmax = 0.0
        for cluster in _clusters:
            res = self._getFrequencyResidency(
                self._platform['clusters'][cluster.lower()])
            residencies.append(ResidencyData('{} Cluster'.format(cluster),
                                             res))

            max_time = res.total.max().values[0]
            if xmax < max_time:
                xmax = max_time

        self._plotFrequencyResidency(residencies, 'cluster', xmax, pct, active)

###############################################################################
# Utility Methods
###############################################################################

    @memoized
    def _getFrequencyResidency(self, cluster):
        """
        Get a DataFrame with per cluster frequency residency, i.e. amount of
        time spent at a given frequency in each cluster.

        :param cluster: this can be either a single CPU ID or a list of CPU
            IDs belonging to a cluster
        :type cluster: int or list(int)

        :returns: namedtuple(ResidencyTime) - tuple of total and active time
            dataframes
        """
        if not self._trace.hasEvents('cpu_frequency'):
            self._log.warning('Events [cpu_frequency] not found, '
                              'frequency residency computation not possible!')
            return None
        if not self._trace.hasEvents('cpu_idle'):
            self._log.warning('Events [cpu_idle] not found, '
                              'frequency residency computation not possible!')
            return None

        _cluster = listify(cluster)

        freq_df = self._dfg_trace_event('cpu_frequency')
        # Assumption: all CPUs in a cluster run at the same frequency, i.e. the
        # frequency is scaled per-cluster not per-CPU. Hence, we can limit the
        # cluster frequencies data to a single CPU. This assumption is verified
        # by the Trace module when parsing the trace.
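        # Illustrative example (made-up numbers) of the TOTAL time computation
        # below: with cpu_frequency samples at t=0.0 (500000 kHz), t=0.3
        # (1000000 kHz), t=0.9 (500000 kHz) and t=1.0, the sample-to-sample
        # durations are [0.3, 0.6, 0.1] s; grouping them by frequency [MHz]
        # gives 500.0 -> 0.3 + 0.1 = 0.4 s and 1000.0 -> 0.6 s.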
        if len(_cluster) > 1 and not self._trace.freq_coherency:
            self._log.warning('Cluster frequency is NOT coherent, '
                              'cannot compute residency!')
            return None
        cluster_freqs = freq_df[freq_df.cpu == _cluster[0]]

        # Compute TOTAL Time
        time_intervals = cluster_freqs.index[1:] - cluster_freqs.index[:-1]
        total_time = pd.DataFrame({
            'time': time_intervals,
            'frequency': [f/1000.0 for f in cluster_freqs.iloc[:-1].frequency]
        })
        total_time = total_time.groupby(['frequency']).sum()

        # Compute ACTIVE Time
        cluster_active = self._trace.getClusterActiveSignal(_cluster)

        # In order to compute the active time spent at each frequency we
        # multiply 2 square waves:
        # - cluster_active, a square wave of the form:
        #     cluster_active[t] == 1 if at least one CPU is reported to be
        #                            non-idle by CPUIdle at time t
        #     cluster_active[t] == 0 otherwise
        # - freq_active, square wave of the form:
        #     freq_active[t] == 1 if at time t the frequency is f
        #     freq_active[t] == 0 otherwise
        available_freqs = sorted(cluster_freqs.frequency.unique())
        cluster_freqs = cluster_freqs.join(
            cluster_active.to_frame(name='active'), how='outer')
        cluster_freqs.fillna(method='ffill', inplace=True)
        nonidle_time = []
        for f in available_freqs:
            freq_active = cluster_freqs.frequency.apply(
                lambda x: 1 if x == f else 0)
            active_t = cluster_freqs.active * freq_active
            # Compute total time by integrating the square wave
            nonidle_time.append(self._trace.integrate_square_wave(active_t))

        active_time = pd.DataFrame({'time': nonidle_time},
                                   index=[f/1000.0 for f in available_freqs])
        active_time.index.name = 'frequency'

        return ResidencyTime(total_time, active_time)

    def _plotFrequencyResidencyAbs(self, axes, residency, n_plots,
                                   is_first, is_last, xmax, title=''):
        """
        Private method to generate frequency residency plots.

        :param axes: axes over which to generate the plot
        :type axes: matplotlib.axes.Axes

        :param residency: tuple of total and active time dataframes
        :type residency: namedtuple(ResidencyTime)

        :param n_plots: total number of plots
        :type n_plots: int

        :param is_first: if True this is the first plot
        :type is_first: bool

        :param is_last: if True this is the last plot
        :type is_last: bool

        :param xmax: x-axes higher bound
        :type xmax: double

        :param title: title of this subplot
        :type title: str
        """
        yrange = 0.4 * max(6, len(residency.total)) * n_plots
        residency.total.plot.barh(ax=axes, color='g',
                                  legend=False, figsize=(16, yrange))
        residency.active.plot.barh(ax=axes, color='r',
                                   legend=False, figsize=(16, yrange))

        axes.set_xlim(0, 1.05*xmax)
        axes.set_ylabel('Frequency [MHz]')
        axes.set_title(title)
        axes.grid(True)
        if is_last:
            axes.set_xlabel('Time [s]')
        else:
            axes.set_xticklabels([])

        if is_first:
            # Put title on top of the figure. As of now there is no clean way
            # to make the title appear always in the same position in the
            # figure because figure heights may vary between different
            # platforms (different number of OPPs). Hence, we use annotation
            legend_y = axes.get_ylim()[1]
            axes.annotate('OPP Residency Time', xy=(0, legend_y),
                          xytext=(-50, 45), textcoords='offset points',
                          fontsize=18)
            axes.annotate('GREEN: Total', xy=(0, legend_y),
                          xytext=(-50, 25), textcoords='offset points',
                          color='g', fontsize=14)
            axes.annotate('RED: Active', xy=(0, legend_y),
                          xytext=(50, 25), textcoords='offset points',
                          color='r', fontsize=14)

    def _plotFrequencyResidencyPct(self, axes, residency_df, label,
                                   n_plots, is_first, is_last, res_type):
        """
        Private method to generate PERCENTAGE frequency residency plots.

        :param axes: axes over which to generate the plot
        :type axes: matplotlib.axes.Axes

        :param residency_df: residency time dataframe
        :type residency_df: :mod:`pandas.DataFrame`

        :param label: label to be used for percentage residency dataframe
        :type label: str

        :param n_plots: total number of plots
        :type n_plots: int

        :param is_first: if True this is the first plot
        :type is_first: bool

        :param is_last: if True this is the last plot
        :type is_last: bool

        :param res_type: type of residency, either TOTAL or ACTIVE
        :type res_type: str
        """
        # Compute sum of the time intervals
        duration = residency_df.time.sum()

        # Compute the percentage of time spent at each frequency
        residency_pct = pd.DataFrame(
            {label: residency_df.time.apply(lambda x: x*100/duration)},
            index=residency_df.index
        )

        yrange = 3 * n_plots
        residency_pct.T.plot.barh(ax=axes, stacked=True, figsize=(16, yrange))

        axes.legend(loc='lower center', ncol=7)
        axes.set_xlim(0, 100)
        axes.grid(True)
        if is_last:
            axes.set_xlabel('Residency [%]')
        else:
            axes.set_xticklabels([])

        if is_first:
            legend_y = axes.get_ylim()[1]
            axes.annotate('OPP {} Residency Time'.format(res_type),
                          xy=(0, legend_y), xytext=(-50, 35),
                          textcoords='offset points', fontsize=18)

    def _plotFrequencyResidency(self, residencies, entity_name, xmax,
                                pct, active):
        """
        Generate Frequency residency plots for the given entities.

        :param residencies: list of residencies to be plotted
        :type residencies: list(namedtuple(ResidencyData)) - each tuple
            contains:
            - a label to be used as subplot title
            - a namedtuple(ResidencyTime)

        :param entity_name: name of the entity ('cpu' or 'cluster') used in
            the figure name
        :type entity_name: str

        :param xmax: upper bound of x-axes
        :type xmax: double

        :param pct: plot residencies in percentage
        :type pct: bool

        :param active: for percentage plot specify whether to plot active or
            total time. Default is TOTAL time
        :type active: bool
        """
        n_plots = len(residencies)
        gs = gridspec.GridSpec(n_plots, 1)
        fig = plt.figure()

        figtype = ""
        for idx, data in enumerate(residencies):
            if data.residency is None:
                plt.close(fig)
                return

            axes = fig.add_subplot(gs[idx])
            is_first = idx == 0
            is_last = idx+1 == n_plots
            if pct and active:
                self._plotFrequencyResidencyPct(axes, data.residency.active,
                                                data.label, n_plots,
                                                is_first, is_last,
                                                'ACTIVE')
                figtype = "_pct_active"
                continue
            if pct:
                self._plotFrequencyResidencyPct(axes, data.residency.total,
                                                data.label, n_plots,
                                                is_first, is_last,
                                                'TOTAL')
                figtype = "_pct_total"
                continue

            self._plotFrequencyResidencyAbs(axes, data.residency,
                                            n_plots, is_first,
                                            is_last, xmax,
                                            title=data.label)

        figname = '{}/{}{}_freq_residency{}.png'\
            .format(self._trace.plots_dir, self._trace.plots_prefix,
                    entity_name, figtype)
        pl.savefig(figname, bbox_inches='tight')

# vim :set tabstop=4 shiftwidth=4 expandtab
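# Example usage (sketch): assuming `trace` is an already-parsed Trace object
# and this module is reachable as trace.analysis.frequency (mirroring the
# trace.analysis.status accessor used above), typical calls would be:
#     trace.analysis.frequency.plotClusterFrequencies()
#     trace.analysis.frequency.plotCPUFrequencyResidency(pct=True)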