# Plain text  |  187 lines  |  6.79 KB

# SPDX-License-Identifier: Apache-2.0
#
# Copyright (C) 2015, ARM Limited and contributors.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import glob
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pylab as pl
import re
import sys
import trappy
import logging

# Regexp to match an rt-app generated logfile, i.e. a path ending in
# ".../rt-app-<task name>-<number>.log"; group 1 captures the task name.
# A raw string is used so that the backslash reaches the regex engine
# untouched, and the dot of the ".log" extension is escaped so that it only
# matches a literal dot.
TASK_NAME_RE = re.compile(r'.*/rt-app-(.+)-[0-9]+\.log')

class PerfAnalysis(object):
    """
    Load and plot the performance data logged by rt-app tasks.

    On construction, the rt-app logfiles found in ``datadir`` are parsed into
    one PANDAS dataframe per task. The dataframes are indexed by time and
    extended with a ``PerfIndex`` column (see ``__loadRTAData``).

    :param datadir: folder containing the ``rt-app-*.log`` files
    :param tasks: optional list of task names to load; when None, every
                  rt-app logfile found in ``datadir`` is loaded
    :raises ValueError: if no performance data can be loaded, or if one of
                        the requested tasks has no logfile
    """

    def __init__(self, datadir, tasks=None):

        # Per-task performance data, keyed by task name. Each entry is a
        # dictionary with the 'logfile' path and the loaded 'df' dataframe.
        self.perf_data = {}

        # Folder containing all rt-app data. It stays None until the data
        # have been loaded successfully.
        self.datadir = None

        # Setup logging
        self._log = logging.getLogger('PerfAnalysis')

        # Load performance data generated by rt-app workloads
        self.__loadRTAData(datadir, tasks)

        if not self.perf_data:
            raise ValueError('No performance data found on folder [{0:s}]'
                             .format(datadir))

        # Keep track of the datadir from where data have been loaded
        self.datadir = datadir

    def __taskNameFromLog(self, logfile):
        """
        Return the task name encoded in the path of an rt-app logfile.

        :raises ValueError: if the path does not match TASK_NAME_RE
        """
        tname_match = re.search(TASK_NAME_RE, logfile)
        if tname_match is None:
            raise ValueError('The logfile [{0:s}] is not from rt-app'
                             .format(logfile))
        return tname_match.group(1)

    def __logfileFromTaskName(self, taskname, datadir=None):
        """
        Return the path of the rt-app logfile of the specified task.

        :param taskname: name of the task to look for
        :param datadir: folder to search; defaults to ``self.datadir``. It
                        must be given explicitly while the constructor is
                        still loading data, since ``self.datadir`` is only
                        set once loading has succeeded.
        :raises ValueError: if no matching logfile exists
        """
        if datadir is None:
            datadir = self.datadir

        # Try the exact name first, then the name followed by a "-<number>"
        # suffix, which is the layout TASK_NAME_RE expects and the one under
        # which the task names returned by tasks() are found on disk.
        patterns = (
            '{0:s}/rt-app-{1:s}.log',
            '{0:s}/rt-app-{1:s}-[0-9]*.log',
        )
        for pattern in patterns:
            # Sorted so that the choice is deterministic when several
            # logfiles match.
            matches = sorted(glob.glob(pattern.format(datadir, taskname)))
            if matches:
                return matches[0]

        raise ValueError('No rt-app logfile found for task [{0:s}]'
                         .format(taskname))

    def tasks(self):
        """
        Return the list of tasks for which performance data have been loaded

        :raises ValueError: if no data have been loaded yet
        """
        if self.datadir is None:
            raise ValueError("rt-app performance data not (yet) loaded")
        return list(self.perf_data)

    def logfile(self, task):
        """
        Return the logfile for the specified task

        :raises ValueError: if no logfile is known for the task
        """
        # NOTE: self.datadir is deliberately not checked here, since this
        # method is also used while the data are still being loaded.
        if task not in self.perf_data:
            raise ValueError('No logfile loaded for task [{0:s}]'
                             .format(task))
        return self.perf_data[task]['logfile']

    def df(self, task):
        """
        Return the PANDAS dataframe with the performance data for the
        specified task

        :raises ValueError: if no data have been loaded yet, or no dataframe
                            is available for the task
        """
        if self.datadir is None:
            raise ValueError("rt-app performance data not (yet) loaded")
        if task not in self.perf_data:
            raise ValueError('No dataframe loaded for task [{0:s}]'
                             .format(task))
        return self.perf_data[task]['df']

    def __loadRTAData(self, datadir, tasks):
        """
        Load performance data of an rt-app workload

        Fills ``self.perf_data`` with one entry per task, holding the path
        of its logfile and the dataframe parsed from it.

        :param datadir: folder containing the rt-app logfiles
        :param tasks: list of task names to load, or None to load every
                      rt-app logfile found in ``datadir``
        :raises ValueError: if a requested task has no logfile
        """

        if tasks is None:
            # Lookup for all rt-app logfile into the specified datadir
            for logfile in glob.glob('{0:s}/rt-app-*.log'.format(datadir)):
                task_name = self.__taskNameFromLog(logfile)
                self.perf_data[task_name] = {'logfile': logfile}
                self._log.debug('Found rt-app logfile for task [%s]',
                                task_name)
        else:
            # Lookup for specified rt-app task logfile into specified datadir
            for task in tasks:
                # self.datadir is not set yet at this point, so the folder
                # to search is passed explicitly.
                logfile = self.__logfileFromTaskName(task, datadir)
                self.perf_data[task] = {'logfile': logfile}
                self._log.debug('Found rt-app logfile for task [%s]', task)

        # Load all the found logfile into a dataset
        for task, task_data in self.perf_data.items():
            self._log.debug('Loading dataframe for task [%s]...', task)
            # The first line is skipped and the second one is consumed as
            # the header, its labels being replaced by the names below.
            df = pd.read_table(
                task_data['logfile'],
                sep=r'\s+',
                skiprows=1,
                header=0,
                usecols=[1, 2, 3, 4, 7, 8, 9, 10],
                names=[
                    'Cycles', 'Run', 'Period', 'Timestamp',
                    'Slack', 'CRun', 'CPeriod', 'WKPLatency'
                ])
            # Normalize time to [s] with origin on the first event
            start_time = df['Timestamp'][0] / 1e6
            df['Time'] = df['Timestamp'] / 1e6 - start_time
            df.set_index(['Time'], inplace=True)
            # Add performance metrics column, performance is defined as:
            #             slack
            #   perf = -------------
            #          period - run
            df['PerfIndex'] = df['Slack'] / (df['CPeriod'] - df['CRun'])

            # Keep track of the loaded dataframe
            task_data['df'] = df

    def plotPerf(self, task, title=None):
        """
        Plot the Latency/Slack and Performance data for the specified task

        The figure is made of three plots: Slack and WKPLatency over time,
        PerfIndex over time, and the histogram of PerfIndex with its average
        marked by a dashed line. It is saved as ``task_perf_<task>.png`` into
        the folder the data have been loaded from.

        :param task: name of the task to plot
        :param title: optional title for the whole figure
        :raises ValueError: if no dataframe is available for the task
        """
        task_df = self.df(task)

        # Grid
        gs = gridspec.GridSpec(2, 2, height_ratios=[4, 1], width_ratios=[3, 1])
        gs.update(wspace=0.1, hspace=0.1)
        # Figure
        fig = plt.figure(figsize=(16, 2*6))
        if title:
            plt.suptitle(title, y=.97, fontsize=16,
                         horizontalalignment='center')
        # Plot: Slack and Latency
        axes = plt.subplot(gs[0, 0])
        axes.set_title('Task [{0:s}] (start) Latency and (completion) Slack'
                       .format(task))
        data = task_df[['Slack', 'WKPLatency']]
        data.plot(ax=axes, drawstyle='steps-post', style=['b', 'g'])
        # The time axis is hidden here, the plot below shows it
        axes.xaxis.set_visible(False)
        # Plot: Performance
        axes = plt.subplot(gs[1, 0])
        axes.set_title('Task [{0:s}] Performance Index'.format(task))
        data = task_df[['PerfIndex']]
        data.plot(ax=axes, drawstyle='steps-post')
        axes.set_ylim(0, 2)
        # Plot: Performance Index Histogram
        axes = plt.subplot(gs[0:2, 1])
        data.hist(bins=30, ax=axes, alpha=0.4)
        # The statistics are read from the column by label, since positional
        # integer access on a label-indexed Series is deprecated in pandas.
        pindex_avg = task_df['PerfIndex'].mean()
        pindex_std = task_df['PerfIndex'].std()
        self._log.info('PerfIndex, Task [%s] avg: %.2f, std: %.2f',
                       task, pindex_avg, pindex_std)
        axes.axvline(pindex_avg, color='b', linestyle='--', linewidth=2)

        # Save generated plots into datadir
        figname = '{}/task_perf_{}.png'.format(self.datadir, task)
        fig.savefig(figname, bbox_inches='tight')