#!/usr/bin/env python
#
#  Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
#  Author: Erwan Velu  <erwan@enovance.com>
#
#  The license below covers all files distributed with fio unless otherwise
#  noted in the file itself.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License version 2 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Convert fio log files into gnuplot scripts, data files and statistics.

The code is written so that it runs unchanged on Python 2 and Python 3:
every print call takes a single, pre-formatted string.
"""

import os
import fnmatch
import sys
import getopt
import re
import math
import shutil
import subprocess

# Module-wide state. main() resets these on every run; the defaults only
# exist so that the helpers below can be used when the module is imported.
verbose = False
temporary_files = []
keep_temp_files = True

# Common preamble of every "compare" gnuplot script: (title, y-axis label).
_COMPARE_HEADER = '''
set title '%s'
set terminal png size 1280,1024
set ytics axis out auto
set key top left reverse
set xlabel "Time (Seconds)"
set ylabel '%s'
set yrange [0:]
set style line 1 lt 1 lw 3 pt 3 linecolor rgb "green"
'''

# (file name suffix, gnuplot plotting style) for each kind of compare graph.
_COMPARE_KINDS = (
    ("2Draw", "with linespoints"),
    ("2Dsmooth", "smooth csplines"),
    ("2Dtrend", "smooth bezier"),
)


def find_file(path, pattern):
    """Return the names found in directory `path` that match `pattern`.

    `pattern` is a shell-style wildcard as understood by fnmatch. Only the
    entry names are returned, not paths, in os.listdir() order.
    """
    return [entry for entry in os.listdir(path)
            if fnmatch.fnmatch(entry, pattern)]


def generate_gnuplot_script(fio_data_file, title, gnuplot_output_filename,
                            gnuplot_output_dir, mode, disk_perf, gpm_dir):
    """Write the 'mygraph' gnuplot script and, for 2+ traces, compare scripts.

    fio_data_file           -- list of fio log file names (one trace each)
    title                   -- title given to every graph
    gnuplot_output_filename -- basename used for the 3D and compare outputs
    gnuplot_output_dir      -- output directory, with its trailing '/'
    mode                    -- y-axis label
    disk_perf               -- list of per-trace value lists
    gpm_dir                 -- directory holding graph2D.gpm / graph3D.gpm

    Every generated file is registered in temporary_files. average() raises
    ZeroDivisionError if a trace has no value.
    """
    if verbose:
        print("Generating rendering scripts")
    filename = gnuplot_output_dir + 'mygraph'
    temporary_files.append(filename)

    # Plotting 3D or comparing graphs is meaningless with fewer than 2 traces
    compare_mode = len(fio_data_file) > 1
    compare_scripts = []
    graph = open(filename, 'w')
    try:
        if compare_mode:
            graph.write("call '%s/graph3D.gpm' '%s' '%s' '' '%s' '%s'\n" % (
                gpm_dir, title, gnuplot_output_filename,
                gnuplot_output_filename, mode))

            # The global average is the first curve of every compare graph
            global_avg = average(sum(disk_perf, []))
            for suffix, _ in _COMPARE_KINDS:
                basename = "compare-%s-%s" % (gnuplot_output_filename, suffix)
                script_path = gnuplot_output_dir + basename + ".gnuplot"
                temporary_files.append(script_path)
                script = open(script_path, 'w')
                compare_scripts.append(script)
                script.write(_COMPARE_HEADER % (title, mode))
                script.write("set output '%s.png'\n" % basename)
                script.write(
                    "plot %s w l ls 1 ti 'Global average value (%.2f)'"
                    % (global_avg, global_avg))

        for pos, log_name in enumerate(fio_data_file):
            tmp_filename = "gnuplot_temp_file.%d" % pos

            # compare_scripts is empty unless we are comparing traces
            for script, (_, style) in zip(compare_scripts, _COMPARE_KINDS):
                script.write(",\\\n'%s' using 2:3 %s title '%s'"
                             % (tmp_filename, style, log_name))

            png_file = log_name.replace('.log', '')
            raw_filename = "%s-2Draw" % png_file
            smooth_filename = "%s-2Dsmooth" % png_file
            trend_filename = "%s-2Dtrend" % png_file
            avg = average(disk_perf[pos])
            graph.write(
                "call '%s/graph2D.gpm' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%f'\n"
                % (gpm_dir, title, tmp_filename, log_name, raw_filename,
                   mode, smooth_filename, trend_filename, avg))
    finally:
        for script in compare_scripts:
            script.close()
        graph.close()


def generate_gnuplot_math_script(title, gnuplot_output_filename, mode,
                                 average, gnuplot_output_dir, gpm_dir):
    """Append one math.gpm call to the 'mymath' script of the output dir.

    `average` is the value forwarded as last argument of the gpm call. The
    script path is registered in temporary_files on every call.
    """
    filename = gnuplot_output_dir + 'mymath'
    temporary_files.append(filename)
    with open(filename, 'a') as script:
        script.write("call '%s/math.gpm' '%s' '%s' '' '%s' '%s' %s\n" % (
            gpm_dir, title, gnuplot_output_filename,
            gnuplot_output_filename, mode, average))


def compute_aggregated_file(fio_data_file, gnuplot_output_filename,
                            gnuplot_output_dir):
    """Concatenate the per-trace temporary files into a single data file.

    The temporary files are the gnuplot_temp_file.<n> files written by
    compute_temp_file(); each one is preceded by a comment naming the fio
    log it came from and followed by an empty line.
    """
    if verbose:
        print("Processing data file 2/2")
    output_path = gnuplot_output_dir + gnuplot_output_filename
    temporary_files.append(output_path)
    with open(output_path, "w") as output:
        for index, log_name in enumerate(fio_data_file):
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir,
                                                       index)
            output.write("# Disk%d was coming from %s\n" % (index, log_name))
            with open(tmp_filename, 'r') as trace:
                output.write(trace.read())
            output.write("\n")


def average(s):
    """Return the arithmetic mean of `s` as a float.

    Raises ZeroDivisionError when `s` is empty.
    """
    return sum(s) * 1.0 / len(s)


def compute_temp_file(fio_data_file, disk_perf, gnuplot_output_dir,
                      min_time, max_time):
    """Filter the fio logs by time and write one temporary file per log.

    fio_data_file      -- list of fio log paths to read
    disk_perf          -- list extended in place with one value list per log
    gnuplot_output_dir -- output directory, with its trailing '/'
    min_time           -- lower time bound in seconds (number or string);
                          0 means "skip the first 500 msec"
    max_time           -- upper time bound in seconds (number or string);
                          -1 means no limit

    Each log line must start with: time (msec), value, direction, block
    size. Extra trailing columns are ignored. Returns the first non-zero
    block size found, or 0 when there is none. Exits with status 1 on a
    line that cannot be parsed.
    """
    min_limit = float(min_time)
    max_limit = float(max_time)
    unlimited = (max_limit == -1)
    end_time = "infinite" if unlimited else max_time
    if verbose:
        print("Processing data file 1/2 with %s<time<%s"
              % (min_time, end_time))

    # We ignore the first 500msec as it doesn't seem to be part of the real
    # benchmark: time < 500 usually reports BW=0, breaking the min computing
    if min_limit == 0:
        min_limit = 0.5
    min_msec = min_limit * 1000
    max_msec = max_limit * 1000

    inputs = []
    outputs = []
    blk_size = 0
    try:
        for log_name in fio_data_file:
            inputs.append(open(log_name))
            tmp_filename = "%sgnuplot_temp_file.%d" % (gnuplot_output_dir,
                                                       len(inputs) - 1)
            temporary_files.append(tmp_filename)
            trace = open(tmp_filename, 'w')
            outputs.append(trace)
            trace.write("#Temporary file based on file %s\n" % log_name)
            disk_perf.append([])

        # Read one line of every log per round, until all logs are exhausted.
        # With no input at all there is nothing to read.
        while inputs:
            rows = []
            exhausted = 0
            for handle in inputs:
                fields = handle.readline().replace(',', ' ').split()
                if not fields:
                    exhausted += 1
                    # Placeholder keeping the shorter logs aligned; its
                    # negative time never falls in the plotted range
                    fields = ["-1", "0", "0", "0"]
                rows.append(fields)
            if exhausted == len(inputs):
                break

            # The position in rows tells which log a value belongs to
            for index, fields in enumerate(rows):
                try:
                    time, perf, _, block_size = fields[:4]
                    if blk_size == 0:
                        blk_size = int(block_size)
                except ValueError:
                    print("Error while reading the following line :")
                    print((index, fields))
                    sys.exit(1)

                # Keep only the data within the requested time range
                stamp = float(time)
                if stamp > min_msec and (unlimited or stamp < max_msec):
                    disk_perf[index].append(int(perf))
                    outputs[index].write("%d %.2f %s\n"
                                         % (index, stamp / 1000, perf))
    finally:
        for handle in inputs + outputs:
            handle.close()
    return blk_size


def _standard_deviation(values, mean):
    """Return the population standard deviation of `values` around `mean`."""
    return math.sqrt(average([(value - mean) ** 2 for value in values]))


def _write_lines(path, lines):
    """Create (or truncate) `path` and write `lines` to it."""
    with open(path, 'w') as handle:
        handle.writelines(lines)


def compute_math(fio_data_file, title, gnuplot_output_filename,
                 gnuplot_output_dir, mode, disk_perf, gpm_dir):
    """Write the statistics files and the 'mymath' gnuplot script.

    Produces <basename>.average/.min/.max/.stddev with one "disk value"
    line per trace, and <basename>.global with the figures computed over
    all values. Any previous 'mymath' script is replaced. All the files are
    registered in temporary_files. Raises ZeroDivisionError when a trace
    has no value.
    """
    if verbose:
        print("Computing Maths")
    base = gnuplot_output_dir + gnuplot_output_filename
    for extension in ('.average', '.min', '.max', '.stddev', '.global'):
        temporary_files.append(base + extension)

    header = 'DiskName %s\n' % mode
    average_lines = [header]
    min_lines = [header]
    max_lines = [header]
    stddev_lines = [header]
    global_min = []
    global_max = []

    for disk, log_name in enumerate(fio_data_file):
        samples = disk_perf[disk]
        origin = "# Disk%d was coming from %s\n" % (disk, log_name)
        avg = average(samples)
        standard_deviation = _standard_deviation(samples, avg)
        local_min = min(samples)
        local_max = max(samples)

        average_lines += [origin, '%d %d\n' % (disk, avg)]
        stddev_lines += [origin, '%d %d\n' % (disk, standard_deviation)]
        min_lines += [origin, '%d %d\n' % (disk, local_min)]
        max_lines += [origin, '%d %d\n' % (disk, local_max)]
        global_min.append(int(local_min))
        global_max.append(int(local_max))

    global_disk_perf = sum(disk_perf, [])
    avg = average(global_disk_perf)
    standard_deviation = _standard_deviation(global_disk_perf, avg)
    global_lines = [
        'min=%.2f\n' % min(global_disk_perf),
        'max=%.2f\n' % max(global_disk_perf),
        'avg=%.2f\n' % avg,
        'stddev=%.2f\n' % standard_deviation,
        'values_count=%d\n' % len(global_disk_perf),
        'disks_count=%d\n' % len(fio_data_file),
    ]

    _write_lines(base + '.average', average_lines)
    _write_lines(base + '.min', min_lines)
    _write_lines(base + '.max', max_lines)
    _write_lines(base + '.stddev', stddev_lines)
    _write_lines(base + '.global', global_lines)

    # generate_gnuplot_math_script() appends: start from a fresh script
    try:
        os.remove(gnuplot_output_dir + 'mymath')
    except OSError:
        pass

    generate_gnuplot_math_script(
        "Average values of " + title, gnuplot_output_filename + '.average',
        mode, int(avg), gnuplot_output_dir, gpm_dir)
    generate_gnuplot_math_script(
        "Min values of " + title, gnuplot_output_filename + '.min',
        mode, average(global_min), gnuplot_output_dir, gpm_dir)
    generate_gnuplot_math_script(
        "Max values of " + title, gnuplot_output_filename + '.max',
        mode, average(global_max), gnuplot_output_dir, gpm_dir)
    generate_gnuplot_math_script(
        "Standard Deviation of " + title, gnuplot_output_filename + '.stddev',
        mode, int(standard_deviation), gnuplot_output_dir, gpm_dir)


def parse_global_files(fio_data_file, global_search):
    """Search the .global files for `global_search` and print the result.

    Only the "avg" search is implemented: for each file the value found is
    multiplied by its disks_count, and the biggest product is reported.
    Reading of a file stops at the first line that is not "name=value".
    """
    max_result = 0
    max_file = ''
    for global_name in fio_data_file:
        # Reset per file, so that a file lacking an entry cannot reuse the
        # value read from the previous one
        disks_count = 0
        search_value = -1

        with open(global_name) as handle:
            for line in handle:
                parts = line.split("=")
                if len(parts) != 2 or not parts[0]:
                    break
                name, value = parts

                # disks_count is not a global_search item but is needed
                # for the computation below
                if name == "disks_count":
                    disks_count = int(value)

                # Let's catch the searched item
                if global_search in name:
                    search_value = float(value)

        # Estimate the global bandwidth per file and keep the biggest one
        if global_search == "avg":
            if disks_count > 0 and search_value != -1:
                result = disks_count * search_value
                if result > max_result:
                    max_result = result
                    max_file = global_name

    if global_search == "avg":
        print("Biggest aggregated value of %s was %2.f in file %s\n"
              % (global_search, max_result, max_file))
    else:
        print("Global search %s is not yet implemented\n" % global_search)


def render_gnuplot(fio_data_file, gnuplot_output_dir):
    """Run gnuplot on the generated scripts, inside `gnuplot_output_dir`.

    The *.gnuplot compare scripts are rendered only when there are at least
    two traces, then 'mymath' and 'mygraph'. On success keep_temp_files is
    set to False so that main() removes the temporary files. Exits with
    status 1 when gnuplot cannot be started.
    """
    global keep_temp_files
    print("Running gnuplot Rendering")
    try:
        # Let's render all the compared files if some
        if len(fio_data_file) > 1:
            if verbose:
                print(" |-> Rendering comparing traces")
            for script in sorted(find_file(gnuplot_output_dir, '*.gnuplot')):
                subprocess.call(['gnuplot', script], cwd=gnuplot_output_dir)
        if verbose:
            print(" |-> Rendering math traces")
        subprocess.call(['gnuplot', 'mymath'], cwd=gnuplot_output_dir)
        if verbose:
            print(" |-> Rendering 2D & 3D traces")
        subprocess.call(['gnuplot', 'mygraph'], cwd=gnuplot_output_dir)
    except OSError:
        # Raised when the gnuplot binary (or the directory) is missing
        print("Could not run gnuplot on mymath or mygraph !\n")
        sys.exit(1)

    name_of_directory = "the current"
    if gnuplot_output_dir != "./":
        name_of_directory = gnuplot_output_dir
    print("\nRendering traces are available in %s directory"
          % name_of_directory)
    keep_temp_files = False


def print_help():
    """Print the command line usage on standard output."""
    help_lines = (
        'fio2gnuplot -ghbiodvk -t <title> -o <outputfile> -p <pattern> -G <type> -m <time> -M <time>',
        '',
        '-h --help : Print this help',
        '-p <pattern> or --pattern <pattern> : A pattern in regexp to select fio input files',
        '-b or --bandwidth : A predefined pattern for selecting *_bw.log files',
        '-i or --iops : A predefined pattern for selecting *_iops.log files',
        '-g or --gnuplot : Render gnuplot traces before exiting',
        '-o or --outputfile <file> : The basename for gnuplot traces',
        ' - Basename is set with the pattern if defined',
        '-d or --outputdir <dir> : The directory where gnuplot shall render files',
        '-t or --title <title> : The title of the gnuplot traces',
        ' - Title is set with the block size detected in fio traces',
        '-G or --Global <type> : Search for <type> in .global files match by a pattern',
        ' - Available types are : min, max, avg, stddev',
        ' - The .global extension is added automatically to the pattern',
        '-m or --min_time <time> : Only consider data starting from <time> seconds (default is 0)',
        '-M or --max_time <time> : Only consider data ending before <time> seconds (default is -1 aka nolimit)',
        '-v or --verbose : Increasing verbosity',
        '-k or --keep : Keep all temporary files from gnuplot\'s output dir',
    )
    for line in help_lines:
        print(line)


def _parse_time_option(opt, arg):
    """Return the value of a time option as a float, or exit with status 2."""
    try:
        return float(arg)
    except ValueError:
        print("Error: option %s expects a number of seconds, got '%s'"
              % (opt, arg))
        sys.exit(2)


def main(argv):
    """Command line entry point; `argv` is the full sys.argv list.

    Selects the fio logs of the current directory matching the requested
    pattern, then either parses .global files (-G) or generates the gnuplot
    data and scripts, optionally rendering them (-g). Exit statuses used:
    1 for help / no file found, 2 for a bad command line, 3 when the gpm
    files are not installed.
    """
    global verbose, temporary_files, keep_temp_files

    mode = 'unknown'
    pattern = ''
    pattern_set_by_user = False
    title = 'No title'
    gnuplot_output_filename = 'result'
    gnuplot_output_dir = './'
    gpm_dir = "/usr/share/fio/"
    disk_perf = []
    run_gnuplot = False
    parse_global = False
    global_search = ''
    min_time = 0
    max_time = -1
    verbose = False
    temporary_files = []
    keep_temp_files = True
    force_keep_temp_files = False

    if not os.path.isfile(gpm_dir + 'math.gpm'):
        gpm_dir = "/usr/local/share/fio/"
        if not os.path.isfile(gpm_dir + 'math.gpm'):
            print("Looks like fio didn't get installed properly as no gpm files found in '/usr/share/fio' or '/usr/local/share/fio'\n")
            sys.exit(3)

    try:
        # Long options taking a value need a trailing '=', as the matching
        # short options need a trailing ':'
        opts, args = getopt.getopt(
            argv[1:], "ghkbivo:d:t:p:G:m:M:",
            ['bandwidth', 'iops', 'pattern=', 'outputfile=', 'outputdir=',
             'title=', 'min_time=', 'max_time=', 'gnuplot', 'Global=',
             'help', 'verbose', 'keep'])
    except getopt.GetoptError:
        print("Error: One of the options passed to the cmdline was not supported")
        print("Please fix your command line or read the help (-h option)")
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-b", "--bandwidth"):
            pattern = '*_bw.log'
        elif opt in ("-i", "--iops"):
            pattern = '*_iops.log'
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt in ("-k", "--keep"):
            # User really wants to keep the temporary files
            force_keep_temp_files = True
        elif opt in ("-p", "--pattern"):
            pattern_set_by_user = True
            pattern = arg.replace('\\', '')
        elif opt in ("-o", "--outputfile"):
            gnuplot_output_filename = arg
        elif opt in ("-d", "--outputdir"):
            gnuplot_output_dir = arg
            if not gnuplot_output_dir.endswith('/'):
                gnuplot_output_dir = gnuplot_output_dir + '/'
            if not os.path.exists(gnuplot_output_dir):
                os.makedirs(gnuplot_output_dir)
        elif opt in ("-t", "--title"):
            title = arg
        elif opt in ("-m", "--min_time"):
            min_time = _parse_time_option(opt, arg)
        elif opt in ("-M", "--max_time"):
            max_time = _parse_time_option(opt, arg)
        elif opt in ("-g", "--gnuplot"):
            run_gnuplot = True
        elif opt in ("-G", "--Global"):
            parse_global = True
            global_search = arg
        elif opt in ("-h", "--help"):
            print_help()
            sys.exit(1)

    # Adding the .global extension to the pattern, unless already there
    if parse_global and not pattern.endswith('.global'):
        pattern = pattern + '.global'

    fio_data_file = find_file('.', pattern)
    if len(fio_data_file) == 0:
        print("No log file found with pattern %s!" % pattern)
        sys.exit(1)
    else:
        print("%d files Selected with pattern '%s'"
              % (len(fio_data_file), pattern))

    fio_data_file = sorted(fio_data_file, key=str.lower)
    for log_name in fio_data_file:
        print(' |-> %s' % log_name)
        if "_bw.log" in log_name:
            mode = "Bandwidth (KB/sec)"
        if "_iops.log" in log_name:
            mode = "IO per Seconds (IO/sec)"
    if (title == 'No title') and (mode != 'unknown'):
        if "Bandwidth" in mode:
            title = 'Bandwidth benchmark with %d fio results' % len(fio_data_file)
        if "IO" in mode:
            title = 'IO benchmark with %d fio results' % len(fio_data_file)

    print("")
    # The output filename follows the pattern required by the user
    if pattern_set_by_user:
        gnuplot_output_filename = pattern
        # Strip the wildcard parts of the pattern to get a clear file name
        gnuplot_output_filename = gnuplot_output_filename.replace('-*-', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('*', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('--', '-')
        gnuplot_output_filename = gnuplot_output_filename.replace('.log', '')
        # Make sure the filename has no starting or trailing dash
        if gnuplot_output_filename.endswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[:-1]
        if gnuplot_output_filename.startswith('-'):
            gnuplot_output_filename = gnuplot_output_filename[1:]

    if parse_global:
        parse_global_files(fio_data_file, global_search)
    else:
        blk_size = compute_temp_file(fio_data_file, disk_perf,
                                     gnuplot_output_dir, min_time, max_time)
        title = "%s @ Blocksize = %dK" % (title, blk_size // 1024)
        compute_aggregated_file(fio_data_file, gnuplot_output_filename,
                                gnuplot_output_dir)
        compute_math(fio_data_file, title, gnuplot_output_filename,
                     gnuplot_output_dir, mode, disk_perf, gpm_dir)
        generate_gnuplot_script(fio_data_file, title,
                                gnuplot_output_filename, gnuplot_output_dir,
                                mode, disk_perf, gpm_dir)

        if run_gnuplot:
            render_gnuplot(fio_data_file, gnuplot_output_dir)

        # Temporary files are removed only after a successful rendering,
        # and only if the user did not ask to keep them
        if not keep_temp_files and not force_keep_temp_files:
            if verbose:
                print("Cleaning temporary files")
            for temporary_file in temporary_files:
                if verbose:
                    print(" -> %s" % temporary_file)
                try:
                    os.remove(temporary_file)
                except OSError:
                    # Already removed: some names are registered twice
                    pass


# Main
if __name__ == "__main__":
    sys.exit(main(sys.argv))