#! /usr/bin/perl

##--------------------------------------------------------------------##
##--- Massif's results printer                         ms_print.in ---##
##--------------------------------------------------------------------##

#  This file is part of Massif, a Valgrind tool for profiling memory
#  usage of programs.
#
#  Copyright (C) 2007-2007 Nicholas Nethercote
#     njn@valgrind.org
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License as
#  published by the Free Software Foundation; either version 2 of the
#  License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful, but
#  WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
#  02111-1307, USA.
#
#  The GNU General Public License is contained in the file COPYING.

use warnings;
use strict;

#----------------------------------------------------------------------------
# Global variables, main data structures
#----------------------------------------------------------------------------

# Command line of profiled program.
my $cmd;

# Time unit used in profile.
my $time_unit;

# Threshold dictating what percentage an entry must represent for us to
# bother showing it.
my $threshold = 1.0;

# Graph x and y dimensions.
my $graph_x = 72;
my $graph_y = 20;

# Input file name
my $input_file = undef;

# Tmp file name.
my $tmp_file = "ms_print.tmp.$$";

# Version number.
my $version = "3.10.0.SVN";

# Args passed, for printing.
my $ms_print_args;

# Usage message.
my $usage = <<END
usage: ms_print [options] massif-out-file

  options for the user, with defaults in [ ], are:
    -h --help             show this message
    --version             show version
    --threshold=<m.n>     significance threshold, in percent [$threshold]
    --x=<4..1000>         graph width, in columns [72]
    --y=<4..1000>         graph height, in rows [20]

  ms_print is Copyright (C) 2007-2007 Nicholas Nethercote.
  and licensed under the GNU General Public License, version 2.
  Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
                                                
END
;

# Used in various places of output.
my $fancy    = '-' x 80;
my $fancy_nl = $fancy . "\n";

# Returns 0 if the denominator is 0.
sub safe_div_0($$)
{
    my ($x, $y) = @_;
    return ($y ? $x / $y : 0);
}

#-----------------------------------------------------------------------------
# Argument and option handling
#-----------------------------------------------------------------------------
sub process_cmd_line() 
{
    my @files;

    # Grab a copy of the arguments, for printing later.
    for my $arg (@ARGV) { 
        $ms_print_args .= " $arg";       # The arguments.
    }

    for my $arg (@ARGV) { 

        # Option handling
        if ($arg =~ /^-/) {

            # --version
            if ($arg =~ /^--version$/) {
                die("ms_print-$version\n");

            # --threshold=X (tolerates a trailing '%')
            } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
                $threshold = $1;
                ($1 >= 0 && $1 <= 100) or die($usage);

            } elsif ($arg =~ /^--x=(\d+)$/) {
                $graph_x = $1;
                (4 <= $graph_x && $graph_x <= 1000) or die($usage);

            } elsif ($arg =~ /^--y=(\d+)$/) {
                $graph_y = $1;
                (4 <= $graph_y && $graph_y <= 1000) or die($usage);

            } else {            # -h and --help fall under this case
                die($usage);
            }
        } else {
            # Not an option.  Remember it as a filename. 
            push(@files, $arg);
        }
    }

    # Must have chosen exactly one input file.
    if (scalar @files) {
        $input_file = $files[0];
    } else {
        die($usage);
    }
}

#-----------------------------------------------------------------------------
# Reading the input file: auxiliary functions
#-----------------------------------------------------------------------------

# Gets the next line, stripping comments and skipping blanks.
# Returns undef at EOF.
sub get_line()
{
    while (my $line = <INPUTFILE>) {
        $line =~ s/#.*$//;          # remove comments
        if ($line !~ /^\s*$/) {
            return $line;           # return $line if non-empty
        }
    }
    return undef;       # EOF: return undef
}

sub equals_num_line($$)
{
    my ($line, $fieldname) = @_;
    defined($line) 
        or die("Line $.: expected \"$fieldname\" line, got end of file\n");
    $line =~ s/^$fieldname=(.*)\s*$//
        or die("Line $.: expected \"$fieldname\" line, got:\n$line");
    return $1;
}

sub is_significant_XPt($$$)
{
    my ($is_top_node, $xpt_szB, $total_szB) = @_;
    ($xpt_szB <= $total_szB) or die;
    # Nb: we always consider the alloc-XPt significant, even if the size is
    # zero.
    return $is_top_node || 0 == $threshold ||
        ( $total_szB != 0 && $xpt_szB * 100 / $total_szB >= $threshold );
}

#-----------------------------------------------------------------------------
# Reading the input file: reading heap trees
#-----------------------------------------------------------------------------

# Forward declaration, because it's recursive.
sub read_heap_tree($$$$$);

# Return pair:  if the tree was significant, both are zero.  If it was
# insignificant, the first element is 1 and the second is the number of
# bytes.
sub read_heap_tree($$$$$)
{
    # Read the line and determine if it is significant.
    my ($is_top_node, $this_prefix, $child_midfix, $arrow, $mem_total_B) = @_;
    my $line = get_line();
    (defined $line and $line =~ /^\s*n(\d+):\s*(\d+)(.*)$/)
        or die("Line $.: expected a tree node line, got:\n$line\n");
    my $n_children = $1;
    my $bytes      = $2;
    my $details    = $3;
    my $perc       = safe_div_0(100 * $bytes, $mem_total_B);
    # Nb: we always print the alloc-XPt, even if its size is zero.
    my $is_significant = is_significant_XPt($is_top_node, $bytes, $mem_total_B);

    # We precede this node's line with "$this_prefix.$arrow".  We precede
    # any children of this node with "$this_prefix$child_midfix$arrow".
    if ($is_significant) {
        # Nb: $details might have '%' in it, so don't embed directly in the
        # format string.
        printf(TMPFILE
            "$this_prefix$arrow%05.2f%% (%sB)%s\n", $perc, commify($bytes),
            $details);
    }

    # Now read all the children.
    my $n_insig_children = 0;
    my $total_insig_children_szB = 0;
    my $this_prefix2 = $this_prefix . $child_midfix;
    for (my $i = 0; $i < $n_children; $i++) {
        # If child is the last sibling, the midfix is empty.
        my $child_midfix2 = ( $i+1 == $n_children ? "  " : "| " );
        my ($is_child_insignificant, $child_insig_bytes) =
            # '0' means it's not the top node of the tree.
            read_heap_tree(0, $this_prefix2, $child_midfix2, "->",
                $mem_total_B);
        $n_insig_children += $is_child_insignificant;
        $total_insig_children_szB += $child_insig_bytes;
    }

    if ($is_significant) {
        # If this was significant but any children were insignificant, print
        # the "in N places" line for them.
        if ($n_insig_children > 0) {
            $perc = safe_div_0(100 * $total_insig_children_szB, $mem_total_B);
            printf(TMPFILE "%s->%05.2f%% (%sB) in %d+ places, all below "
                 . "ms_print's threshold (%05.2f%%)\n",
                $this_prefix2, $perc, commify($total_insig_children_szB),
                $n_insig_children, $threshold);
            print(TMPFILE "$this_prefix2\n");
        }

        # If this node has no children, print an extra (mostly) empty line.
        if (0 == $n_children) {
            print(TMPFILE "$this_prefix2\n");
        }
        return (0, 0);

    } else {
        return (1, $bytes);
    }
}

#-----------------------------------------------------------------------------
# Reading the input file: main
#-----------------------------------------------------------------------------

sub max_label_2($$)
{
    my ($szB, $szB_scaled) = @_;

    # For the label, if $szB is 999B or below, we print it as an integer.
    # Otherwise, we print it as a float with 5 characters (including the '.').
    # Examples (for bytes):
    #       1 -->     1  B
    #     999 -->   999  B
    #    1000 --> 0.977 KB
    #    1024 --> 1.000 KB
    #   10240 --> 10.00 KB
    #  102400 --> 100.0 KB
    # 1024000 --> 0.977 MB
    # 1048576 --> 1.000 MB
    #
    if    ($szB < 1000)        { return sprintf("%5d",   $szB);        }
    elsif ($szB_scaled < 10)   { return sprintf("%5.3f", $szB_scaled); }
    elsif ($szB_scaled < 100)  { return sprintf("%5.2f", $szB_scaled); }
    else                       { return sprintf("%5.1f", $szB_scaled); }
}

# Work out the units for the max value, measured in instructions.
sub i_max_label($)
{
    my ($nI) = @_;

    # We repeat until the number is less than 1000.
    my $nI_scaled = $nI;
    my $unit = "i";
    # Nb: 'k' is the "kilo" (1000) prefix.
    if ($nI_scaled >= 1000) { $unit = "ki"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Mi"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Gi"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Ti"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Pi"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Ei"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Zi"; $nI_scaled /= 1024; }
    if ($nI_scaled >= 1000) { $unit = "Yi"; $nI_scaled /= 1024; }

    return (max_label_2($nI, $nI_scaled), $unit);
}

# Work out the units for the max value, measured in bytes.
sub B_max_label($)
{
    my ($szB) = @_;

    # We repeat until the number is less than 1000, but we divide by 1024 on
    # each scaling.
    my $szB_scaled = $szB;
    my $unit = "B";
    # Nb: 'K' or 'k' are acceptable as the "binary kilo" (1024) prefix.
    # (Strictly speaking, should use "KiB" (kibibyte), "MiB" (mebibyte), etc,
    # but they're not in common use.)
    if ($szB_scaled >= 1000) { $unit = "KB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "MB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "GB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "TB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "PB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "EB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "ZB"; $szB_scaled /= 1024; }
    if ($szB_scaled >= 1000) { $unit = "YB"; $szB_scaled /= 1024; }

    return (max_label_2($szB, $szB_scaled), $unit);
}

# Work out the units for the max value, measured in ms/s/h.
sub t_max_label($)
{
    my ($szB) = @_;

    # We scale from millisecond to seconds to hours.
    #
    # XXX: this allows a number with 6 chars, eg. "3599.0 s"
    my $szB_scaled = $szB;
    my $unit = "ms";
    if ($szB_scaled >= 1000) { $unit = "s"; $szB_scaled /= 1000; }
    if ($szB_scaled >= 3600) { $unit = "h"; $szB_scaled /= 3600; }

    return (max_label_2($szB, $szB_scaled), $unit);
}

# This prints four things:
#   - the output header
#   - the graph
#   - the snapshot summaries (number, list of detailed ones)
#   - the snapshots
#
# The first three parts can't be printed until we've read the whole input file;
# but the fourth part is much easier to print while we're reading the file.  So
# we print the fourth part to a tmp file, and then dump the tmp file at the
# end.
#
sub read_input_file() 
{
    my $desc = "";              # Concatenated description lines.
    my $peak_mem_total_szB = 0;

    # Info about each snapshot.
    my @snapshot_nums = ();
    my @times         = ();
    my @mem_total_Bs  = ();
    my @is_detaileds  = ();
    my $peak_num = -1;      # An initial value that will be ok if no peak
                            # entry is in the file.
    
    #-------------------------------------------------------------------------
    # Read start of input file.
    #-------------------------------------------------------------------------
    open(INPUTFILE, "< $input_file") 
         || die "Cannot open $input_file for reading\n";

    # Read "desc:" lines.
    my $line;
    while ($line = get_line()) {
        if ($line =~ s/^desc://) {
            $desc .= $line;
        } else {
            last;
        }
    }

    # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above).
    ($line =~ /^cmd:\s*(.*)$/) or die("Line $.: missing 'cmd' line\n");
    $cmd = $1;

    # Read "time_unit:" line.
    $line = get_line();
    ($line =~ /^time_unit:\s*(.*)$/) or
        die("Line $.: missing 'time_unit' line\n");
    $time_unit = $1;

    #-------------------------------------------------------------------------
    # Print snapshot list header to $tmp_file.
    #-------------------------------------------------------------------------
    open(TMPFILE, "> $tmp_file") 
         || die "Cannot open $tmp_file for reading\n";

    my $time_column = sprintf("%14s", "time($time_unit)");
    my $column_format = "%3s %14s %16s %16s %13s %12s\n";
    my $header =
        $fancy_nl .
        sprintf($column_format
        ,   "n"
        ,   $time_column
        ,   "total(B)"
        ,   "useful-heap(B)"
        ,   "extra-heap(B)"
        ,   "stacks(B)"
        ) .
        $fancy_nl;
    print(TMPFILE $header);

    #-------------------------------------------------------------------------
    # Read body of input file.
    #-------------------------------------------------------------------------
    $line = get_line();
    while (defined $line) {
        my $snapshot_num     = equals_num_line($line,      "snapshot");
        my $time             = equals_num_line(get_line(), "time");
        my $mem_heap_B       = equals_num_line(get_line(), "mem_heap_B");
        my $mem_heap_extra_B = equals_num_line(get_line(), "mem_heap_extra_B");
        my $mem_stacks_B     = equals_num_line(get_line(), "mem_stacks_B");
        my $mem_total_B      = $mem_heap_B + $mem_heap_extra_B + $mem_stacks_B;
        my $heap_tree        = equals_num_line(get_line(), "heap_tree");

        # Print the snapshot data to $tmp_file.
        printf(TMPFILE $column_format,
        ,   $snapshot_num
        ,   commify($time)
        ,   commify($mem_total_B)
        ,   commify($mem_heap_B)
        ,   commify($mem_heap_extra_B)
        ,   commify($mem_stacks_B)
        );

        # Remember the snapshot data.
        push(@snapshot_nums, $snapshot_num);
        push(@times,         $time);
        push(@mem_total_Bs,  $mem_total_B);
        push(@is_detaileds,  ( $heap_tree eq "empty" ? 0 : 1 ));
        $peak_mem_total_szB = $mem_total_B
            if $mem_total_B > $peak_mem_total_szB;

        # Read the heap tree, and if it's detailed, print it and a subsequent
        # snapshot list header to $tmp_file.
        if      ($heap_tree eq "empty") {
            $line = get_line();
        } elsif ($heap_tree =~ "(detailed|peak)") {
            # If "peak", remember the number.
            if ($heap_tree eq "peak") {
                $peak_num = $snapshot_num;
            }
            # '1' means it's the top node of the tree.
            read_heap_tree(1, "", "", "", $mem_total_B);

            # Print the header, unless there are no more snapshots.
            $line = get_line();
            if (defined $line) {
                print(TMPFILE $header);
            }
        } else {
            die("Line $.: expected 'empty' or '...' after 'heap_tree='\n");
        }
    }

    close(INPUTFILE);
    close(TMPFILE);

    #-------------------------------------------------------------------------
    # Print header.
    #-------------------------------------------------------------------------
    print($fancy_nl);
    print("Command:            $cmd\n");
    print("Massif arguments:  $desc");
    print("ms_print arguments:$ms_print_args\n");
    print($fancy_nl);
    print("\n\n");

    #-------------------------------------------------------------------------
    # Setup for graph.
    #-------------------------------------------------------------------------
    # The ASCII graph.
    # Row    0 ([0..graph_x][0]) is the X-axis.
    # Column 0 ([0][0..graph_y]) is the Y-axis.
    # The rest ([1][1]..[graph_x][graph_y]) is the usable graph area.
    my @graph;
    my $x;
    my $y;

    my $n_snapshots = scalar(@snapshot_nums);
    ($n_snapshots > 0) or die;
    my $end_time = $times[$n_snapshots-1];
    ($end_time >= 0) or die;

    # Setup graph[][].
    $graph[0][0] = '+';                                     # axes join point
    for ($x = 1; $x <= $graph_x; $x++) { $graph[$x][0] = '-'; } # X-axis
    for ($y = 1; $y <= $graph_y; $y++) { $graph[0][$y] = '|'; } # Y-axis
    $graph[$graph_x][0] = '>';                                  # X-axis arrow
    $graph[0][$graph_y] = '^';                                  # Y-axis arrow 
    for ($x = 1; $x <= $graph_x; $x++) {                        # usable area
       for ($y = 1; $y <= $graph_y; $y++) {
          $graph[$x][$y] = ' ';
       }
    }

    #-------------------------------------------------------------------------
    # Write snapshot bars into graph[][].
    #-------------------------------------------------------------------------
    # Each row represents K bytes, which is 1/graph_y of the peak size
    # (and K can be non-integral).  When drawing the column for a snapshot,
    # in order to fill the slot in row y (where the first row drawn on is
    # row 1) with a full-char (eg. ':'), it must be >= y*K.  For example, if
    # K = 10 bytes, then the values 0, 4, 5, 9, 10, 14, 15, 19, 20, 24, 25,
    # 29, 30 would be drawn like this (showing one per column):
    #
    #                       y    y * K
    #                       -    -----------
    # 30 |            :     3    3 * 10 = 30
    # 20 |        :::::     2    2 * 10 = 20
    # 10 |    :::::::::     1    1 * 10 = 10
    # 0  +-------------

    my $peak_char     = '#';                            
    my $detailed_char = '@';
    my $normal_char   = ':';

    # Work out how many bytes each row represents.  If the peak size was 0,
    # make it 1 so that the Y-axis covers a non-zero range of values.
    # Likewise for end_time.
    if (0 == $peak_mem_total_szB) { $peak_mem_total_szB = 1; }
    if (0 == $end_time          ) { $end_time           = 1; }
    my $K = $peak_mem_total_szB / $graph_y;

       $x          = 0;
    my $prev_x     = 0;
    my $prev_y_max = 0;
    my $prev_char  = ':';

    for (my $i = 0; $i < $n_snapshots; $i++) {
 
        # Work out which column this snapshot belongs to.  
        $prev_x = $x;
        my $x_pos_frac = ($times[$i] / ($end_time)) * $graph_x;
        $x = int($x_pos_frac) + 1;    # +1 due to Y-axis
        # The final snapshot will spill over into the n+1th column, which
        # doesn't get shown.  So we fudge that one and pull it back a
        # column, as if the end_time was actually end_time+epsilon.
        if ($times[$i] == $end_time) {
            ($x == $graph_x+1) or die;
            $x = $graph_x;
        }

        # If there was a gap between the previous snapshot's column and this
        # one, we draw a horizontal line in the gap (so long as it doesn't
        # trash the x-axis).  Without this, graphs with a few sparse
        # snapshots look funny -- as if the memory usage is in temporary
        # spikes.
        if ($prev_y_max > 0) {
            for (my $x2 = $prev_x + 1; $x2 < $x; $x2++) {
                $graph[$x2][$prev_y_max] = $prev_char;
            }
        }

        # Choose the column char.
        my $char;
        if    ($i == $peak_num)   { $char = $peak_char;     }
        elsif ($is_detaileds[$i]) { $char = $detailed_char; }
        else                      { $char = $normal_char;   }

        # Grow this snapshot bar from bottom to top.
        my $y_max = 0;
        for ($y = 1; $y <= $graph_y; $y++) {
            if ($mem_total_Bs[$i] >= $y * $K) {
                # Priority order for chars: peak > detailed > normal
                my $should_draw_char = 
                    (($char eq $peak_char)
                     or
                     ($char eq $detailed_char and 
                      $graph[$x][$y] ne $peak_char
                     )
                     or
                     ($char eq $normal_char and
                      $graph[$x][$y] ne $peak_char and
                      $graph[$x][$y] ne $detailed_char
                     )
                    );

                if ($should_draw_char) {
                    $graph[$x][$y] = $char;
                }
                $y_max = $y;
            }
        }
        $prev_y_max = $y_max;
        $prev_char = $char;
    }

    #-------------------------------------------------------------------------
    # Print graph[][].
    #-------------------------------------------------------------------------
    my ($y_label, $y_unit) = B_max_label($peak_mem_total_szB);
    my ($x_label, $x_unit);
    if    ($time_unit eq "i")  { ($x_label, $x_unit) = i_max_label($end_time) }
    elsif ($time_unit eq "ms") { ($x_label, $x_unit) = t_max_label($end_time) }
    elsif ($time_unit eq "B")  { ($x_label, $x_unit) = B_max_label($end_time) }
    else                       { die "bad time_unit: $time_unit\n"; }

    printf("    %2s\n", $y_unit);
    for ($y = $graph_y; $y >= 0; $y--) {
        if ($graph_y == $y) {            # top row
            print($y_label);
        } elsif (0 == $y) {              # bottom row
            print("   0 ");
        } else {                         # anywhere else
            print("     ");
        }
          
        # Axis and data for the row.
        for ($x = 0; $x <= $graph_x; $x++) {
            printf("%s", $graph[$x][$y]);
        }
        if (0 == $y) {
            print("$x_unit\n");
        } else {
            print("\n");
        }
    }
    printf("     0%s%5s\n", ' ' x ($graph_x-5), $x_label);

    #-------------------------------------------------------------------------
    # Print snapshot numbers.
    #-------------------------------------------------------------------------
    print("\n");
    print("Number of snapshots: $n_snapshots\n");
    print(" Detailed snapshots: [");
    my $first_detailed = 1;
    for (my $i = 0; $i < $n_snapshots; $i++) {
        if ($is_detaileds[$i]) {
            if ($first_detailed) {
                printf("$i");
                $first_detailed = 0;
            } else {
                printf(", $i");
            }
            if ($i == $peak_num) {
                print(" (peak)");
            }
        }
    }
    print("]\n\n");

    #-------------------------------------------------------------------------
    # Print snapshots, from $tmp_file.
    #-------------------------------------------------------------------------
    open(TMPFILE, "< $tmp_file") 
         || die "Cannot open $tmp_file for reading\n";

    while (my $line = <TMPFILE>) {
        print($line);
    }
    unlink($tmp_file);
}

#-----------------------------------------------------------------------------
# Misc functions
#-----------------------------------------------------------------------------
sub commify ($) {
    my ($val) = @_;
    1 while ($val =~ s/^(\d+)(\d{3})/$1,$2/);
    return $val;
}


#----------------------------------------------------------------------------
# "main()"
#----------------------------------------------------------------------------
process_cmd_line();
read_input_file();

##--------------------------------------------------------------------##
##--- end                                              ms_print.in ---##
##--------------------------------------------------------------------##