#!/bin/bash

########################
# Function definitions #
########################

# Load the helper library that sits next to this script. The helpers called
# further down (measure_runtime, avgstddev, print_runtime_ratio,
# get_cache_size, log2) are not defined in this file and are presumably
# provided by it. "$0" is quoted so that a script path containing whitespace
# still resolves, and a failed load aborts instead of running without helpers.
source "$(dirname "$0")/measurement-functions" || {
  echo "Error: could not load $(dirname "$0")/measurement-functions." >&2
  exit 1
}

#######################################
# Benchmark one program: measure it natively with 1, 2 and 4 threads, then
# print the ratios reported by print_runtime_ratio for several Valgrind tool
# configurations relative to the native -p1 / -p4 results.
# Globals (read):
#   VG        - Valgrind launcher handed to print_runtime_ratio.
#   psep      - text placed between "-p" and the thread count. Empty or unset
#               yields "-pN"; a single space yields the two words "-p" "N".
#   test_args - extra argument appended after the thread-count option. It is
#               always passed, so an unset value yields one empty argument.
# Arguments:
#   $1   - path of the program under test; also the prefix of the
#          test_output file names handed to the helpers.
#   $2.. - further arguments for the program.
# Outputs:
#   One "Average time" line per native run, whatever print_runtime_ratio
#   prints for each Valgrind run, and a final empty line on stdout.
# Exits:
#   With status 1 (message on stderr) when avgstddev yields more than the
#   four expected fields.
#######################################
function run_test {
  local p level measurement
  local avg stddev vsz vszdev rest
  local avg1 stddev1 vsz1 vszdev1
  local avg4 stddev4 vsz4 vszdev4

  # Native runs. The statistics are captured directly instead of going
  # through a predictably named file in /tmp.
  for p in 1 2 4; do
    # -p${psep}${p} is deliberately unquoted: word splitting turns a psep of
    # ' ' into two separate arguments.
    measurement=$(test_output="${1}-p${p}.out" \
      measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev)
    read -r avg stddev vsz vszdev rest <<< "${measurement}"
    echo "Average time: ${avg} +/- ${stddev} seconds." \
         " VSZ: ${vsz} +/- ${vszdev} KB"

    if [ -n "${rest}" ]; then
      echo "Internal error ($rest)" >&2
      exit 1
    fi

    # Only the -p1 and -p4 results serve as baselines below; the -p2 result
    # is reported and then dropped.
    case "${p}" in
      1) avg1=${avg} stddev1=${stddev} vsz1=${vsz} vszdev1=${vszdev} ;;
      4) avg4=${avg} stddev4=${stddev} vsz4=${vsz} vszdev4=${vszdev} ;;
    esac
  done

  local -a native1=("${avg1}" "${stddev1}" "${vsz1}" "${vszdev1}")
  local -a native4=("${avg4}" "${stddev4}" "${vsz4}" "${vszdev4}")

  # --tool=none with one thread, compared against the native -p1 run.
  p=1
  test_output="/dev/null" \
    print_runtime_ratio "${native1[@]}" "$VG" --tool=none \
    "$@" -p${psep}${p} "${test_args}"

  # Every remaining run uses four threads and the native -p4 baseline.
  p=4
  test_output="/dev/null" \
    print_runtime_ratio "${native4[@]}" "$VG" --tool=none \
    "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  for level in none approx full; do
    test_output="${1}-helgrind-4-${level}.out" \
      print_runtime_ratio "${native4[@]}" "$VG" --tool=helgrind \
      "--history-level=${level}" "$@" -p${psep}${p} "${test_args}"
  done

  echo ''
}


########################
# Script body          #
########################

# Absolute path of the directory holding this script. "$0" is quoted so a
# path containing whitespace survives; a relative result is anchored at $PWD.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# The SPLASH-2 tree is expected one level above the scripts directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})." >&2
  exit 1
fi

# VG may be preset by the caller; otherwise fall back to vg-in-place two
# levels above the scripts directory.
if [ -z "$VG" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG." >&2
  exit 1
fi

######################################################################################################################
# Meaning of the different columns:
#  1. SPLASH2 test name.
#  2. Execution time in seconds for native run with argument -p1.
#  3. Virtual memory size in KB for the native run with argument -p1.
#  4. Execution time in seconds for native run with argument -p2.
#  5. Virtual memory size in KB for the native run with argument -p2.
#  6. Execution time in seconds for native run with argument -p4.
#  7. Virtual memory size in KB for the native run with argument -p4.
#  8. Execution time ratio for --tool=none -p1 versus -p1.
#  9. Virtual memory size ratio for --tool=none -p1 versus -p1.
# 10. Execution time ratio for --tool=none -p4 versus -p4.
# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 16. Execution time ratio for --tool=helgrind --history-level=none -p4 versus -p4.
# 17. Virtual memory size ratio for --tool=helgrind --history-level=none -p4 versus -p4.
# 18. Execution time ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
# 19. Virtual memory size ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
# 20. Execution time ratio for --tool=helgrind --history-level=full -p4 versus -p4.
# 21. Virtual memory size ratio for --tool=helgrind --history-level=full -p4 versus -p4.
# 22. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 23. Execution time ratio for Intel Thread Checker -p4+f versus -p4 ("+f" presumably: with the filtering from the paper cited below).
#
# Notes:
# - Both Helgrind and DRD use a granularity of one byte for data race detection.
# - Helgrind does detect data races on stack variables. DRD only detects
#   data races on stack variables with --check-stack-var=yes.
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS was used, which
#   granularity ITC uses, and which m4 macros ITC used as the implementation
#   of the synchronization primitives.
#
#     1                    2     3      4      5      6     7     8    9   10   11   12  13     14  15    16    17  18    19  20    21   22   23
################################################################################################################################################
# Results:                native       native       native       none      none       DRD        DRD      HG        HG        HG         ITC ITC
#                         -p1          -p2          -p4           -p1       -p4       -p4       -p4+f     -p4       -p4       -p4      -p4 -p4+f
# ..............................................................................................................................................
# Cholesky                0.11  12016  0.06  22016  0.55  41328 10.3  4.92  1.7 2.14   15 2.61    8 2.61   10  3.96  10  3.96  15  6.14  239  82
# FFT                     0.02   6692  0.02  14888  0.02  31621 17.0  8.01 20.0 2.48  114 3.15   64 3.28   81  4.52  81  4.52 116  5.56   90  41
# LU, contiguous          0.08   4100  0.05  12304  0.06  28712 11.1 12.44 18.5 2.64  104 3.18   70 3.18   87  4.84  89  4.84 118  5.55  428 128
# Ocean, contiguous       0.23  16848  0.19  25384  0.23  42528  6.3  3.78  8.3 2.11   87 2.82   62 4.02   71  3.75  71  3.75 195  5.96   90  28
# Radix                   0.21  15136  0.14  23336  0.15  39728 12.6  4.10 22.3 2.19   61 2.87   41 2.94   52  4.03  52  4.03  85  6.13  222  56
# Raytrace                0.63 207104  0.49 215296  0.49 231680  8.9  1.23 12.9 1.20  385 1.38   86 2.10  158  3.70 160  3.70 222  4.15  172  53
# Water-n2                0.18  10696  0.09  27072  0.11  59832 12.5  5.46 26.7 1.80 3092 3.03  263 3.06   92  3.28  92  3.28  92  3.55  189  39
# Water-sp                0.20   4444  0.15  13536  0.10  30269 10.6 11.56 27.0 2.52  405 3.29   69 3.42   95  4.59  95  4.59  97  4.73  183  34
# ..............................................................................................................................................
# geometric mean          0.14  13024  0.10  25669  0.14  47655 10.8  5.26 13.5 2.08  161 2.71   59 3.03   66  4.05  66  4.05  95  5.13  180  51
# ..............................................................................................................................................
# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.                                                        
# Software: openSUSE 11.0 (64-bit edition), runlevel 3, kernel 2.6.30.1, gcc 4.3.1, 32 bit SPLASH-2 executables, valgrind trunk r10648.
################################################################################################################################################

####
# Notes:
# - The ITC performance numbers in the above table originate from table 1 in
#   the following paper:
#   Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
#   Accurate and efficient filtering for the Intel thread checker race
#   detector, Proceedings of the 1st workshop on Architectural and system
#   support for improving software dependability, San Jose, California,
#   2006. Pages: 34 - 41.
# - The input parameters for benchmarks below originate from table 1 in the
#   following paper:
#   The SPLASH-2 programs: characterization and methodological considerations
#   Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
#   1995. Proceedings of the 22nd Annual International Symposium on Computer
#   Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
#   ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
####

# Cache size as reported by get_cache_size, and its base-2 logarithm; both
# feed the benchmark parameters below.
cache_size=$(get_cache_size)
log2_cache_size=$(log2 "${cache_size}")

# Each benchmark that needs a specific working directory runs in a subshell,
# so the directory change does not affect later benchmarks. A failed cd only
# aborts that subshell; the remaining benchmarks still run.

# Cholesky
(
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Unpack every compressed input next to its archive.
  for f in *.Z
  do
    # Skip the literal pattern when no archive matches.
    [ -e "$f" ] || continue
    gzip -cd <"$f" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY "-C$((cache_size))"
)

# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t "-l$((log2_cache_size/2))" -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. It is not
# clear to me why.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" "-n$((2**20))" -r1024

# Raytrace
(
  # The cd must succeed before the rm below, otherwise files in whatever
  # directory the script was started from would be deleted.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  rm -f -- *.env *.geo *.rl
  for f in *.Z
  do
    [ -e "$f" ] || continue
    gzip -cd <"$f" >"${f%.Z}"
  done
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)

# Water-n2
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)



# Local variables:
# compile-command: "./run-splash2"
# End: