#define _GNU_SOURCE
#include <string.h>
#include <pthread.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
// memrw provides a simulation of an application
// reading and writing memory, for the sake of tuning helgrind.
// It is a very simple (simplistic) model:
// * only one thread
// * only one exe context reading or writing the memory
// * the working set of the application is unrealistically
// concentrated on a consecutive nr of MB.
// At this moment, it was just used to tune the EvM data structure
// of helgrind.
// It would be nice to enhance this program to cope with a richer
// model e.g. multiple threads, many different stack traces touching
// the memory, better working set distribution, ...
static int sz_b; // size of a block
static int nr_b; // total nr of blocks used by the program
static int nr_b_ws; // nr_b in program working set
static int nr_loops; // nr of loops reading or writing the ws
static int nr_thr; // nr of threads (hardcoded to 1 currently)
static int nr_repeat; // nr of times we will allocate, use, then free total+ws
// Note: the total nr of MB is what is explicitely allocated.
// On top of that, we have the stacks, local vars, lib vars, ...
// The working set is just the first nr_b_ws blocks of nr_b.
static int verbose = 0;
static unsigned char **t_b; // Pointers to all blocks
static void *memrw_fn(void *v)
{
int loops, m, b;
int dowrite;
int differs = 0;
unsigned char prev = 0;
for (loops = 0; loops < nr_loops; loops++) {
// printf("loop %d dowrite %d\n", loops, dowrite);
// Note: in case of multiple threads, we will have
// to add lock/unlock somewhere in the below, maybe to lock
// the MB we are reading or writing.
for (m = 0; m < nr_b_ws; m++) {
for (b = 0; b < sz_b; b++) {
dowrite = b % 5 == 0;
// Do some write or read operations.
if (dowrite) {
if (t_b[m][b] < 255)
t_b[m][b] += differs;
else
t_b[m][b] = 0;
} else {
differs = t_b[m][b] != prev;
prev = t_b[m][b];
}
}
}
}
return NULL;
}
int main (int argc, char *argv[])
{
int a;
int ret;
int i;
int r;
pthread_t thr;
// usage: memrw [-b blocksize default 1MB ]
// [-t nr_b default 10] [-w nr_b_ws default 10]
// [-l nr_loops_on_ws default 3]
// [-r nr_repeat default 1]
// [-f fan_out default 0]
// [-v verbosity default 0]
sz_b = 1024 * 1024;
nr_b = 10;
nr_b_ws = 10;
nr_loops = 3;
nr_repeat = 1;
verbose = 0;
for (a = 1; a < argc; a+=2) {
if (strcmp(argv[a], "-b") == 0) {
sz_b = atoi(argv[a+1]);
} else if (strcmp(argv[a], "-t") == 0) {
nr_b = atoi(argv[a+1]);
} else if (strcmp(argv[a], "-w") == 0) {
nr_b_ws = atoi(argv[a+1]);
} else if (strcmp(argv[a], "-l") == 0) {
nr_loops = atoi(argv[a+1]);
} else if (strcmp(argv[a], "-r") == 0) {
nr_repeat = atoi(argv[a+1]);
} else if (strcmp(argv[a], "-v") == 0) {
verbose = atoi(argv[a+1]);
} else {
printf("unknown arg %s\n", argv[a]);
}
}
if (nr_b_ws > nr_b)
nr_b_ws = nr_b; // to make it easy to do loops combining values
nr_thr = 1;
printf ("total program memory -t %llu MB"
" working set -w %llu MB\n",
((unsigned long long)nr_b * sz_b)
/ (unsigned long long) (1024*1024),
((unsigned long long)nr_b_ws * sz_b)
/ (unsigned long long)(1024*1024));
printf (" working set R or W -l %d times"
" repeat the whole stuff -r %d times\n",
nr_loops,
nr_repeat);
for (r = 0; r < nr_repeat; r++) {
printf ("creating and initialising the total program memory\n");
t_b = malloc(nr_b * sizeof(char*));
if (t_b == NULL)
perror("malloc t_b");
for (i = 0; i < nr_b; i++) {
t_b[i] = calloc(sz_b, 1);
if (t_b[i] == NULL)
perror("malloc t_b[i]");
}
printf("starting thread that will read or write the working set\n");
ret = pthread_create(&thr, NULL, memrw_fn, &nr_thr);
if (ret != 0)
perror("pthread_create");
printf("waiting for thread termination\n");
ret = pthread_join(thr, NULL);
if (ret != 0)
perror("pthread_join");
printf("thread terminated\n");
/* Now, free the memory used, for the next repeat */
for (i = 0; i < nr_b; i++)
free (t_b[i]);
free (t_b);
printf("memory freed\n");
}
return 0;
}