/* time-schedule.c Programme to test how long a context switch takes. Copyright (C) 1998 Richard Gooch This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Richard Gooch may be reached by email at rgooch@atnf.csiro.au The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. */ /* This programme will determine the context switch (scheduling) overhead on a system. It takes into account SMP machines. True context switches are measured. Written by Richard Gooch 15-SEP-1998 Last updated by Richard Gooch 25-SEP-1998 */ #include <unistd.h> #ifdef _POSIX_THREADS #ifndef _REENTRANT #define _REENTRANT #endif #ifndef _POSIX_THREAD_SAFE_FUNCTIONS #define _POSIX_THREAD_SAFE_FUNCTIONS #endif #include <pthread.h> #endif #include <stdlib.h> #include <stdio.h> #include <math.h> #include <string.h> #include <ctype.h> #include <signal.h> #include <sched.h> #include <sys/time.h> #include <sys/mman.h> #include <sys/types.h> #include <dirent.h> #include <errno.h> #ifndef __KARMA__ #define mt_num_processors() 1 /* Set to the number of processors */ #define ERRSTRING strerror(errno) #define FALSE 0 #define TRUE 1 #else #include <karma.h> #include <karma_mt.h> #endif #define MAX_ITERATIONS 1000 static unsigned int hog_other_cpus(); static void run_yielder(int use_threads, int read_fd); static void *yielder_main(void *arg); static void s_term_handler(); static void run_low_priority(unsigned int num, int read_fd); static unsigned long compute_median(unsigned long values[MAX_ITERATIONS], unsigned long max_value); static unsigned int get_run_queue_size(); static unsigned long get_num_switches(); static void use_fpu_value(double val); static volatile unsigned int sched_count = 0; /* For yielder */ static int pipe_read_fd = -1; static int pipe_write_fd = -1; static pid_t child = -1; int main(int argc, char **argv) { int use_threads = FALSE; int use_pipe = FALSE; int no_test = FALSE; int frob_fpu = FALSE; int read_fd = -1; int write_fd = -1; int num_low_priority = -1; int i, j; int fds[2]; unsigned int count, num_yields, run_queue_size1, run_queue_size2, num_hogs; unsigned long median, switches1, num_switches, num_overhead_switches; signed long overhead, total_diffs; signed long min_diff = 1000000000; signed long max_diff = -1000000000; double dcount = 0.0; unsigned long diffs[MAX_ITERATIONS]; struct timeval before, after; sigset_t set; static char *usage = "time-schedule [-h] [-thread] [-notest] [-pipe] [-fpu] [num_running]"; setpgrp(); /* First create pipe used to sychronise low priority processes */ if (pipe(fds) != 0) { fprintf(stderr, "Error creating pipe\t%s\n", ERRSTRING); exit(1); } read_fd = fds[0]; pipe_write_fd = fds[1]; for (count = 1; count < argc; ++count) { if (strcmp(argv[count], "-thread") == 0) { #ifdef _POSIX_THREADS use_threads = TRUE; #else fprintf(stderr, "POSIX threads not available\n"); #endif } else if (strcmp(argv[count], "-pipe") == 0) use_pipe = TRUE; else if (strcmp(argv[count], "-notest") == 0) no_test = TRUE; else if (strcmp(argv[count], "-fpu") == 0) frob_fpu = TRUE; else if (isdigit(argv[count][0])) num_low_priority = atoi(argv[count]); else { fprintf(stderr, "Programme to time context switches (schedules)\n"); fprintf(stderr, "(C) 1998 Richard Gooch <rgooch@atnf.csiro.au>\n"); fprintf(stderr, "Usage:\t%s\n", usage); fprintf(stderr, "\t-thread\t\tswitch threads not processes\n"); fprintf(stderr, "\t-pipe\t\tuse pipes not sched_yield()\n"); fprintf(stderr, "\t-fpu\t\tpollute the FPU after each switch in main\n"); fprintf(stderr, "\tnum_running\tnumber of extra processes\n"); exit(0); } } if (no_test) { if (num_low_priority > 0) run_low_priority(num_low_priority, read_fd); while (TRUE) pause(); } if (geteuid() == 0) { struct sched_param sp; memset(&sp, 0, sizeof sp); sp.sched_priority = 10; if (sched_setscheduler(0, SCHED_FIFO, &sp) != 0) { fprintf(stderr, "Error changing to RT class\t%s\n", ERRSTRING); exit(1); } if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) { fprintf(stderr, "Error locking pages\t%s\n", ERRSTRING); exit(1); } } else fprintf(stderr, "Not running with RT priority!\n"); /* Give shell and login programme time to finish up and get off the run queue */ usleep(200000); if (use_pipe) { if (pipe(fds) != 0) { fprintf(stderr, "Error creating pipe\t%s\n", ERRSTRING); exit(1); } pipe_read_fd = fds[0]; write_fd = fds[1]; } num_hogs = hog_other_cpus(); /* Determine overhead. Do it in a loop=2. The first iteration should warm the cache, the second will compute the overhead */ for (j = 0; j < 2; ++j) { switches1 = get_num_switches(); gettimeofday(&before, NULL); for (i = 0; i < 20; ++i) { if (use_pipe) { char ch = 0; write(pipe_write_fd, &ch, 1); read(read_fd, &ch, 1); } else sched_yield(); if (frob_fpu) ++dcount; } gettimeofday(&after, NULL); num_overhead_switches = get_num_switches() - switches1; overhead = 1000000 * (after.tv_sec - before.tv_sec); overhead += after.tv_usec - before.tv_usec; } use_fpu_value(dcount); if (num_low_priority > 0) run_low_priority(num_low_priority, read_fd); /* Set up for the benchmark */ run_yielder(use_threads, read_fd); memset(diffs, 0, sizeof diffs); run_queue_size1 = get_run_queue_size(); total_diffs = 0; switches1 = get_num_switches(); /* Benchmark! */ for (count = 0; count < MAX_ITERATIONS; ++count) { signed long diff; gettimeofday(&before, NULL); /* Generate 20 context switches */ for (i = 0; i < 10; ++i) { if (use_pipe) { char ch = 0; write(write_fd, &ch, 1); read(read_fd, &ch, 1); } else sched_yield(); if (frob_fpu) dcount += 1.0; } gettimeofday(&after, NULL); diff = 1000000 * (after.tv_sec - before.tv_sec); diff += after.tv_usec - before.tv_usec; diffs[count] = diff; total_diffs += diff; if (diff < min_diff) min_diff = diff; if (diff > max_diff) max_diff = diff; } num_yields = sched_count; run_queue_size2 = get_run_queue_size(); num_switches = get_num_switches() - switches1; if (!use_threads) kill(child, SIGTERM); fprintf(stderr, "Started %u hog processes\n", num_hogs); fprintf(stderr, "Syscall%s overhead: %.1f us\n", frob_fpu ? "/FPU" : "", (double)overhead / 20.0); if (switches1 > 0) fprintf(stderr, "Num switches during overhead check: %lu\n", num_overhead_switches); fprintf(stderr, "Minimum scheduling latency: %.1f (%.1f) us\n", (double)min_diff / 20.0, (double)(min_diff - overhead) / 20.0); median = compute_median(diffs, max_diff); fprintf(stderr, "Median scheduling latency: %.1f (%.1f) us\n", (double)median / 20.0, (double)(median - overhead) / 20.0); fprintf(stderr, "Average scheduling latency: %.1f (%.1f) us\n", (double)total_diffs / (double)MAX_ITERATIONS / 20.0, (double)(total_diffs - overhead * MAX_ITERATIONS) / (double)MAX_ITERATIONS / 20.0); fprintf(stderr, "Maximum scheduling latency: %.1f (%.1f) us\n", (double)max_diff / 20.0, (double)(max_diff - overhead) / 20.0); fprintf(stderr, "Run queue size: %u, %u\n", run_queue_size1, run_queue_size2); use_fpu_value(dcount); if (use_threads) fprintf(stderr, "Number of yields: %u\n", num_yields); if (num_switches > 0) fprintf(stderr, "Num switches: %lu\n", num_switches); /* Terminate all child processes */ sigemptyset(&set); sigaddset(&set, SIGTERM); sigprocmask(SIG_BLOCK, &set, NULL); kill(0, SIGTERM); return (0); } /* End Function main */ static unsigned int hog_other_cpus() /* [SUMMARY] Hog other CPUs with a high-priority job. [RETURNS] The number of hogged CPUs. */ { unsigned int count; for (count = mt_num_processors(); count > 1; --count) { switch (fork()) { case 0: /* Child */ while (TRUE) ; break; case -1: /* Error */ fprintf(stderr, "Error forking\t%s\n", ERRSTRING); kill(0, SIGTERM); break; default: /* Parent */ break; } } return mt_num_processors() - 1; } /* End Function hog_other_cpus */ static void run_yielder(int use_threads, int read_fd) /* [SUMMARY] Run other process which will continuously yield. <use_threads> If TRUE, the yielding process is just a thread. <read_fd> The pipe to read the synchronisation byte from. [RETURNS] Nothing. */ { char ch; struct sigaction new_action; #ifdef _POSIX_THREADS pthread_t thread; if (use_threads) { if (pthread_create(&thread, NULL, yielder_main, NULL) != 0) { fprintf(stderr, "Error creating thread\t%s\n", ERRSTRING); kill(0, SIGTERM); } read(read_fd, &ch, 1); return; } #endif switch (child = fork()) { case 0: /* Child */ break; case -1: /* Error */ fprintf(stderr, "Error forking\t%s\n", ERRSTRING); kill(0, SIGTERM); break; default: /* Parent */ read(read_fd, &ch, 1); return; /*break; */ } memset(&new_action, 0, sizeof new_action); sigemptyset(&new_action.sa_mask); new_action.sa_handler = s_term_handler; if (sigaction(SIGTERM, &new_action, NULL) != 0) { fprintf(stderr, "Error setting SIGTERM handler\t%s\n", ERRSTRING); exit(1); } yielder_main(NULL); } /* End Function run_yielder */ static void *yielder_main(void *arg) /* [SUMMARY] Yielder function. <arg> An arbirtrary pointer. Ignored. [RETURNS] NULL. */ { char ch = 0; sched_count = 0; write(pipe_write_fd, &ch, 1); while (TRUE) { if (pipe_read_fd >= 0) { read(pipe_read_fd, &ch, 1); write(pipe_write_fd, &ch, 1); } else sched_yield(); ++sched_count; } return (NULL); } /* End Function yielder_main */ static void s_term_handler() { fprintf(stderr, "Number of yields: %u\n", sched_count); exit(0); } /* End Function s_term_handler */ static void run_low_priority(unsigned int num, int read_fd) /* [SUMMARY] Run low priority processes. <num> Number of processes. <read_fd> The pipe to read the synchronisation byte from. [RETURNS] Nothing. */ { char ch = 0; for (; num > 0; --num) { switch (fork()) { case 0: /* Child */ if (geteuid() == 0) { struct sched_param sp; memset(&sp, 0, sizeof sp); sp.sched_priority = 0; if (sched_setscheduler(0, SCHED_OTHER, &sp) != 0) { fprintf(stderr, "Error changing to SCHED_OTHER class\t%s\n", ERRSTRING); exit(1); } } if (nice(20) != 0) { fprintf(stderr, "Error nicing\t%s\n", ERRSTRING); kill(0, SIGTERM); } write(pipe_write_fd, &ch, 1); while (TRUE) sched_yield(); break; case -1: /* Error */ fprintf(stderr, "Error forking\t%s\n", ERRSTRING); kill(0, SIGTERM); break; default: /* Parent */ read(read_fd, &ch, 1); break; } } } /* End Function run_low_priority */ static unsigned long compute_median(unsigned long values[MAX_ITERATIONS], unsigned long max_value) /* [SUMMARY] Compute the median from an array of values. <values> The array of values. <max_value> The maximum value in the array. [RETURNS] The median value. */ { unsigned long count; unsigned long median = 0; unsigned long peak = 0; unsigned long *table; /* Crude but effective */ if ((table = calloc(max_value + 1, sizeof *table)) == NULL) { fprintf(stderr, "Error allocating median table\n"); exit(1); } for (count = 0; count < MAX_ITERATIONS; ++count) { ++table[values[count]]; } /* Now search for peak. Position of peak is median */ for (count = 0; count < max_value + 1; ++count) { if (table[count] < peak) continue; peak = table[count]; median = count; } free(table); return (median); } /* End Function compute_median */ static unsigned int get_run_queue_size() /* [SUMMARY] Compute the current size of the run queue. [RETURNS] The length of the run queue. */ { int dummy_i; unsigned int length = 0; FILE *fp; DIR *dp; struct dirent *de; char txt[64], dummy_str[64]; if ((dp = opendir("/proc")) == NULL) return (0); while ((de = readdir(dp)) != NULL) { if (!isdigit(de->d_name[0])) continue; sprintf(txt, "/proc/%s/stat", de->d_name); if ((fp = fopen(txt, "r")) == NULL) return (length); fscanf(fp, "%d %s %s", &dummy_i, dummy_str, txt); if (txt[0] == 'R') ++length; fclose(fp); } closedir(dp); return (length); } /* End Function get_run_queue_size */ static unsigned long get_num_switches() /* [SUMMARY] Get the number of context switches. [RETURNS] The number of context switches on success, else 0. */ { unsigned long val; FILE *fp; char line[256], name[64]; if ((fp = fopen("/proc/stat", "r")) == NULL) return (0); while (fgets(line, sizeof line, fp) != NULL) { sscanf(line, "%s %lu", name, &val); if (strcasecmp(name, "ctxt") != 0) continue; fclose(fp); return (val); } fclose(fp); return (0); } /* End Function get_num_switches */ static void use_fpu_value(double val) /* [SUMMARY] Dummy function to consume FPU value. Fools compiler. <val> The value. [RETURNS] Nothing. */ { } /* End Function use_fpu_value */