#include <stdio.h> #include <sys/time.h> #include <getopt.h> #include <thread> #include <iostream> #include <iomanip> #include <sched.h> #include "Profiler.h" extern "C" void icache_test(long count, long step); static constexpr size_t MAX_CODE_SIZE = 128*1024; static constexpr size_t CACHE_LINE_SIZE = 64; static constexpr size_t MAX_ITERATIONS_COUNT = MAX_CODE_SIZE / CACHE_LINE_SIZE; static constexpr size_t REPETITIONS = 0x800000L; using namespace utils; static cpu_set_t g_cpu_set; static void printUsage(char* name) { std::string exec_name(name); std::string usage( "ICACHE is a command-line tool for testing the L1 instruction cache performance.\n" "(Make sure security.perf_harden is set to 0)\n\n" "Usages:\n" " ICACHE [options]\n" "\n" "Options:\n" " --help, -h\n" " print this message\n\n" " --affinity=N, -a N\n" " Specify which CPU the test should run on.\n\n" ); const std::string from("ICACHE"); for (size_t pos = usage.find(from); pos != std::string::npos; pos = usage.find(from, pos)) { usage.replace(pos, from.length(), exec_name); } printf("%s", usage.c_str()); } static int handleCommandLineArgments(int argc, char* argv[]) { static constexpr const char* OPTSTR = "ha:"; static const struct option OPTIONS[] = { { "help", no_argument, 0, 'h' }, { "affinity", required_argument, 0, 'a' }, { 0, 0, 0, 0 } // termination of the option list }; int opt; int option_index = 0; while ((opt = getopt_long(argc, argv, OPTSTR, OPTIONS, &option_index)) >= 0) { std::string arg(optarg ? optarg : ""); switch (opt) { default: case 'h': printUsage(argv[0]); exit(0); break; case 'a': size_t cpu = std::stoi(arg); if (cpu < std::thread::hardware_concurrency()) { CPU_SET(cpu, &g_cpu_set); } else { std::cerr << "N must be < " << std::thread::hardware_concurrency() << std::endl; exit(0); } break; } } return optind; } int main(int argc, char* argv[]) { CPU_ZERO(&g_cpu_set); [[maybe_unused]] int option_index = handleCommandLineArgments(argc, argv); [[maybe_unused]] int num_args = argc - option_index; if (CPU_COUNT(&g_cpu_set)) { sched_setaffinity(gettid(), sizeof(g_cpu_set), &g_cpu_set); } Profiler& profiler = Profiler::get(); profiler.resetEvents(Profiler::EV_CPU_CYCLES | Profiler::EV_L1I_RATES); if (!profiler.isValid()) { fprintf(stderr, "performance counters not enabled. try \"setprop security.perf_harden 0\"\n"); exit(0); } size_t const stepInBytes = 1024; // 1 KiB steps size_t const step = stepInBytes / CACHE_LINE_SIZE; std::cout << std::fixed << std::setprecision(2); printf("[KiB]\t[cyc]\t[refs]\t[MPKI]\t[ns]\n"); Profiler::Counters counters; for (size_t i=step ; i <= MAX_ITERATIONS_COUNT ; i += step) { profiler.reset(); auto now = std::chrono::steady_clock::now(); profiler.start(); icache_test(REPETITIONS, i); profiler.stop(); auto duration = std::chrono::steady_clock::now() - now; profiler.readCounters(&counters); std::cout << ((i*CACHE_LINE_SIZE)/1024) << "\t" << counters.getCpuCycles()/double(REPETITIONS) << "\t" << counters.getL1IReferences()/double(REPETITIONS) << "\t" << counters.getMPKI(counters.getL1IMisses()) << "\t" << duration.count()/double(REPETITIONS) << "\t" << std::endl; } return 0; }