/*
* Copyright (C) 2012-2017 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* Description:
*
* The program is designed to test max_map_count tunable file
*
* The kernel documentation says that:
* /proc/sys/vm/max_map_count contains the maximum number of memory map
* areas a process may have. Memory map areas are used as a side-effect
* of calling malloc, directly by mmap and mprotect, and also when
* loading shared libraries.
*
* Each process has its own maps file: /proc/[pid]/maps, and each line
* indicates a map entry, so the number of maps can be calculated by
* counting the lines of that file to check the behavior of the tunable.
*
* The program tries to invoke mmap() endlessly until it triggers MAP_FAILED,
* then reads the process's maps file /proc/[pid]/maps, saves the number of
* lines in the map_count variable, and compares it with
* /proc/sys/vm/max_map_count; map_count should be greater than
* max_map_count by 1.
*
* Note: On some architectures there is a special vma VSYSCALL, which
* is allocated without incrementing mm->map_count variable. On these
* architectures each /proc/<pid>/maps has at the end:
* ...
* ...
* ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
*
* so we ignore this line during /proc/[pid]/maps reading.
*/
#define _GNU_SOURCE
#include <sys/wait.h>
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/utsname.h>
#include "mem.h"
/* First max_map_count value the test tries; it is doubled after each round. */
#define MAP_COUNT_DEFAULT 1024
/* Upper bound applied to the largest max_map_count value the test tries. */
#define MAX_MAP_COUNT 65536L
/*
 * Original sysctl values, saved by setup() and written back by cleanup().
 * They start at -1, a value neither tunable is expected to hold, so that
 * cleanup() can tell "never saved" apart from a real saved setting.
 */
static long old_max_map_count = -1;
static long old_overcommit = -1;
/* uname() result filled in by setup(); filter_map() reads un.machine on arm. */
static struct utsname un;

/*
 * Test setup: make sure the max_map_count tunable exists, remember the
 * current max_map_count and overcommit_memory settings, switch
 * overcommit_memory to 2 and record the uname() information.
 *
 * Aborts the test through tst_brk() if the tunable file is missing or
 * uname() fails.
 */
static void setup(void)
{
	if (access(PATH_SYSVM "max_map_count", F_OK) != 0)
		tst_brk(TBROK | TERRNO,
			"Can't support to test max_map_count");

	old_max_map_count = get_sys_tune("max_map_count");
	old_overcommit = get_sys_tune("overcommit_memory");

	/*
	 * Mode 2 is meant to make the mmap() calls of the test fail with
	 * ENOMEM when memory gets tight instead of waking the OOM killer.
	 */
	set_sys_tune("overcommit_memory", 2, 1);

	if (uname(&un) != 0)
		tst_brk(TBROK | TERRNO, "uname error");
}

/*
 * Test cleanup: restore the tunables saved by setup().
 *
 * The LTP library may call this after setup() bailed out early through
 * tst_brk(), i.e. before the old values were read. In that case the
 * variables still hold -1 and nothing is written, so the system settings
 * are never overwritten with a value that was not read from them.
 */
static void cleanup(void)
{
	if (old_overcommit != -1)
		set_sys_tune("overcommit_memory", old_overcommit, 0);

	if (old_max_map_count != -1)
		set_sys_tune("max_map_count", old_max_map_count, 0);
}
/*
 * Decide whether a /proc/[pid]/maps line has to be left out of the count.
 *
 * Some architectures list entries in the maps file which aren't accounted
 * for in the vm_area_struct's map_count; counting them would skew the
 * comparison against max_map_count.
 *
 * line: one text line read from /proc/[pid]/maps.
 *
 * Returns true if the line must be skipped, false if it must be counted.
 * Lines that do not match the expected layout or carry no trailing name
 * field are always counted.
 */
static bool filter_map(const char *line)
{
	/*
	 * Only short bracketed labels are compared below, so a small buffer
	 * with an explicit field width is sufficient. A longer name is simply
	 * truncated: it can then never equal one of the labels, and it cannot
	 * overrun the stack the way an unbounded %s conversion could.
	 */
	char name[32];
	int ret;

	ret = sscanf(line, "%*p-%*p %*4s %*p %*2d:%*2d %*d %31s", name);
	if (ret != 1)
		return false;

#if defined(__x86_64__) || defined(__x86__)
	/* On x86, there's an old compat vsyscall page */
	if (!strcmp(name, "[vsyscall]"))
		return true;
#elif defined(__ia64__)
	/* On ia64, the vdso is not a proper mapping */
	if (!strcmp(name, "[vdso]"))
		return true;
#elif defined(__arm__)
	/* Nothing to skip when the arm binary runs on an aarch64 kernel */
	if ((!strcmp(un.machine, "aarch64"))
	    || (!strcmp(un.machine, "aarch64_be")))
		return false;

	/* Older arm kernels didn't label their vdso maps */
	if (!strncmp(line, "ffff0000-ffff1000", 17))
		return true;
#endif

	return false;
}
/*
 * Count the map entries of a process.
 *
 * Reads /proc/<pid>/maps line by line and counts every line that
 * filter_map() does not ask to skip.
 *
 * pid: process whose maps file is read.
 *
 * Returns the number of counted lines. Aborts the test through tst_brk()
 * if the maps file cannot be opened.
 */
static long count_maps(pid_t pid)
{
	FILE *fp;
	size_t len = 0;
	char *line = NULL;
	char buf[BUFSIZ];
	long map_count = 0;

	snprintf(buf, sizeof(buf), "/proc/%d/maps", pid);
	fp = fopen(buf, "r");
	if (fp == NULL)
		tst_brk(TBROK | TERRNO, "fopen %s", buf);

	while (getline(&line, &len, fp) != -1) {
		/* exclude vdso and vsyscall */
		if (filter_map(line))
			continue;
		map_count++;
	}

	/*
	 * getline() allocates (and grows) the line buffer on our behalf;
	 * release it, otherwise every call leaks one buffer.
	 */
	free(line);
	fclose(fp);

	return map_count;
}
/*
 * Test body.
 *
 * Starting at MAP_COUNT_DEFAULT and doubling each round, set max_map_count
 * to a limit, let a forked child mmap() until it fails, and check that the
 * child's /proc/<pid>/maps then holds exactly limit + 1 counted entries.
 * One TPASS or TFAIL is reported per tested limit.
 */
static void max_map_count_test(void)
{
	long commit_room;
	long iter_cap;
	long limit;
	long nr_maps;
	pid_t child;
	int status;

	/*
	 * XXX Due to a possible kernel bug, the oom-killer can easily be
	 * triggered by a huge amount of small mmaps even if enough free
	 * memory is available, and it has been observed to often kill the
	 * wrong victims in that situation. To make sure no oom happens:
	 * 1) a safe maximum max_map_count value (65536 here) is used as
	 *    upper bound, i.e. too big values are not tested;
	 * 2) the total mapping is kept no larger than
	 *        CommitLimit - Committed_AS
	 *    and overcommit_memory is set to 2, which could help mmap()
	 *    return ENOMEM instead of triggering the oom-killer when memory
	 *    is tight. (When there is enough free memory, step 1) is used
	 *    first.)
	 * Hope the OOM-killer can be more stable one day.
	 */
	commit_room = SAFE_READ_MEMINFO("CommitLimit:") - SAFE_READ_MEMINFO("Committed_AS:");

	/* 64 used as a bias to make sure no overflow happen */
	iter_cap = commit_room / sysconf(_SC_PAGESIZE) * 1024 - 64;
	if (iter_cap > MAX_MAP_COUNT)
		iter_cap = MAX_MAP_COUNT;

	for (limit = MAP_COUNT_DEFAULT; limit <= iter_cap; limit <<= 1) {
		set_sys_tune("max_map_count", limit, 1);

		child = SAFE_FORK();
		if (child == 0) {
			/* child: map until the kernel refuses, then stop */
			while (mmap(NULL, 1, PROT_READ,
				    MAP_SHARED | MAP_ANONYMOUS,
				    -1, 0) != MAP_FAILED)
				;

			if (raise(SIGSTOP) != 0)
				tst_brk(TBROK | TERRNO, "raise");
			exit(0);
		}

		/* parent: wait until the child has finished mapping and stopped */
		SAFE_WAITPID(child, &status, WUNTRACED);
		if (!WIFSTOPPED(status))
			tst_brk(TBROK, "child did not stopped");

		nr_maps = count_maps(child);

		/*
		 * Note the sysctl setting of max_map_count is exceeded by
		 * one entry: that is the mm failure point at the time this
		 * check was written.
		 */
		if (nr_maps != limit + 1)
			tst_res(TFAIL, "%ld map entries in total, but expected %ld entries",
				nr_maps, limit);
		else
			tst_res(TPASS, "%ld map entries in total as expected.",
				limit);

		/* let the stopped child run on to its exit and reap it */
		SAFE_KILL(child, SIGCONT);
		SAFE_WAITPID(child, &status, 0);
	}
}
/*
 * LTP test descriptor: registers the setup/cleanup callbacks and the test
 * function, and declares that the test needs root and forks a child.
 */
static struct tst_test test = {
	.test_all = max_map_count_test,
	.setup = setup,
	.cleanup = cleanup,
	.forks_child = 1,
	.needs_root = 1,
};