/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "lowmemorykiller"

#include <arpa/inet.h>
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/cdefs.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#include <cutils/sockets.h>
#include <log/log.h>
#include <processgroup/processgroup.h>

#ifndef __unused
#define __unused __attribute__((__unused__))
#endif

/* Paths used for memory-pressure notification and memory statistics. */
#define MEMCG_SYSFS_PATH "/dev/memcg/"
#define MEMPRESSURE_WATCH_LEVEL "medium"
#define ZONEINFO_PATH "/proc/zoneinfo"
/* Size of the buffers used when reading /proc/<pid>/statm and cmdline. */
#define LINE_MAX 128

/* Parameter files of the in-kernel low memory killer driver. */
#define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
#define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"

/* Element count of a true array (not valid on a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))

/*
 * Command codes carried in the first int of a control-socket packet.
 * All ints in a packet are in network byte order.
 */
enum lmk_cmd {
    LMK_TARGET,     /* followed by (minfree, adj) pairs, at most MAX_TARGETS */
    LMK_PROCPRIO,   /* followed by pid, uid, oomadj */
    LMK_PROCREMOVE, /* followed by pid */
};

#define MAX_TARGETS 6
/*
 * longest is LMK_TARGET followed by MAX_TARGETS each minfree and minkillprio
 * values
 */
#define CTRL_PACKET_MAX (sizeof(int) * (MAX_TARGETS * 2 + 1))

/* default to old in-kernel interface if no memory pressure events */
static int use_inkernel_interface = 1;

/* memory pressure level medium event */
static int mpevfd;

/* control socket listen and data */
static int ctrl_lfd;
static int ctrl_dfd = -1; /* -1 while no data connection is open */
static int ctrl_dfd_reopened; /* did we reopen ctrl conn on this loop?
*/ /* 1 memory pressure level, 1 ctrl listen socket, 1 ctrl data socket */ #define MAX_EPOLL_EVENTS 3 static int epollfd; static int maxevents; #define OOM_DISABLE (-17) /* inclusive */ #define OOM_ADJUST_MIN (-16) #define OOM_ADJUST_MAX 15 /* kernel OOM score values */ #define OOM_SCORE_ADJ_MIN (-1000) #define OOM_SCORE_ADJ_MAX 1000 static int lowmem_adj[MAX_TARGETS]; static int lowmem_minfree[MAX_TARGETS]; static int lowmem_targets_size; struct sysmeminfo { int nr_free_pages; int nr_file_pages; int nr_shmem; int totalreserve_pages; }; struct adjslot_list { struct adjslot_list *next; struct adjslot_list *prev; }; struct proc { struct adjslot_list asl; int pid; uid_t uid; int oomadj; struct proc *pidhash_next; }; #define PIDHASH_SZ 1024 static struct proc *pidhash[PIDHASH_SZ]; #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1)) #define ADJTOSLOT(adj) (adj + -OOM_ADJUST_MIN) static struct adjslot_list procadjslot_list[ADJTOSLOT(OOM_ADJUST_MAX) + 1]; /* * Wait 1-2 seconds for the death report of a killed process prior to * considering killing more processes. 
*/ #define KILL_TIMEOUT 2 /* Time of last process kill we initiated, stop me before I kill again */ static time_t kill_lasttime; /* PAGE_SIZE / 1024 */ static long page_k; static ssize_t read_all(int fd, char *buf, size_t max_len) { ssize_t ret = 0; while (max_len > 0) { ssize_t r = read(fd, buf, max_len); if (r == 0) { break; } if (r == -1) { return -1; } ret += r; buf += r; max_len -= r; } return ret; } static int lowmem_oom_adj_to_oom_score_adj(int oom_adj) { if (oom_adj == OOM_ADJUST_MAX) return OOM_SCORE_ADJ_MAX; else return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; } static struct proc *pid_lookup(int pid) { struct proc *procp; for (procp = pidhash[pid_hashfn(pid)]; procp && procp->pid != pid; procp = procp->pidhash_next) ; return procp; } static void adjslot_insert(struct adjslot_list *head, struct adjslot_list *new) { struct adjslot_list *next = head->next; new->prev = head; new->next = next; next->prev = new; head->next = new; } static void adjslot_remove(struct adjslot_list *old) { struct adjslot_list *prev = old->prev; struct adjslot_list *next = old->next; next->prev = prev; prev->next = next; } static struct adjslot_list *adjslot_tail(struct adjslot_list *head) { struct adjslot_list *asl = head->prev; return asl == head ? 
NULL : asl; } static void proc_slot(struct proc *procp) { int adjslot = ADJTOSLOT(procp->oomadj); adjslot_insert(&procadjslot_list[adjslot], &procp->asl); } static void proc_unslot(struct proc *procp) { adjslot_remove(&procp->asl); } static void proc_insert(struct proc *procp) { int hval = pid_hashfn(procp->pid); procp->pidhash_next = pidhash[hval]; pidhash[hval] = procp; proc_slot(procp); } static int pid_remove(int pid) { int hval = pid_hashfn(pid); struct proc *procp; struct proc *prevp; for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid; procp = procp->pidhash_next) prevp = procp; if (!procp) return -1; if (!prevp) pidhash[hval] = procp->pidhash_next; else prevp->pidhash_next = procp->pidhash_next; proc_unslot(procp); free(procp); return 0; } static void writefilestring(char *path, char *s) { int fd = open(path, O_WRONLY); int len = strlen(s); int ret; if (fd < 0) { ALOGE("Error opening %s; errno=%d", path, errno); return; } ret = write(fd, s, len); if (ret < 0) { ALOGE("Error writing %s; errno=%d", path, errno); } else if (ret < len) { ALOGE("Short write on %s; length=%d", path, ret); } close(fd); } static void cmd_procprio(int pid, int uid, int oomadj) { struct proc *procp; char path[80]; char val[20]; if (oomadj < OOM_DISABLE || oomadj > OOM_ADJUST_MAX) { ALOGE("Invalid PROCPRIO oomadj argument %d", oomadj); return; } snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", pid); snprintf(val, sizeof(val), "%d", lowmem_oom_adj_to_oom_score_adj(oomadj)); writefilestring(path, val); if (use_inkernel_interface) return; procp = pid_lookup(pid); if (!procp) { procp = malloc(sizeof(struct proc)); if (!procp) { // Oh, the irony. May need to rebuild our state. 
return; } procp->pid = pid; procp->uid = uid; procp->oomadj = oomadj; proc_insert(procp); } else { proc_unslot(procp); procp->oomadj = oomadj; proc_slot(procp); } } static void cmd_procremove(int pid) { if (use_inkernel_interface) return; pid_remove(pid); kill_lasttime = 0; } static void cmd_target(int ntargets, int *params) { int i; if (ntargets > (int)ARRAY_SIZE(lowmem_adj)) return; for (i = 0; i < ntargets; i++) { lowmem_minfree[i] = ntohl(*params++); lowmem_adj[i] = ntohl(*params++); } lowmem_targets_size = ntargets; if (use_inkernel_interface) { char minfreestr[128]; char killpriostr[128]; minfreestr[0] = '\0'; killpriostr[0] = '\0'; for (i = 0; i < lowmem_targets_size; i++) { char val[40]; if (i) { strlcat(minfreestr, ",", sizeof(minfreestr)); strlcat(killpriostr, ",", sizeof(killpriostr)); } snprintf(val, sizeof(val), "%d", lowmem_minfree[i]); strlcat(minfreestr, val, sizeof(minfreestr)); snprintf(val, sizeof(val), "%d", lowmem_adj[i]); strlcat(killpriostr, val, sizeof(killpriostr)); } writefilestring(INKERNEL_MINFREE_PATH, minfreestr); writefilestring(INKERNEL_ADJ_PATH, killpriostr); } } static void ctrl_data_close(void) { ALOGI("Closing Activity Manager data connection"); close(ctrl_dfd); ctrl_dfd = -1; maxevents--; } static int ctrl_data_read(char *buf, size_t bufsz) { int ret = 0; ret = read(ctrl_dfd, buf, bufsz); if (ret == -1) { ALOGE("control data socket read failed; errno=%d", errno); } else if (ret == 0) { ALOGE("Got EOF on control data socket"); ret = -1; } return ret; } static void ctrl_command_handler(void) { int ibuf[CTRL_PACKET_MAX / sizeof(int)]; int len; int cmd = -1; int nargs; int targets; len = ctrl_data_read((char *)ibuf, CTRL_PACKET_MAX); if (len <= 0) return; nargs = len / sizeof(int) - 1; if (nargs < 0) goto wronglen; cmd = ntohl(ibuf[0]); switch(cmd) { case LMK_TARGET: targets = nargs / 2; if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj)) goto wronglen; cmd_target(targets, &ibuf[1]); break; case LMK_PROCPRIO: if (nargs != 3) 
goto wronglen; cmd_procprio(ntohl(ibuf[1]), ntohl(ibuf[2]), ntohl(ibuf[3])); break; case LMK_PROCREMOVE: if (nargs != 1) goto wronglen; cmd_procremove(ntohl(ibuf[1])); break; default: ALOGE("Received unknown command code %d", cmd); return; } return; wronglen: ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len); } static void ctrl_data_handler(uint32_t events) { if (events & EPOLLHUP) { ALOGI("ActivityManager disconnected"); if (!ctrl_dfd_reopened) ctrl_data_close(); } else if (events & EPOLLIN) { ctrl_command_handler(); } } static void ctrl_connect_handler(uint32_t events __unused) { struct sockaddr addr; socklen_t alen; struct epoll_event epev; if (ctrl_dfd >= 0) { ctrl_data_close(); ctrl_dfd_reopened = 1; } alen = sizeof(addr); ctrl_dfd = accept(ctrl_lfd, &addr, &alen); if (ctrl_dfd < 0) { ALOGE("lmkd control socket accept failed; errno=%d", errno); return; } ALOGI("ActivityManager connected"); maxevents++; epev.events = EPOLLIN; epev.data.ptr = (void *)ctrl_data_handler; if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_dfd, &epev) == -1) { ALOGE("epoll_ctl for data connection socket failed; errno=%d", errno); ctrl_data_close(); return; } } static int zoneinfo_parse_protection(char *cp) { int max = 0; int zoneval; char *save_ptr; for (cp = strtok_r(cp, "(), ", &save_ptr); cp; cp = strtok_r(NULL, "), ", &save_ptr)) { zoneval = strtol(cp, &cp, 0); if (zoneval > max) max = zoneval; } return max; } static void zoneinfo_parse_line(char *line, struct sysmeminfo *mip) { char *cp = line; char *ap; char *save_ptr; cp = strtok_r(line, " ", &save_ptr); if (!cp) return; ap = strtok_r(NULL, " ", &save_ptr); if (!ap) return; if (!strcmp(cp, "nr_free_pages")) mip->nr_free_pages += strtol(ap, NULL, 0); else if (!strcmp(cp, "nr_file_pages")) mip->nr_file_pages += strtol(ap, NULL, 0); else if (!strcmp(cp, "nr_shmem")) mip->nr_shmem += strtol(ap, NULL, 0); else if (!strcmp(cp, "high")) mip->totalreserve_pages += strtol(ap, NULL, 0); else if (!strcmp(cp, "protection:")) 
mip->totalreserve_pages += zoneinfo_parse_protection(ap); } static int zoneinfo_parse(struct sysmeminfo *mip) { int fd; ssize_t size; char buf[PAGE_SIZE]; char *save_ptr; char *line; memset(mip, 0, sizeof(struct sysmeminfo)); fd = open(ZONEINFO_PATH, O_RDONLY); if (fd == -1) { ALOGE("%s open: errno=%d", ZONEINFO_PATH, errno); return -1; } size = read_all(fd, buf, sizeof(buf) - 1); if (size < 0) { ALOGE("%s read: errno=%d", ZONEINFO_PATH, errno); close(fd); return -1; } ALOG_ASSERT((size_t)size < sizeof(buf) - 1, "/proc/zoneinfo too large"); buf[size] = 0; for (line = strtok_r(buf, "\n", &save_ptr); line; line = strtok_r(NULL, "\n", &save_ptr)) zoneinfo_parse_line(line, mip); close(fd); return 0; } static int proc_get_size(int pid) { char path[PATH_MAX]; char line[LINE_MAX]; int fd; int rss = 0; int total; ssize_t ret; snprintf(path, PATH_MAX, "/proc/%d/statm", pid); fd = open(path, O_RDONLY); if (fd == -1) return -1; ret = read_all(fd, line, sizeof(line) - 1); if (ret < 0) { close(fd); return -1; } sscanf(line, "%d %d ", &total, &rss); close(fd); return rss; } static char *proc_get_name(int pid) { char path[PATH_MAX]; static char line[LINE_MAX]; int fd; char *cp; ssize_t ret; snprintf(path, PATH_MAX, "/proc/%d/cmdline", pid); fd = open(path, O_RDONLY); if (fd == -1) return NULL; ret = read_all(fd, line, sizeof(line) - 1); close(fd); if (ret < 0) { return NULL; } cp = strchr(line, ' '); if (cp) *cp = '\0'; return line; } static struct proc *proc_adj_lru(int oomadj) { return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]); } /* Kill one process specified by procp. 
Returns the size of the process killed */
/*
 * other_free, other_file, minfree, min_score_adj and first are used only to
 * build the log message ("~" marks values estimated after an earlier kill).
 *
 * procp's tracking record is always removed (and freed) before returning, so
 * the caller must not touch procp afterwards.  Returns the size reported by
 * proc_get_size() on success, or -1 if the process name or size could not be
 * read or kill() failed.
 */
static int kill_one_process(struct proc *procp, int other_free, int other_file,
        int minfree, int min_score_adj, bool first) {
    int pid = procp->pid;
    uid_t uid = procp->uid;
    char *taskname;
    int tasksize;
    int r;
    int kill_errno;

    taskname = proc_get_name(pid);
    if (!taskname) {
        pid_remove(pid);
        return -1;
    }

    tasksize = proc_get_size(pid);
    if (tasksize <= 0) {
        pid_remove(pid);
        return -1;
    }

    ALOGI("Killing '%s' (%d), uid %d, adj %d\n"
          " to free %ldkB because cache %s%ldkB is below limit %ldkB for oom_adj %d\n"
          " Free memory is %s%ldkB %s reserved",
          taskname, pid, uid, procp->oomadj, tasksize * page_k,
          first ? "" : "~", other_file * page_k, minfree * page_k, min_score_adj,
          first ? "" : "~", other_free * page_k, other_free >= 0 ? "above" : "below");

    r = kill(pid, SIGKILL);
    /* Save errno now: the calls below may overwrite it. */
    kill_errno = errno;
    killProcessGroup(uid, pid, SIGKILL);
    /* This frees procp; only the local copies may be used from here on. */
    pid_remove(pid);

    if (r) {
        ALOGE("kill(%d): errno=%d", pid, kill_errno);
        return -1;
    } else {
        return tasksize;
    }
}

/*
 * Find a process to kill based on the current (possibly estimated) free memory
 * and cached memory sizes.  Returns the size of the killed processes.
*/ static int find_and_kill_process(int other_free, int other_file, bool first) { int i; int r; int min_score_adj = OOM_ADJUST_MAX + 1; int minfree = 0; int killed_size = 0; for (i = 0; i < lowmem_targets_size; i++) { minfree = lowmem_minfree[i]; if (other_free < minfree && other_file < minfree) { min_score_adj = lowmem_adj[i]; break; } } if (min_score_adj == OOM_ADJUST_MAX + 1) return 0; for (i = OOM_ADJUST_MAX; i >= min_score_adj; i--) { struct proc *procp; retry: procp = proc_adj_lru(i); if (procp) { killed_size = kill_one_process(procp, other_free, other_file, minfree, min_score_adj, first); if (killed_size < 0) { goto retry; } else { return killed_size; } } } return 0; } static void mp_event(uint32_t events __unused) { int i; int ret; unsigned long long evcount; struct sysmeminfo mi; int other_free; int other_file; int killed_size; bool first = true; ret = read(mpevfd, &evcount, sizeof(evcount)); if (ret < 0) ALOGE("Error reading memory pressure event fd; errno=%d", errno); if (time(NULL) - kill_lasttime < KILL_TIMEOUT) return; while (zoneinfo_parse(&mi) < 0) { // Failed to read /proc/zoneinfo, assume ENOMEM and kill something find_and_kill_process(0, 0, true); } other_free = mi.nr_free_pages - mi.totalreserve_pages; other_file = mi.nr_file_pages - mi.nr_shmem; do { killed_size = find_and_kill_process(other_free, other_file, first); if (killed_size > 0) { first = false; other_free += killed_size; other_file += killed_size; } } while (killed_size > 0); } static int init_mp(char *levelstr, void *event_handler) { int mpfd; int evfd; int evctlfd; char buf[256]; struct epoll_event epev; int ret; mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY); if (mpfd < 0) { ALOGI("No kernel memory.pressure_level support (errno=%d)", errno); goto err_open_mpfd; } evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY); if (evctlfd < 0) { ALOGI("No kernel memory cgroup event control (errno=%d)", errno); goto err_open_evctlfd; } evfd = eventfd(0, 
EFD_NONBLOCK); if (evfd < 0) { ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno); goto err_eventfd; } ret = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr); if (ret >= (ssize_t)sizeof(buf)) { ALOGE("cgroup.event_control line overflow for level %s", levelstr); goto err; } ret = write(evctlfd, buf, strlen(buf) + 1); if (ret == -1) { ALOGE("cgroup.event_control write failed for level %s; errno=%d", levelstr, errno); goto err; } epev.events = EPOLLIN; epev.data.ptr = event_handler; ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev); if (ret == -1) { ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno); goto err; } maxevents++; mpevfd = evfd; return 0; err: close(evfd); err_eventfd: close(evctlfd); err_open_evctlfd: close(mpfd); err_open_mpfd: return -1; } static int init(void) { struct epoll_event epev; int i; int ret; page_k = sysconf(_SC_PAGESIZE); if (page_k == -1) page_k = PAGE_SIZE; page_k /= 1024; epollfd = epoll_create(MAX_EPOLL_EVENTS); if (epollfd == -1) { ALOGE("epoll_create failed (errno=%d)", errno); return -1; } ctrl_lfd = android_get_control_socket("lmkd"); if (ctrl_lfd < 0) { ALOGE("get lmkd control socket failed"); return -1; } ret = listen(ctrl_lfd, 1); if (ret < 0) { ALOGE("lmkd control socket listen failed (errno=%d)", errno); return -1; } epev.events = EPOLLIN; epev.data.ptr = (void *)ctrl_connect_handler; if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_lfd, &epev) == -1) { ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno); return -1; } maxevents++; use_inkernel_interface = !access(INKERNEL_MINFREE_PATH, W_OK); if (use_inkernel_interface) { ALOGI("Using in-kernel low memory killer interface"); } else { ret = init_mp(MEMPRESSURE_WATCH_LEVEL, (void *)&mp_event); if (ret) ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer"); } for (i = 0; i <= ADJTOSLOT(OOM_ADJUST_MAX); i++) { procadjslot_list[i].next = &procadjslot_list[i]; procadjslot_list[i].prev = 
&procadjslot_list[i]; } return 0; } static void mainloop(void) { while (1) { struct epoll_event events[maxevents]; int nevents; int i; ctrl_dfd_reopened = 0; nevents = epoll_wait(epollfd, events, maxevents, -1); if (nevents == -1) { if (errno == EINTR) continue; ALOGE("epoll_wait failed (errno=%d)", errno); continue; } for (i = 0; i < nevents; ++i) { if (events[i].events & EPOLLERR) ALOGD("EPOLLERR on event #%d", i); if (events[i].data.ptr) (*(void (*)(uint32_t))events[i].data.ptr)(events[i].events); } } } int main(int argc __unused, char **argv __unused) { struct sched_param param = { .sched_priority = 1, }; mlockall(MCL_FUTURE); sched_setscheduler(0, SCHED_FIFO, ¶m); if (!init()) mainloop(); ALOGI("exiting"); return 0; }