/**
* @file op_syscalls.c
* Tracing of system calls
*
* @remark Copyright 2002 OProfile authors
* @remark Read the file COPYING
*
* @author Bob Montgomery
* @author Will Cohen
* @author John Levon
* @author Philippe Elie
*/
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/mman.h>
#include <linux/file.h>
#include "oprofile.h"
#include "op_dcache.h"
#include "op_util.h"
uint dname_top;
struct qstr **dname_stack;
char * pool_pos;
char * pool_start;
char * pool_end;
void oprof_put_note(struct op_note * samp);
/* ------------ system calls --------------- */
struct mmap_arg_struct {
unsigned long addr;
unsigned long len;
unsigned long prot;
unsigned long flags;
unsigned long fd;
unsigned long offset;
};
/* --------- IA64 versions of system calls ------ */
asmlinkage static int (*old_sys_clone)(long, long);
asmlinkage static int (*old_sys_clone2)(long, long, long);
asmlinkage static int (*old_sys_execve)(char *, char **, char **);
asmlinkage static unsigned long (*old_sys_mmap)(unsigned long,
unsigned long, int, int, int, long);
asmlinkage static unsigned long (*old_sys_mmap2)(unsigned long,
unsigned long, int, int, int, long);
asmlinkage static long (*old_sys_init_module)(char const *, struct module *);
asmlinkage static long (*old_sys_exit)(int);
/* --------- declarations of interception stubs for IA64 ------ */
asmlinkage long post_stub_clone(long, long);
asmlinkage long post_stub_clone2(long, long, long);
asmlinkage long my_ia64_execve(char *, char **, char **);
asmlinkage unsigned long post_stub_mmap(unsigned long,
unsigned long, int, int, int, long);
asmlinkage unsigned long post_stub_mmap2(unsigned long,
unsigned long, int, int, int, long);
asmlinkage long post_stub_init_module(char const *, struct module *);
asmlinkage long pre_stub_exit(int);
/* IA64 system call table doesn't use function pointers, it uses
* pointers to code (not the same thing). Basically it can violate the
* procedure calling rules because these "procedure calls" are made by
* the assembly language BREAK handler in ivt.S.
*/
struct fdesc {
void * ip;
void * gp;
};
struct fdesc fdesc_clone;
struct fdesc fdesc_clone2;
struct fdesc fdesc_execve;
struct fdesc fdesc_mmap;
struct fdesc fdesc_mmap2;
struct fdesc fdesc_init_module;
struct fdesc fdesc_exit;
/* ----------- End of IA64 weirdness for now -------------- */
spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
/* called with map_lock held */
static void oprof_output_map(ulong addr, ulong len,
ulong offset, struct file * file, int is_execve)
{
struct op_note note;
/* don't bother with /dev/zero mappings etc. */
if (!len)
return;
note.pid = current->pid;
note.tgid = op_get_tgid();
note.addr = addr;
note.len = len;
note.offset = offset;
note.type = is_execve ? OP_EXEC : OP_MAP;
note.hash = hash_path(file);
if (note.hash == -1)
return;
oprof_put_note(¬e);
}
static int oprof_output_maps(struct task_struct * task)
{
int size=0;
struct mm_struct * mm;
struct vm_area_struct * map;
/* we don't need to worry about mm_users here, since there is at
least one user (current), and if there's other code using this
mm, then mm_users must be at least 2; we should never have to
mmput() here. */
if (!(mm = task->mm))
goto out;
lock_mmap(mm);
spin_lock(&map_lock);
/* We need two pass, daemon assume than the first mmap notification
* is for the executable but some process doesn't follow this model.
*/
for (map = mm->mmap; map; map = map->vm_next) {
if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
continue;
if (!(map->vm_flags & VM_EXECUTABLE))
continue;
oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
GET_VM_OFFSET(map), map->vm_file, 1);
}
for (map = mm->mmap; map; map = map->vm_next) {
if (!(map->vm_flags & VM_EXEC) || !map->vm_file)
continue;
if (map->vm_flags & VM_EXECUTABLE)
continue;
oprof_output_map(map->vm_start, map->vm_end-map->vm_start,
GET_VM_OFFSET(map), map->vm_file, 0);
}
spin_unlock(&map_lock);
unlock_mmap(mm);
out:
return size;
}
/* execve is a special case on IA64. The others get the result and
* arguments after the system call has been made from the ASM stub. */
asmlinkage long
my_sys_execve (char * filename, char **argv, char **envp, struct pt_regs * regs)
{
int error;
MOD_INC_USE_COUNT;
filename = getname(filename);
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
error = do_execve(filename, argv, envp, regs);
if (!error) {
PTRACE_OFF(current);
oprof_output_maps(current);
}
putname(filename);
out:
unlock_execve();
MOD_DEC_USE_COUNT;
return error;
}
static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags,
ulong fd, ulong offset)
{
struct file * file;
lock_out_mmap();
file = fget(fd);
if (!file)
goto out;
spin_lock(&map_lock);
oprof_output_map(addr, len, offset, file, 0);
spin_unlock(&map_lock);
fput(file);
out:
unlock_out_mmap();
}
/*
* IA64 mmap routines:
* The post_sys_* routines are called after the syscall has been made.
* The first argument is the return value from the system call.
*/
asmlinkage void post_sys_mmap2(ulong ret, ulong addr, ulong len,
ulong prot, ulong flags, ulong fd, ulong pgoff)
{
/* FIXME: This should be done in the ASM stub. */
MOD_INC_USE_COUNT;
if ((prot & PROT_EXEC) && ret >= 0)
out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT);
goto out;
out:
MOD_DEC_USE_COUNT;
}
asmlinkage void post_sys_mmap(ulong ret, ulong addr, ulong len,
ulong prot, ulong flags, ulong fd, ulong off)
{
/* FIXME: This should be done in the ASM stub. */
MOD_INC_USE_COUNT;
if ((prot & PROT_EXEC) && ret >= 0)
out_mmap(ret, len, prot, flags, fd, off);
goto out;
out:
MOD_DEC_USE_COUNT;
}
inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid)
{
struct op_note note;
note.type = OP_FORK;
note.pid = old_pid;
note.tgid = old_tgid;
note.addr = new_pid;
note.len = new_tgid;
oprof_put_note(¬e);
}
asmlinkage void post_sys_clone(long ret, long arg0, long arg1)
{
u32 pid = current->pid;
u32 tgid = op_get_tgid();
/* FIXME: This should be done in the ASM stub. */
MOD_INC_USE_COUNT;
if (ret)
/* FIXME: my libc show clone() is not implemented in ia64
* but used only by fork() with a SIGCHILD first parameter
* so we assume it's a fork */
oprof_report_fork(pid, ret, pid, tgid);
MOD_DEC_USE_COUNT;
}
asmlinkage void post_sys_clone2(long ret, long arg0, long arg1, long arg2)
{
u32 pid = current->pid;
u32 tgid = op_get_tgid();
long clone_flags = arg0;
/* FIXME: This should be done in the ASM stub. */
MOD_INC_USE_COUNT;
if (ret) {
if (clone_flags & CLONE_THREAD)
oprof_report_fork(pid, ret, tgid, tgid);
else
oprof_report_fork(pid, ret, tgid, ret);
}
MOD_DEC_USE_COUNT;
}
asmlinkage void
post_sys_init_module(long ret, char const * name_user,
struct module * mod_user)
{
/* FIXME: This should be done in the ASM stub. */
MOD_INC_USE_COUNT;
if (ret >= 0) {
struct op_note note;
note.type = OP_DROP_MODULES;
oprof_put_note(¬e);
}
MOD_DEC_USE_COUNT;
}
/* Exit must use a pre-call intercept stub. There is no post exit. */
asmlinkage void pre_sys_exit(int error_code)
{
struct op_note note;
MOD_INC_USE_COUNT;
note.addr = current->times.tms_utime;
note.len = current->times.tms_stime;
note.offset = current->start_time;
note.type = OP_EXIT;
note.pid = current->pid;
note.tgid = op_get_tgid();
oprof_put_note(¬e);
/* this looks UP-dangerous, as the exit sleeps and we don't
* have a use count, but in fact its ok as sys_exit is noreturn,
* so we can never come back to this non-existent exec page
*/
MOD_DEC_USE_COUNT;
}
extern void * sys_call_table[];
/* FIXME: Now that I'm never trying to do a C-level call through these
* pointers, I should just save, intercept, and restore with void *
* instead of the void * part of the function descriptor, I think.
*/
void op_save_syscalls(void)
{
fdesc_clone.ip = sys_call_table[__NR_clone - __NR_ni_syscall];
old_sys_clone = (void *)&fdesc_clone;
fdesc_clone2.ip = sys_call_table[__NR_clone2 - __NR_ni_syscall];
old_sys_clone2 = (void *)&fdesc_clone2;
fdesc_execve.ip = sys_call_table[__NR_execve - __NR_ni_syscall];
old_sys_execve = (void *)&fdesc_execve;
fdesc_mmap.ip = sys_call_table[__NR_mmap - __NR_ni_syscall];
old_sys_mmap = (void *)&fdesc_mmap;
fdesc_mmap2.ip = sys_call_table[__NR_mmap2 - __NR_ni_syscall];
old_sys_mmap2 = (void *)&fdesc_mmap2;
fdesc_init_module.ip = sys_call_table[__NR_init_module - __NR_ni_syscall];
old_sys_init_module = (void *)&fdesc_init_module;
fdesc_exit.ip = sys_call_table[__NR_exit - __NR_ni_syscall];
old_sys_exit = (void *)&fdesc_exit;
}
void op_intercept_syscalls(void)
{
/* Must extract the function address from the stub function
* descriptors.
*/
sys_call_table[__NR_clone - __NR_ni_syscall] =
((struct fdesc *)post_stub_clone)->ip;
sys_call_table[__NR_clone2 - __NR_ni_syscall] =
((struct fdesc *)post_stub_clone2)->ip;
sys_call_table[__NR_execve - __NR_ni_syscall] =
((struct fdesc *)my_ia64_execve)->ip;
sys_call_table[__NR_mmap - __NR_ni_syscall] =
((struct fdesc *)post_stub_mmap)->ip;
sys_call_table[__NR_mmap2 - __NR_ni_syscall] =
((struct fdesc *)post_stub_mmap2)->ip;
sys_call_table[__NR_init_module - __NR_ni_syscall] =
((struct fdesc *)post_stub_init_module)->ip;
sys_call_table[__NR_exit - __NR_ni_syscall] =
((struct fdesc *)pre_stub_exit)->ip;
}
void op_restore_syscalls(void)
{
sys_call_table[__NR_clone - __NR_ni_syscall] =
((struct fdesc *)old_sys_clone)->ip;
sys_call_table[__NR_clone2 - __NR_ni_syscall] =
((struct fdesc *)old_sys_clone2)->ip;
sys_call_table[__NR_execve - __NR_ni_syscall] =
((struct fdesc *)old_sys_execve)->ip;
sys_call_table[__NR_mmap - __NR_ni_syscall] =
((struct fdesc *)old_sys_mmap)->ip;
sys_call_table[__NR_mmap2 - __NR_ni_syscall] =
((struct fdesc *)old_sys_mmap2)->ip;
sys_call_table[__NR_init_module - __NR_ni_syscall] =
((struct fdesc *)old_sys_init_module)->ip;
sys_call_table[__NR_exit - __NR_ni_syscall] =
((struct fdesc *)old_sys_exit)->ip;
}