/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	if (!str)
		return -EINVAL;

	sscanf(str, "%d", &kmemcheck_enabled);
	return 0;
}

early_param("kmemcheck", param_kmemcheck);

int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}

static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	WARN_ON_ONCE(in_nmi());

	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}