#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

static unsigned long max_addr;

struct addr_marker {
	unsigned long start_address;
	const char *name;
};

enum address_markers_idx {
	IDENTITY_NR = 0,
	KERNEL_START_NR,
	KERNEL_END_NR,
	VMEMMAP_NR,
	VMALLOC_NR,
#ifdef CONFIG_64BIT
	MODULES_NR,
#endif
};

static struct addr_marker address_markers[] = {
	[IDENTITY_NR]	  = {0, "Identity Mapping"},
	[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
	[VMALLOC_NR]	  = {0, "vmalloc Area"},
#ifdef CONFIG_64BIT
	[MODULES_NR]	  = {0, "Modules Area"},
#endif
	{ -1, NULL }
};

struct pg_state {
	int level;
	unsigned int current_prot;
	unsigned long start_address;
	unsigned long current_address;
	const struct addr_marker *marker;
};

static void print_prot(struct seq_file *m, unsigned int pr, int level)
{
	static const char * const level_name[] =
		{ "ASCE", "PGD", "PUD", "PMD", "PTE" };

	seq_printf(m, "%s ", level_name[level]);
	if (pr & _PAGE_INVALID) {
		seq_printf(m, "I\n");
		return;
	}
	seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
	seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : "   ");
	seq_putc(m, '\n');
}

static void note_page(struct seq_file *m, struct pg_state *st,
		     unsigned int new_prot, int level)
{
	static const char units[] = "KMGTPE";
	int width = sizeof(unsigned long) * 2;
	const char *unit = units;
	unsigned int prot, cur;
	unsigned long delta;

	/*
	 * If we have a "break" in the series, we need to flush the state
	 * that we have now. "break" is either changing perms, levels or
	 * address space marker.
	 */
	prot = new_prot;
	cur = st->current_prot;

	if (!st->level) {
		/* First entry */
		st->current_prot = new_prot;
		st->level = level;
		st->marker = address_markers;
		seq_printf(m, "---[ %s ]---\n", st->marker->name);
	} else if (prot != cur || level != st->level ||
		   st->current_address >= st->marker[1].start_address) {
		/* Print the actual finished series */
		seq_printf(m, "0x%0*lx-0x%0*lx",
			   width, st->start_address,
			   width, st->current_address);
		delta = (st->current_address - st->start_address) >> 10;
		while (!(delta & 0x3ff) && unit[1]) {
			delta >>= 10;
			unit++;
		}
		seq_printf(m, "%9lu%c ", delta, *unit);
		print_prot(m, st->current_prot, st->level);
		if (st->current_address >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(m, "---[ %s ]---\n", st->marker->name);
		}
		st->start_address = st->current_address;
		st->current_prot = new_prot;
		st->level = level;
	}
}

/*
 * The actual page table walker functions. In order to keep the
 * implementation of print_prot() short, we only check and pass
 * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
 * segment or page table entry is invalid or read-only.
 * After all it's just a hint that the current level being walked
 * contains an invalid or read-only entry.
 */
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
			   pmd_t *pmd, unsigned long addr)
{
	unsigned int prot;
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
		st->current_address = addr;
		pte = pte_offset_kernel(pmd, addr);
		prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
		note_page(m, st, prot, 4);
		addr += PAGE_SIZE;
	}
}

#ifdef CONFIG_64BIT
#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
#else
#define _PMD_PROT_MASK 0
#endif

static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
			   pud_t *pud, unsigned long addr)
{
	unsigned int prot;
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
		st->current_address = addr;
		pmd = pmd_offset(pud, addr);
		if (!pmd_none(*pmd)) {
			if (pmd_large(*pmd)) {
				prot = pmd_val(*pmd) & _PMD_PROT_MASK;
				note_page(m, st, prot, 3);
			} else
				walk_pte_level(m, st, pmd, addr);
		} else
			note_page(m, st, _PAGE_INVALID, 3);
		addr += PMD_SIZE;
	}
}

#ifdef CONFIG_64BIT
#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO)
#else
#define _PUD_PROT_MASK 0
#endif

static void walk_pud_level(struct seq_file *m, struct pg_state *st,
			   pgd_t *pgd, unsigned long addr)
{
	unsigned int prot;
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
		st->current_address = addr;
		pud = pud_offset(pgd, addr);
		if (!pud_none(*pud))
			if (pud_large(*pud)) {
				prot = pud_val(*pud) & _PUD_PROT_MASK;
				note_page(m, st, prot, 2);
			} else
				walk_pmd_level(m, st, pud, addr);
		else
			note_page(m, st, _PAGE_INVALID, 2);
		addr += PUD_SIZE;
	}
}

static void walk_pgd_level(struct seq_file *m)
{
	unsigned long addr = 0;
	struct pg_state st;
	pgd_t *pgd;
	int i;

	memset(&st, 0, sizeof(st));
	for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
		st.current_address = addr;
		pgd = pgd_offset_k(addr);
		if (!pgd_none(*pgd))
			walk_pud_level(m, &st, pgd, addr);
		else
			note_page(m, &st, _PAGE_INVALID, 1);
		addr += PGDIR_SIZE;
	}
	/* Flush out the last page */
	st.current_address = max_addr;
	note_page(m, &st, 0, 0);
}

static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);
	return 0;
}

static int ptdump_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int pt_dump_init(void)
{
	/*
	 * Figure out the maximum virtual address being accessible with the
	 * kernel ASCE. We need this to keep the page table walker functions
	 * from accessing non-existent entries.
	 */
#ifdef CONFIG_32BIT
	max_addr = 1UL << 31;
#else
	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
	max_addr = 1UL << (max_addr * 11 + 31);
	address_markers[MODULES_NR].start_address = MODULES_VADDR;
#endif
	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
	return 0;
}
device_initcall(pt_dump_init);