#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

/* Exclusive upper bound of the page table walk; set in pt_dump_init(). */
static unsigned long max_addr;

/*
 * A named boundary in the address space. When the walker reaches
 * start_address, a "---[ name ]---" header line is emitted.
 */
struct addr_marker {
	unsigned long start_address;
	const char *name;
};

/* Indices into address_markers[]; MODULES_NR exists on 64 bit only. */
enum address_markers_idx {
	IDENTITY_NR = 0,
	KERNEL_START_NR,
	KERNEL_END_NR,
	VMEMMAP_NR,
	VMALLOC_NR,
#ifdef CONFIG_64BIT
	MODULES_NR,
#endif
};

/*
 * Marker table. note_page() only ever moves forward through this array
 * by comparing against the next entry, so the entries have to be in
 * ascending address order. The entries initialised with 0 are filled in
 * by pt_dump_init(). The final { -1, NULL } entry is a sentinel that
 * terminates the table.
 */
static struct addr_marker address_markers[] = {
	[IDENTITY_NR]	  = {0, "Identity Mapping"},
	[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
	[VMALLOC_NR]	  = {0, "vmalloc Area"},
#ifdef CONFIG_64BIT
	[MODULES_NR]	  = {0, "Modules Area"},
#endif
	{ -1, NULL }
};

/*
 * Walker state: describes the series of equal entries currently being
 * accumulated. A level of 0 means that no entry has been noted yet.
 */
struct pg_state {
	int level;			/* index into level_name[] */
	unsigned int current_prot;	/* protection bits of the series */
	unsigned long start_address;	/* first address of the series */
	unsigned long current_address;	/* address currently being walked */
	const struct addr_marker *marker; /* last marker that was printed */
};

/*
 * Print the table level name followed by the protection flags of a
 * finished series and terminate the output line.
 *
 * @m:     seq_file to print to
 * @pr:    protection bits as collected by the walker functions
 * @level: index into level_name[] (0 = ASCE ... 4 = PTE)
 */
static void print_prot(struct seq_file *m, unsigned int pr, int level)
{
	static const char * const level_name[] = {
		"ASCE", "PGD", "PUD", "PMD", "PTE"
	};

	seq_printf(m, "%s ", level_name[level]);
	if (pr & _PAGE_INVALID) {
		/* Invalid entries carry no further attributes. */
		seq_puts(m, "I\n");
		return;
	}
	seq_puts(m, pr & _PAGE_PROTECT ? "RO " : "RW ");
	seq_puts(m, pr & _PAGE_CO ? "CO " : " ");
	seq_putc(m, '\n');
}

/*
 * Account one table entry. Consecutive entries with identical
 * protection and level are merged into a series; the series is printed
 * as soon as it ends.
 *
 * @m:        seq_file to print to
 * @st:       walker state; st->current_address must already be set to
 *            the address of the entry being noted
 * @new_prot: protection bits of the entry
 * @level:    table level of the entry; walk_pgd_level() passes 0 for
 *            the final call that flushes the last series
 */
static void note_page(struct seq_file *m, struct pg_state *st,
		      unsigned int new_prot, int level)
{
	static const char units[] = "KMGTPE";
	int width = sizeof(unsigned long) * 2;
	const char *unit = units;
	unsigned int prot, cur;
	unsigned long delta;

	/*
	 * If we have a "break" in the series, we need to flush the state
	 * that we have now. "break" is either changing perms, levels or
	 * address space marker.
	 */
	prot = new_prot;
	cur = st->current_prot;

	if (!st->level) {
		/* First entry */
		st->current_prot = new_prot;
		st->level = level;
		st->marker = address_markers;
		seq_printf(m, "---[ %s ]---\n", st->marker->name);
	} else if (prot != cur || level != st->level ||
		   st->current_address >= st->marker[1].start_address) {
		/* Print the actual finished series */
		seq_printf(m, "0x%0*lx-0x%0*lx",
			   width, st->start_address,
			   width, st->current_address);
		/* Size in KB, scaled to the largest unit that divides it. */
		delta = (st->current_address - st->start_address) >> 10;
		while (!(delta & 0x3ff) && unit[1]) {
			delta >>= 10;
			unit++;
		}
		seq_printf(m, "%9lu%c ", delta, *unit);
		print_prot(m, st->current_prot, st->level);
		/*
		 * A single series may step over more than one marker, e.g.
		 * when a large mapping covers several of them. Emit every
		 * marker that has been passed now; advancing by only one
		 * would leave a stale marker pending, which forces a bogus
		 * split of the next series and prints its header too late.
		 */
		while (st->current_address >= st->marker[1].start_address) {
			st->marker++;
			seq_printf(m, "---[ %s ]---\n", st->marker->name);
		}
		st->start_address = st->current_address;
		st->current_prot = new_prot;
		st->level = level;
	}
}

/*
 * The actual page table walker functions. In order to keep the
 * implementation of print_prot() short, we only check and pass
 * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
 * segment or page table entry is invalid or read-only.
 * After all it's just a hint that the current level being walked
 * contains an invalid or read-only entry.
 */

/*
 * Note every pte of the page table referenced by @pmd, starting at
 * @addr and stopping at max_addr.
 */
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
			   pmd_t *pmd, unsigned long addr)
{
	unsigned int prot;
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
		st->current_address = addr;
		pte = pte_offset_kernel(pmd, addr);
		prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
		note_page(m, st, prot, 4);
		addr += PAGE_SIZE;
	}
}

#ifdef CONFIG_64BIT
#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
#else
#define _PMD_PROT_MASK 0
#endif

/*
 * Walk the pmd entries below @pud, starting at @addr: empty entries are
 * noted as invalid, large entries are noted directly and everything
 * else is handed to walk_pte_level().
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
			   pud_t *pud, unsigned long addr)
{
	unsigned int prot;
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
		st->current_address = addr;
		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			note_page(m, st, _PAGE_INVALID, 3);
		} else if (pmd_large(*pmd)) {
			prot = pmd_val(*pmd) & _PMD_PROT_MASK;
			note_page(m, st, prot, 3);
		} else {
			walk_pte_level(m, st, pmd, addr);
		}
		addr += PMD_SIZE;
	}
}

#ifdef CONFIG_64BIT
#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO)
#else
#define _PUD_PROT_MASK 0
#endif

/*
 * Walk the pud entries below @pgd, starting at @addr: empty entries are
 * noted as invalid, large entries are noted directly and everything
 * else is handed to walk_pmd_level().
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st,
			   pgd_t *pgd, unsigned long addr)
{
	unsigned int prot;
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
		st->current_address = addr;
		pud = pud_offset(pgd, addr);
		if (pud_none(*pud)) {
			note_page(m, st, _PAGE_INVALID, 2);
		} else if (pud_large(*pud)) {
			prot = pud_val(*pud) & _PUD_PROT_MASK;
			note_page(m, st, prot, 2);
		} else {
			walk_pmd_level(m, st, pud, addr);
		}
		addr += PUD_SIZE;
	}
}

/*
 * Top level of the walk: iterate over the kernel pgd entries from
 * address 0 up to max_addr and print the resulting series to @m.
 */
static void walk_pgd_level(struct seq_file *m)
{
	unsigned long addr = 0;
	struct pg_state st;
	pgd_t *pgd;
	int i;

	memset(&st, 0, sizeof(st));
	for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
		st.current_address = addr;
		pgd = pgd_offset_k(addr);
		if (pgd_none(*pgd))
			note_page(m, &st, _PAGE_INVALID, 1);
		else
			walk_pud_level(m, &st, pgd, addr);
		addr += PGDIR_SIZE;
	}
	/*
	 * Flush out the last page: level 0 differs from the level of any
	 * noted entry, so the pending series is always printed.
	 */
	st.current_address = max_addr;
	note_page(m, &st, 0, 0);
}

/* seq_file show callback: dump the complete kernel page table. */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);
	return 0;
}

/* open callback: bind ptdump_show() to the file via single_open(). */
static int ptdump_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Initcall: determine max_addr, fill in the marker addresses that are
 * not compile time constants and create the "kernel_page_tables"
 * debugfs file (mode 0400, no parent directory given).
 *
 * Always returns 0; the result of debugfs_create_file() is not checked.
 */
static int pt_dump_init(void)
{
	/*
	 * Figure out the maximum virtual address being accessible with the
	 * kernel ASCE. We need this to keep the page table walker functions
	 * from accessing non-existent entries.
	 */
#ifdef CONFIG_32BIT
	max_addr = 1UL << 31;
#else
	/*
	 * The table type taken from the ASCE selects 31, 42 or 53 address
	 * bits for the values 0, 1 and 2.
	 * NOTE(review): a value of 3 would give a shift count of 64, which
	 * is undefined for unsigned long - presumably the kernel ASCE never
	 * uses that type; confirm.
	 */
	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
	max_addr = 1UL << (max_addr * 11 + 31);
	address_markers[MODULES_NR].start_address = MODULES_VADDR;
#endif
	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
	return 0;
}
device_initcall(pt_dump_init);