// SPDX-License-Identifier: GPL-2.0-only
/*
 * Debug helper to dump the current kernel pagetables of the system
 * so that we can see what the various memory ranges are set to.
 *
 * (C) Copyright 2008 Intel Corporation
 *
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 */

#include <linux/debugfs.h>
#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/pci.h>
#include <linux/ptdump.h>

#include <asm/e820/types.h>

/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 */
struct pg_state {
	struct ptdump_state ptdump;
	int level;
	pgprotval_t current_prot;
	pgprotval_t effective_prot;
	pgprotval_t prot_levels[5];
	unsigned long start_address;
	const struct addr_marker *marker;
	unsigned long lines;
	bool to_dmesg;
	bool check_wx;
	unsigned long wx_pages;
	struct seq_file *seq;
};

struct addr_marker {
	unsigned long start_address;
	const char *name;
	unsigned long max_lines;
};
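
/*
 * Illustration of the flow (an editorial sketch, not upstream
 * documentation): the ptdump core calls note_page() once per entry;
 * pg_state remembers the range start address, the protections, and the
 * current addr_marker, and a line is emitted only when the protections
 * or the paging level change, or when the address crosses
 * marker[1].start_address. The markers defined next label where each
 * such range lives.
 */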

/* Address space marker hints */

#ifdef CONFIG_X86_64

enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
	CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
#endif
#ifdef CONFIG_EFI
	EFI_END_NR,
#endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};

static struct addr_marker address_markers[] = {
	[USER_SPACE_NR]		= { 0,			"User Space" },
	[KERNEL_SPACE_NR]	= { (1UL << 63),	"Kernel Space" },
	[LOW_KERNEL_NR]		= { 0UL,		"Low Kernel Mapping" },
	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
	[VMEMMAP_START_NR]	= { 0UL,		"Vmemmap" },
#ifdef CONFIG_KASAN
	/*
	 * These fields get initialized with the (dynamic)
	 * KASAN_SHADOW_{START,END} values in pt_dump_init().
	 */
	[KASAN_SHADOW_START_NR]	= { 0UL,		"KASAN shadow" },
	[KASAN_SHADOW_END_NR]	= { 0UL,		"KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	[LDT_NR]		= { 0UL,		"LDT remap" },
#endif
	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE, "CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
	[ESPFIX_START_NR]	= { ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
#endif
#ifdef CONFIG_EFI
	[EFI_END_NR]		= { EFI_VA_END,		"EFI Runtime Services" },
#endif
	[HIGH_KERNEL_NR]	= { __START_KERNEL_map,	"High Kernel Mapping" },
	[MODULES_VADDR_NR]	= { MODULES_VADDR,	"Modules" },
	[MODULES_END_NR]	= { MODULES_END,	"End Modules" },
	[FIXADDR_START_NR]	= { FIXADDR_START,	"Fixmap Area" },
	[END_OF_SPACE_NR]	= { -1,			NULL }
};

#define INIT_PGD	((pgd_t *) &init_top_pgt)

#else /* CONFIG_X86_64 */

enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
#ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	LDT_NR,
#endif
	CPU_ENTRY_AREA_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};

static struct addr_marker address_markers[] = {
	[USER_SPACE_NR]		= { 0,			"User Space" },
	[KERNEL_SPACE_NR]	= { PAGE_OFFSET,	"Kernel Mapping" },
	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
	[VMALLOC_END_NR]	= { 0UL,		"vmalloc() End" },
#ifdef CONFIG_HIGHMEM
	[PKMAP_BASE_NR]		= { 0UL,		"Persistent kmap() Area" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	[LDT_NR]		= { 0UL,		"LDT remap" },
#endif
	[CPU_ENTRY_AREA_NR]	= { 0UL,		"CPU entry area" },
	[FIXADDR_START_NR]	= { 0UL,		"Fixmap area" },
	[END_OF_SPACE_NR]	= { -1,			NULL }
};

#define INIT_PGD	(swapper_pg_dir)

#endif /* !CONFIG_X86_64 */

/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)
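
/*
 * Worked example (a sketch assuming the x86-64 defaults of 4 KiB pages
 * and 512 entries per table):
 *
 *   PTE_LEVEL_MULT = 4 KiB
 *   PMD_LEVEL_MULT = 512 * 4 KiB  = 2 MiB
 *   PUD_LEVEL_MULT = 512 * 2 MiB  = 1 GiB
 *   P4D_LEVEL_MULT = 512 * 1 GiB  = 512 GiB
 *
 * With 4-level paging PTRS_PER_P4D is 1, so PGD_LEVEL_MULT stays at
 * 512 GiB per PGD slot; with 5-level paging it becomes 256 TiB.
 */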

#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg)						\
		printk(KERN_INFO fmt, ##args);			\
	else							\
		if (m)						\
			seq_printf(m, fmt, ##args);		\
})

#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg)						\
		printk(KERN_CONT fmt, ##args);			\
	else							\
		if (m)						\
			seq_printf(m, fmt, ##args);		\
})

/*
 * Print a readable form of a pgprot_t to the seq_file
 */
static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg)
{
	static const char * const level_name[] =
		{ "pgd", "p4d", "pud", "pmd", "pte" };

	if (!(pr & _PAGE_PRESENT)) {
		/* Not present */
		pt_dump_cont_printf(m, dmsg, "                              ");
	} else {
		if (pr & _PAGE_USER)
			pt_dump_cont_printf(m, dmsg, "USR ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_RW)
			pt_dump_cont_printf(m, dmsg, "RW ");
		else
			pt_dump_cont_printf(m, dmsg, "ro ");
		if (pr & _PAGE_PWT)
			pt_dump_cont_printf(m, dmsg, "PWT ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_PCD)
			pt_dump_cont_printf(m, dmsg, "PCD ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");

		/* Bit 7 has a different meaning on level 3 vs 4 */
		if (level <= 3 && pr & _PAGE_PSE)
			pt_dump_cont_printf(m, dmsg, "PSE ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if ((level == 4 && pr & _PAGE_PAT) ||
		    ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE))
			pt_dump_cont_printf(m, dmsg, "PAT ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_GLOBAL)
			pt_dump_cont_printf(m, dmsg, "GLB ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_NX)
			pt_dump_cont_printf(m, dmsg, "NX ");
		else
			pt_dump_cont_printf(m, dmsg, "x  ");
	}
	pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
}
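
/*
 * Illustrative decode for one entry (made-up values): a read-only,
 * global 2 MiB mapping at the PMD level sets PRESENT, PSE and GLOBAL
 * but not USER, RW or NX, so printk_prot() emits roughly
 *
 *       ro          PSE     GLB x  pmd
 *
 * with absent attributes printed as spaces so the columns stay aligned.
 */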

static void note_wx(struct pg_state *st, unsigned long addr)
{
	unsigned long npages;

	npages = (addr - st->start_address) / PAGE_SIZE;

#ifdef CONFIG_PCI_BIOS
	/*
	 * If PCI BIOS is enabled, the PCI BIOS area is forced to WX.
	 * Inform about it, but avoid the warning.
	 */
	if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN &&
	    addr <= PAGE_OFFSET + BIOS_END) {
		pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages);
		return;
	}
#endif
	/* Account the WX pages */
	st->wx_pages += npages;
	WARN_ONCE(__supported_pte_mask & _PAGE_NX,
		  "x86/mm: Found insecure W+X mapping at address %pS\n",
		  (void *)st->start_address);
}

static void effective_prot(struct ptdump_state *pt_st, int level, u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	pgprotval_t prot = val & PTE_FLAGS_MASK;
	pgprotval_t effective;

	if (level > 0) {
		pgprotval_t higher_prot = st->prot_levels[level - 1];

		effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) |
			    ((higher_prot | prot) & _PAGE_NX);
	} else {
		effective = prot;
	}

	st->prot_levels[level] = effective;
}

static void effective_prot_pte(struct ptdump_state *st, pte_t pte)
{
	effective_prot(st, 4, pte_val(pte));
}

static void effective_prot_pmd(struct ptdump_state *st, pmd_t pmd)
{
	effective_prot(st, 3, pmd_val(pmd));
}

static void effective_prot_pud(struct ptdump_state *st, pud_t pud)
{
	effective_prot(st, 2, pud_val(pud));
}

static void effective_prot_p4d(struct ptdump_state *st, p4d_t p4d)
{
	effective_prot(st, 1, p4d_val(p4d));
}

static void effective_prot_pgd(struct ptdump_state *st, pgd_t pgd)
{
	effective_prot(st, 0, pgd_val(pgd));
}
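
/*
 * Worked example for effective_prot() above: a PGD entry carrying
 * USER|RW above a leaf PTE carrying RW|NX yields an effective RW|NX.
 * USER and RW are ANDed across levels (every level must grant them),
 * while NX is ORed (any level can revoke execute), so the resulting
 * mapping is kernel-only, writable, and non-executable.
 */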

/*
 * This function gets called on a break in a continuous series of
 * entries; the next entry differs, so we print what we have collected
 * so far.
 */
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
		      u64 val)
{
	struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
	pgprotval_t new_prot, new_eff;
	pgprotval_t cur, eff;
	static const char units[] = "BKMGTPE";
	struct seq_file *m = st->seq;

	new_prot = val & PTE_FLAGS_MASK;
	if (!val)
		new_eff = 0;
	else
		new_eff = st->prot_levels[level];

	/*
	 * If we have a "break" in the series, we need to flush the state
	 * we have accumulated. A "break" is a change in permissions or
	 * level, or crossing an address space marker.
	 */
	cur = st->current_prot;
	eff = st->effective_prot;

	if (st->level == -1) {
		/* First entry */
		st->current_prot = new_prot;
		st->effective_prot = new_eff;
		st->level = level;
		st->marker = address_markers;
		st->lines = 0;
		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
				   st->marker->name);
	} else if (new_prot != cur || new_eff != eff || level != st->level ||
		   addr >= st->marker[1].start_address) {
		const char *unit = units;
		unsigned long delta;
		int width = sizeof(unsigned long) * 2;

		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX))
			note_wx(st, addr);

		/*
		 * Now print the actual finished series
		 */
		if (!st->marker->max_lines ||
		    st->lines < st->marker->max_lines) {
			pt_dump_seq_printf(m, st->to_dmesg,
					   "0x%0*lx-0x%0*lx   ",
					   width, st->start_address,
					   width, addr);

			delta = addr - st->start_address;
			while (!(delta & 1023) && unit[1]) {
				delta >>= 10;
				unit++;
			}
			pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
					    delta, *unit);
			printk_prot(m, st->current_prot, st->level,
				    st->to_dmesg);
		}
		st->lines++;

		/*
		 * We print markers for special areas of address space,
		 * such as the start of vmalloc space etc.
		 * This helps in the interpretation.
		 */
		if (addr >= st->marker[1].start_address) {
			if (st->marker->max_lines &&
			    st->lines > st->marker->max_lines) {
				unsigned long nskip =
					st->lines - st->marker->max_lines;
				pt_dump_seq_printf(m, st->to_dmesg,
						   "... %lu entr%s skipped ... \n",
						   nskip,
						   nskip == 1 ? "y" : "ies");
			}
			st->marker++;
			st->lines = 0;
			pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
					   st->marker->name);
		}

		st->start_address = addr;
		st->current_prot = new_prot;
		st->effective_prot = new_eff;
		st->level = level;
	}
}

static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte)
{
	note_page(pt_st, addr, 4, pte_val(pte));
}

static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd)
{
	note_page(pt_st, addr, 3, pmd_val(pmd));
}

static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud)
{
	note_page(pt_st, addr, 2, pud_val(pud));
}

static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d)
{
	note_page(pt_st, addr, 1, p4d_val(p4d));
}

static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd)
{
	note_page(pt_st, addr, 0, pgd_val(pgd));
}

static void note_page_flush(struct ptdump_state *pt_st)
{
	pte_t pte_zero = {0};

	note_page(pt_st, 0, -1, pte_val(pte_zero));
}
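
/*
 * Illustrative shape of the resulting dump (made-up addresses, sizes
 * and flags):
 *
 * ---[ High Kernel Mapping ]---
 * 0xffffffff80000000-0xffffffff81000000      16M                     pmd
 * 0xffffffff81000000-0xffffffff81e00000      14M  ro  PSE  GLB x  pmd
 * 0xffffffff81e00000-0xffffffff82000000       2M  RW  PSE  GLB NX pmd
 *
 * note_page_flush() ends the walk by feeding note_page() an impossible
 * entry (level -1, empty protections), which forces the last
 * accumulated range out.
 */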

bool ptdump_walk_pgd_level_core(struct seq_file *m,
				struct mm_struct *mm, pgd_t *pgd,
				bool checkwx, bool dmesg)
{
	const struct ptdump_range ptdump_ranges[] = {
#ifdef CONFIG_X86_64
		{0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2},
		{GUARD_HOLE_END_ADDR, ~0UL},
#else
		{0, ~0UL},
#endif
		{0, 0}
	};

	struct pg_state st = {
		.ptdump = {
			.note_page_pte	= note_page_pte,
			.note_page_pmd	= note_page_pmd,
			.note_page_pud	= note_page_pud,
			.note_page_p4d	= note_page_p4d,
			.note_page_pgd	= note_page_pgd,
			.note_page_flush = note_page_flush,
			.effective_prot_pte = effective_prot_pte,
			.effective_prot_pmd = effective_prot_pmd,
			.effective_prot_pud = effective_prot_pud,
			.effective_prot_p4d = effective_prot_p4d,
			.effective_prot_pgd = effective_prot_pgd,
			.range = ptdump_ranges
		},
		.level = -1,
		.to_dmesg = dmesg,
		.check_wx = checkwx,
		.seq = m
	};

	ptdump_walk_pgd(&st.ptdump, mm, pgd);

	if (!checkwx)
		return true;
	if (st.wx_pages) {
		pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
			st.wx_pages);

		return false;
	} else {
		pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");

		return true;
	}
}

void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm)
{
	ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true);
}

void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm,
				   bool user)
{
	pgd_t *pgd = mm->pgd;
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
	if (user && boot_cpu_has(X86_FEATURE_PTI))
		pgd = kernel_to_user_pgdp(pgd);
#endif
	ptdump_walk_pgd_level_core(m, mm, pgd, false, false);
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);

void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
	pgd_t *pgd = INIT_PGD;

	if (!(__supported_pte_mask & _PAGE_NX) ||
	    !boot_cpu_has(X86_FEATURE_PTI))
		return;

	pr_info("x86/mm: Checking user space page tables\n");
	pgd = kernel_to_user_pgdp(pgd);
	ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false);
#endif
}

bool ptdump_walk_pgd_level_checkwx(void)
{
	if (!(__supported_pte_mask & _PAGE_NX))
		return true;

	return ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false);
}

static int __init pt_dump_init(void)
{
	/*
	 * Various markers are not compile-time constants, so assign them
	 * here.
	 */
#ifdef CONFIG_X86_64
	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
#endif
#ifdef CONFIG_KASAN
	address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
	address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
#endif
#endif
#ifdef CONFIG_X86_32
	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
# ifdef CONFIG_MODIFY_LDT_SYSCALL
	address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
# endif
#endif
	return 0;
}
__initcall(pt_dump_init);
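
/*
 * Usage sketch (assuming CONFIG_PTDUMP_DEBUGFS=y; exact file names may
 * differ between kernel versions):
 *
 *   # cat /sys/kernel/debug/page_tables/kernel
 *   # cat /sys/kernel/debug/page_tables/current_user
 *
 * With CONFIG_DEBUG_WX=y, ptdump_walk_pgd_level_checkwx() runs late in
 * boot and warns once if any W+X kernel mapping is found.
 */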