// SPDX-License-Identifier: GPL-2.0
#define boot_fmt(fmt) "vmem: " fmt
#include <linux/cpufeature.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/ctlreg.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/machine.h>
#include <asm/abs_lowcore.h>
#include "decompressor.h"
#include "boot.h"

#define INVALID_PHYS_ADDR	(~(phys_addr_t)0)

struct ctlreg __bootdata_preserved(s390_invalid_asce);

#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
#endif

#define init_mm			(*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir		vmlinux.swapper_pg_dir_off
#define invalid_pg_dir		vmlinux.invalid_pg_dir_off

enum populate_mode {
	POPULATE_NONE,
	POPULATE_DIRECT,
	POPULATE_LOWCORE,
	POPULATE_ABS_LOWCORE,
	POPULATE_IDENTITY,
	POPULATE_KERNEL,
#ifdef CONFIG_KASAN
	/* KASAN modes should be last and grouped together, see is_kasan_populate_mode() */
	POPULATE_KASAN_MAP_SHADOW,
	POPULATE_KASAN_ZERO_SHADOW,
	POPULATE_KASAN_SHALLOW
#endif
};

#define POPULATE_MODE_NAME(t) case POPULATE_ ## t: return #t
static inline const char *get_populate_mode_name(enum populate_mode t)
{
	switch (t) {
	POPULATE_MODE_NAME(NONE);
	POPULATE_MODE_NAME(DIRECT);
	POPULATE_MODE_NAME(LOWCORE);
	POPULATE_MODE_NAME(ABS_LOWCORE);
	POPULATE_MODE_NAME(IDENTITY);
	POPULATE_MODE_NAME(KERNEL);
#ifdef CONFIG_KASAN
	POPULATE_MODE_NAME(KASAN_MAP_SHADOW);
	POPULATE_MODE_NAME(KASAN_ZERO_SHADOW);
	POPULATE_MODE_NAME(KASAN_SHALLOW);
#endif
	default:
		return "UNKNOWN";
	}
}

static bool is_kasan_populate_mode(enum populate_mode mode)
{
#ifdef CONFIG_KASAN
	return mode >= POPULATE_KASAN_MAP_SHADOW;
#else
	return false;
#endif
}

static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode);

#ifdef CONFIG_KASAN

#define kasan_early_shadow_page	vmlinux.kasan_early_shadow_page_off
#define kasan_early_shadow_pte	((pte_t *)vmlinux.kasan_early_shadow_pte_off)
#define kasan_early_shadow_pmd	((pmd_t *)vmlinux.kasan_early_shadow_pmd_off)
#define kasan_early_shadow_pud	((pud_t *)vmlinux.kasan_early_shadow_pud_off)
#define kasan_early_shadow_p4d	((p4d_t *)vmlinux.kasan_early_shadow_p4d_off)
#define __sha(x)		((unsigned long)kasan_mem_to_shadow((void *)x))

static pte_t pte_z;

static inline void kasan_populate(unsigned long start, unsigned long end, enum populate_mode mode)
{
	unsigned long sha_start = PAGE_ALIGN_DOWN(__sha(start));
	unsigned long sha_end = PAGE_ALIGN(__sha(end));

	boot_debug("%-17s 0x%016lx-0x%016lx >> 0x%016lx-0x%016lx\n", get_populate_mode_name(mode),
		   start, end, sha_start, sha_end);
	pgtable_populate(sha_start, sha_end, mode);
}

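/*
 * Populate the KASAN shadow: real shadow pages for the usable physical
 * memory ranges and for the kernel image, the shared read-only zero
 * shadow for all untracked ranges (memory gaps, the region below the
 * identity base, AMODE31, everything between ident_map_size and
 * VMALLOC_START, and above kernel_end), and a shallow top-level-only
 * population for the vmalloc and modules areas.
 */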
static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
	pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
	p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
	unsigned long memgap_start = 0;
	unsigned long start, end;
	int i;

	pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO));
	crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
	crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
	memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
	__arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat(kasan_early_shadow_pte, 1);

	for_each_physmem_usable_range(i, &start, &end) {
		kasan_populate((unsigned long)__identity_va(start),
			       (unsigned long)__identity_va(end),
			       POPULATE_KASAN_MAP_SHADOW);
		if (memgap_start && physmem_info.info_source == MEM_DETECT_DIAG260) {
			kasan_populate((unsigned long)__identity_va(memgap_start),
				       (unsigned long)__identity_va(start),
				       POPULATE_KASAN_ZERO_SHADOW);
		}
		memgap_start = end;
	}
	kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
	kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
	/* shallowly populate kasan shadow for vmalloc and modules */
	kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW);
	/* populate kasan shadow for untracked memory */
	kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START,
		       POPULATE_KASAN_ZERO_SHADOW);
	kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW);
}

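/*
 * The kasan_*_populate_zero_shadow() helpers below let the page table
 * walker short-cut POPULATE_KASAN_ZERO_SHADOW ranges: when an entry
 * covers a whole, suitably aligned block, the shared early shadow table
 * of the next lower level is hooked in directly, so the entire range
 * resolves to the single read-only kasan_early_shadow_page without any
 * new allocations. They return true if the covered range can be skipped.
 */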
static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
		pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d);
		return true;
	}
	return false;
}

static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) {
		p4d_populate(&init_mm, p4d, kasan_early_shadow_pud);
		return true;
	}
	return false;
}

static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
		pud_populate(&init_mm, pud, kasan_early_shadow_pmd);
		return true;
	}
	return false;
}

static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
					   unsigned long end, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW &&
	    IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
		pmd_populate(&init_mm, pmd, kasan_early_shadow_pte);
		return true;
	}
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	if (mode == POPULATE_KASAN_ZERO_SHADOW) {
		set_pte(pte, pte_z);
		return true;
	}
	return false;
}

#else

static inline void kasan_populate_shadow(unsigned long kernel_start, unsigned long kernel_end)
{
}

static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr,
						  unsigned long end, enum populate_mode mode)
{
	return false;
}

static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode)
{
	return false;
}

#endif

/*
 * Mimic virt_to_kpte() in the absence of the init_mm symbol.
 * Skip the pmd NULL check though.
 */
static inline pte_t *__virt_to_kpte(unsigned long va)
{
	return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
}

static void *boot_crst_alloc(unsigned long val)
{
	unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
	unsigned long *table;

	table = (unsigned long *)physmem_alloc_or_die(RR_VMEM, size, size);
	crst_table_init(table, val);
	__arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
	return table;
}

static pte_t *boot_pte_alloc(void)
{
	static void *pte_leftover;
	pte_t *pte;

	/*
	 * Page tables are smaller than a page; handing out two of them
	 * per allocated page helps to avoid memory fragmentation during
	 * POPULATE_KASAN_MAP_SHADOW when EDAT is off.
	 */
	if (!pte_leftover) {
		pte_leftover = (void *)physmem_alloc_or_die(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
		pte = pte_leftover + _PAGE_TABLE_SIZE;
		__arch_set_page_dat(pte, 1);
	} else {
		pte = pte_leftover;
		pte_leftover = NULL;
	}

	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
	return pte;
}

static unsigned long resolve_pa_may_alloc(unsigned long addr, unsigned long size,
					  enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_NONE:
		return INVALID_PHYS_ADDR;
	case POPULATE_DIRECT:
		return addr;
	case POPULATE_LOWCORE:
		return __lowcore_pa(addr);
	case POPULATE_ABS_LOWCORE:
		return __abs_lowcore_pa(addr);
	case POPULATE_KERNEL:
		return __kernel_pa(addr);
	case POPULATE_IDENTITY:
		return __identity_pa(addr);
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
		/* Allow large page allocations to fail; this falls back to 1MB/4K pages */
		addr = physmem_alloc(RR_VMEM, size, size, size == PAGE_SIZE);
		if (addr) {
			memset((void *)addr, 0, size);
			return addr;
		}
		return INVALID_PHYS_ADDR;
#endif
	default:
		return INVALID_PHYS_ADDR;
	}
}

static bool large_page_mapping_allowed(enum populate_mode mode)
{
	switch (mode) {
	case POPULATE_DIRECT:
	case POPULATE_IDENTITY:
	case POPULATE_KERNEL:
#ifdef CONFIG_KASAN
	case POPULATE_KASAN_MAP_SHADOW:
#endif
		return true;
	default:
		return false;
	}
}

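/*
 * Resolve the physical address for a large leaf mapping at the given
 * level, or return INVALID_PHYS_ADDR if one cannot be used: the
 * corresponding EDAT facility must be available, the populate mode must
 * allow large mappings, and both the virtual range and the resolved
 * physical address must be aligned to the block size.
 */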
static unsigned long try_get_large_pud_pa(pud_t *pu_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!cpu_has_edat2() || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PUD_SIZE) || (size < PUD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PUD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static unsigned long try_get_large_pmd_pa(pmd_t *pm_dir, unsigned long addr, unsigned long end,
					  enum populate_mode mode)
{
	unsigned long pa, size = end - addr;

	if (!cpu_has_edat1() || !large_page_mapping_allowed(mode) ||
	    !IS_ALIGNED(addr, PMD_SIZE) || (size < PMD_SIZE))
		return INVALID_PHYS_ADDR;

	pa = resolve_pa_may_alloc(addr, size, mode);
	if (!IS_ALIGNED(pa, PMD_SIZE))
		return INVALID_PHYS_ADDR;

	return pa;
}

static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pages = 0;
	pte_t *pte, entry;

	pte = pte_offset_kernel(pmd, addr);
	for (; addr < end; addr += PAGE_SIZE, pte++) {
		if (pte_none(*pte)) {
			if (kasan_pte_populate_zero_shadow(pte, mode))
				continue;
			entry = __pte(resolve_pa_may_alloc(addr, PAGE_SIZE, mode));
			entry = set_pte_bit(entry, PAGE_KERNEL);
			set_pte(pte, entry);
			pages++;
		}
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_4K, pages);
}

static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pmd_t *pmd, entry;
	pte_t *pte;

	pmd = pmd_offset(pud, addr);
	for (; addr < end; addr = next, pmd++) {
		next = pmd_addr_end(addr, end);
		if (pmd_none(*pmd)) {
			if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode))
				continue;
			pa = try_get_large_pmd_pa(pmd, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pmd(pa);
				entry = set_pmd_bit(entry, SEGMENT_KERNEL);
				set_pmd(pmd, entry);
				pages++;
				continue;
			}
			pte = boot_pte_alloc();
			pmd_populate(&init_mm, pmd, pte);
		} else if (pmd_leaf(*pmd)) {
			continue;
		}
		pgtable_pte_populate(pmd, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_1M, pages);
}

static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long pa, next, pages = 0;
	pud_t *pud, entry;
	pmd_t *pmd;

	pud = pud_offset(p4d, addr);
	for (; addr < end; addr = next, pud++) {
		next = pud_addr_end(addr, end);
		if (pud_none(*pud)) {
			if (kasan_pud_populate_zero_shadow(pud, addr, next, mode))
				continue;
			pa = try_get_large_pud_pa(pud, addr, next, mode);
			if (pa != INVALID_PHYS_ADDR) {
				entry = __pud(pa);
				entry = set_pud_bit(entry, REGION3_KERNEL);
				set_pud(pud, entry);
				pages++;
				continue;
			}
			pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
			pud_populate(&init_mm, pud, pmd);
		} else if (pud_leaf(*pud)) {
			continue;
		}
		pgtable_pmd_populate(pud, addr, next, mode);
	}
	if (mode == POPULATE_IDENTITY)
		update_page_count(PG_DIRECT_MAP_2G, pages);
}

static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end,
				 enum populate_mode mode)
{
	unsigned long next;
	p4d_t *p4d;
	pud_t *pud;

	p4d = p4d_offset(pgd, addr);
	for (; addr < end; addr = next, p4d++) {
		next = p4d_addr_end(addr, end);
		if (p4d_none(*p4d)) {
			if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode))
				continue;
			pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
			p4d_populate(&init_mm, p4d, pud);
		}
		pgtable_pud_populate(p4d, addr, next, mode);
	}
}

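/*
 * Top level of the boot-time page table walker: walk [addr, end) by PGD
 * entries and populate all levels below, allocating intermediate tables
 * as needed. Zero-shadow ranges are short-cut by the
 * kasan_*_populate_zero_shadow() helpers, and POPULATE_KASAN_SHALLOW
 * deliberately stops after the PGD level.
 */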
static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode)
{
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;

	if (!is_kasan_populate_mode(mode)) {
		boot_debug("%-17s 0x%016lx-0x%016lx -> 0x%016lx-0x%016lx\n",
			   get_populate_mode_name(mode), addr, end,
			   resolve_pa_may_alloc(addr, 0, mode),
			   resolve_pa_may_alloc(end - 1, 0, mode) + 1);
	}

	pgd = pgd_offset(&init_mm, addr);
	for (; addr < end; addr = next, pgd++) {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgd)) {
			if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode))
				continue;
			p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
			pgd_populate(&init_mm, pgd, p4d);
		}
#ifdef CONFIG_KASAN
		if (mode == POPULATE_KASAN_SHALLOW)
			continue;
#endif
		pgtable_p4d_populate(pgd, addr, next, mode);
	}
}

void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
	unsigned long lowcore_address = 0;
	unsigned long start, end;
	unsigned long asce_type;
	unsigned long asce_bits;
	pgd_t *init_mm_pgd;
	int i;

	/*
	 * Mark whole memory as no-dat. This must be done before any
	 * page tables are allocated, otherwise the pages built into
	 * the kernel image would already be marked as dat tables.
	 */
	for_each_physmem_online_range(i, &start, &end)
		__arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);

	/*
	 * init_mm->pgd contains the virtual address of swapper_pg_dir,
	 * which is unusable at this stage since DAT is still off. Swap
	 * it for the physical address of swapper_pg_dir and restore
	 * the virtual address after all page tables are created.
	 */
	init_mm_pgd = init_mm.pgd;
	init_mm.pgd = (pgd_t *)swapper_pg_dir;

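	/*
	 * Select the top-level table type matching the requested address
	 * space size: a region-second table if asce_limit is
	 * _REGION1_SIZE, a region-third table otherwise. The invalid
	 * ASCE always uses a region-third table.
	 */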
	if (asce_limit == _REGION1_SIZE) {
		asce_type = _REGION2_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
	} else {
		asce_type = _REGION3_ENTRY_EMPTY;
		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
	}
	s390_invalid_asce.val = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;

	crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
	crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
	__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
	__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);

	if (machine_has_relocated_lowcore())
		lowcore_address = LOWCORE_ALT_ADDRESS;

	/*
	 * To allow prefixing, the lowcore must be mapped with 4KB pages.
	 * To prevent creation of a large page at address 0, map the
	 * lowcore first and create the identity mapping only afterwards.
	 */
	pgtable_populate(lowcore_address,
			 lowcore_address + sizeof(struct lowcore),
			 POPULATE_LOWCORE);
	for_each_physmem_usable_range(i, &start, &end) {
		pgtable_populate((unsigned long)__identity_va(start),
				 (unsigned long)__identity_va(end),
				 POPULATE_IDENTITY);
	}

	/*
	 * The [kernel_start..kernel_start + TEXT_OFFSET] region is never
	 * accessed as per the linker script:
	 *
	 *	. = TEXT_OFFSET;
	 *
	 * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
	 * the [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
	 */
	pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
	pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
	pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
			 POPULATE_ABS_LOWCORE);
	pgtable_populate(__memcpy_real_area, __memcpy_real_area + PAGE_SIZE,
			 POPULATE_NONE);
	memcpy_real_ptep = __identity_va(__virt_to_kpte(__memcpy_real_area));

	kasan_populate_shadow(kernel_start, kernel_end);

	get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
	get_lowcore()->user_asce = s390_invalid_asce;

	local_ctl_load(1, &get_lowcore()->kernel_asce);
	local_ctl_load(7, &get_lowcore()->user_asce);
	local_ctl_load(13, &get_lowcore()->kernel_asce);

	init_mm.context.asce = get_lowcore()->kernel_asce.val;
	init_mm.pgd = init_mm_pgd;
}