// SPDX-License-Identifier: GPL-2.0-only
/*
 * Architecture specific (PPC64) functions for kexec based crash dumps.
 *
 * Copyright (C) 2005, IBM Corp.
 *
 * Created by: Haren Myneni
 */

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>
#include <linux/libfdt.h>
#include <linux/memory.h>

#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
#include <asm/smp.h>
#include <asm/setjmp.h>
#include <asm/debug.h>
#include <asm/interrupt.h>
#include <asm/kexec_ranges.h>

/*
 * The primary CPU waits a while for all secondary CPUs to enter. This is to
 * avoid sending an IPI if the secondary CPUs are entering
 * crash_kexec_secondary on their own (e.g. via a system reset).
 *
 * The secondary timeout has to be longer than the primary. Both timeouts are
 * in milliseconds.
 */
#define PRIMARY_TIMEOUT		500
#define SECONDARY_TIMEOUT	1000

#define IPI_TIMEOUT		10000
#define REAL_MODE_TIMEOUT	10000

static int time_to_dump;

/*
 * In case of system reset, secondary CPUs enter crash_kexec_secondary without
 * an IPI having to be sent explicitly. So, note whether the crash came in via
 * system reset to avoid sending a redundant IPI.
 */
static int is_via_system_reset;

/*
 * crash_wake_offline should be set to 1 by platforms that intend to wake
 * up offline cpus prior to jumping to a kdump kernel. Currently powernv
 * sets it to 1, since we want to avoid unexpected behaviour when an
 * offline CPU wakes up due to something like an HMI (malfunction error),
 * which propagates to all threads.
 */
int crash_wake_offline;
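/*
 * Illustrative sketch (hypothetical platform code, not part of this file):
 * a platform opts in by setting the flag from its setup code, roughly the
 * way powernv does. The function name below is only an assumed example
 * location, not the actual powernv call site.
 *
 *	static void __init pnv_setup_arch(void)
 *	{
 *		...
 *		// Wake offline CPUs before jumping to the kdump kernel, so
 *		// a stray wakeup (e.g. an HMI) cannot disturb the dump.
 *		crash_wake_offline = 1;
 *		...
 *	}
 */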
#define CRASH_HANDLER_MAX 3
/* List of shutdown handles */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
static DEFINE_SPINLOCK(crash_handlers_lock);

static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
static int crash_shutdown_cpu = -1;

static int handle_fault(struct pt_regs *regs)
{
	if (crash_shutdown_cpu == smp_processor_id())
		longjmp(crash_shutdown_buf, 1);
	return 0;
}

#ifdef CONFIG_SMP

static atomic_t cpus_in_crash;
void crash_ipi_callback(struct pt_regs *regs)
{
	static cpumask_t cpus_state_saved = CPU_MASK_NONE;

	int cpu = smp_processor_id();

	hard_irq_disable();
	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
		crash_save_cpu(regs, cpu);
		cpumask_set_cpu(cpu, &cpus_state_saved);
	}

	atomic_inc(&cpus_in_crash);
	smp_mb__after_atomic();

	/*
	 * Spin until the primary CPU signals that it is time to dump;
	 * this holds every secondary CPU stopped here until the kdump
	 * boot starts.
	 */
	while (!time_to_dump)
		cpu_relax();

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 1);

#ifdef CONFIG_PPC64
	kexec_smp_wait();
#else
	for (;;);	/* FIXME */
#endif

	/* NOTREACHED */
}

static void crash_kexec_prepare_cpus(void)
{
	unsigned int msecs;
	volatile unsigned int ncpus = num_online_cpus() - 1;	/* Excluding the panic cpu */
	volatile int tries = 0;
	int (*old_handler)(struct pt_regs *regs);

	printk(KERN_EMERG "Sending IPI to other CPUs\n");

	if (crash_wake_offline)
		ncpus = num_present_cpus() - 1;

	/*
	 * If we came in via system reset, secondaries enter via
	 * crash_kexec_secondary(), so wait a while for them to show up.
	 * Otherwise, send an IPI to all other CPUs.
	 */
	if (is_via_system_reset)
		mdelay(PRIMARY_TIMEOUT);
	else
		crash_send_ipi(crash_ipi_callback);
	smp_wmb();

again:
	/*
	 * FIXME: Until we have a way to stop other CPUs reliably, the
	 * crash CPU sends an IPI and waits for the other CPUs to respond.
	 */
	msecs = IPI_TIMEOUT;
	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
		mdelay(1);

	/* Would it be better to replace the trap vector here? */

	if (atomic_read(&cpus_in_crash) >= ncpus) {
		printk(KERN_EMERG "IPI complete\n");
		return;
	}

	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
	       ncpus - atomic_read(&cpus_in_crash));

	/*
	 * If we have a panic timeout set then we can't wait indefinitely
	 * for someone to activate system reset. We also give up on the
	 * second time through if system reset fails to work.
	 */
	if ((panic_timeout > 0) || (tries > 0))
		return;

	/*
	 * A system reset will cause all CPUs to take a 0x100 exception.
	 * The primary CPU returns here via setjmp, and the secondary
	 * CPUs re-execute the crash_kexec_secondary path.
	 */
	old_handler = __debugger;
	__debugger = handle_fault;
	crash_shutdown_cpu = smp_processor_id();

	if (setjmp(crash_shutdown_buf) == 0) {
		printk(KERN_EMERG "Activate system reset (dumprestart) to stop other cpu(s)\n");

		/*
		 * A system reset will force all CPUs to execute the
		 * crash code again. We need to reset cpus_in_crash so we
		 * wait for everyone to do this.
		 */
		atomic_set(&cpus_in_crash, 0);
		smp_mb();

		while (atomic_read(&cpus_in_crash) < ncpus)
			cpu_relax();
	}

	crash_shutdown_cpu = -1;
	__debugger = old_handler;

	tries++;
	goto again;
}

/*
 * This function will be called by secondary cpus.
 */
void crash_kexec_secondary(struct pt_regs *regs)
{
	unsigned long flags;
	int msecs = SECONDARY_TIMEOUT;

	local_irq_save(flags);

	/* Wait for the primary crash CPU to signal its progress */
	while (crashing_cpu < 0) {
		if (--msecs < 0) {
			/* No response; the kdump image may not have been loaded. */
			local_irq_restore(flags);
			return;
		}

		mdelay(1);
	}

	crash_ipi_callback(regs);
}
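/*
 * Illustrative sketch (simplified, hypothetical handler name): this is
 * roughly how the system reset exception path hands a secondary CPU into
 * the crash sequence, which is why secondaries can arrive without an IPI:
 *
 *	void system_reset_handler(struct pt_regs *regs)
 *	{
 *		// Secondaries block in crash_kexec_secondary() until the
 *		// primary sets crashing_cpu; it returns harmlessly after
 *		// SECONDARY_TIMEOUT if no crash kexec is in progress.
 *		crash_kexec_secondary(regs);
 *
 *		// ... normal system reset handling otherwise ...
 *	}
 *
 * See the real system reset exception code (traps.c) for the exact flow.
 */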
#else	/* ! CONFIG_SMP */

static void crash_kexec_prepare_cpus(void)
{
	/*
	 * move the secondaries to us so that we can copy
	 * the new kernel 0-0x100 safely
	 *
	 * do this if kexec in setup.c ?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}

void crash_kexec_secondary(struct pt_regs *regs)
{
}
#endif	/* CONFIG_SMP */

/* Wait for all the CPUs to hit real mode, but time out if they don't come in. */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
	unsigned int msecs;
	int i;

	msecs = REAL_MODE_TIMEOUT;
	for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
		if (i == cpu)
			continue;

		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
			barrier();
			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
				break;
			msecs--;
			mdelay(1);
		}
	}
	mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC64 */

void crash_kexec_prepare(void)
{
	/* Avoid hardlocking with an unresponsive CPU holding logbuf_lock */
	printk_deferred_enter();

	/*
	 * This function is only called after the system has panicked or is
	 * otherwise in a critical state. The minimum amount of code to allow
	 * a kexec'd kernel to run successfully needs to happen here.
	 *
	 * In practice this means stopping other cpus in an SMP system.
	 * The kernel is broken so disable interrupts.
	 */
	hard_irq_disable();

	/*
	 * Make a note of the crashing cpu. It will be used in machine_kexec
	 * so that another IPI is not sent.
	 */
	crashing_cpu = smp_processor_id();

	crash_kexec_prepare_cpus();
}

/*
 * Register a function to be called on shutdown. Only use this if you
 * can't reset your device in the second kernel.
 */
int crash_shutdown_register(crash_shutdown_t handler)
{
	unsigned int i, rc;

	spin_lock(&crash_handlers_lock);
	for (i = 0; i < CRASH_HANDLER_MAX; i++)
		if (!crash_shutdown_handles[i]) {
			/* Insert handle at first empty entry */
			crash_shutdown_handles[i] = handler;
			rc = 0;
			break;
		}

	if (i == CRASH_HANDLER_MAX) {
		printk(KERN_ERR "Crash shutdown handles full, not registered.\n");
		rc = 1;
	}

	spin_unlock(&crash_handlers_lock);
	return rc;
}
EXPORT_SYMBOL(crash_shutdown_register);
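/*
 * Illustrative usage sketch (hypothetical driver code, not part of this
 * file): a driver that cannot re-initialize its device from the kdump
 * kernel registers a handler to quiesce it at crash time. The handler runs
 * with interrupts hard-disabled, under the fault protection set up in
 * default_machine_crash_shutdown(), so it must not sleep.
 *
 *	static void mydev_crash_shutdown(void)
 *	{
 *		// Stop DMA so the device cannot scribble over memory
 *		// while the dump is being captured.
 *		writel(0, mydev_regs + MYDEV_DMA_CTRL);
 *	}
 *
 *	// At probe time:
 *	if (crash_shutdown_register(mydev_crash_shutdown))
 *		dev_warn(dev, "crash shutdown handler not registered\n");
 *
 *	// At remove time:
 *	crash_shutdown_unregister(mydev_crash_shutdown);
 *
 * mydev_regs and MYDEV_DMA_CTRL are stand-ins for the driver's own MMIO
 * mapping and register layout.
 */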
int crash_shutdown_unregister(crash_shutdown_t handler)
{
	unsigned int i, rc;

	spin_lock(&crash_handlers_lock);
	for (i = 0; i < CRASH_HANDLER_MAX; i++)
		if (crash_shutdown_handles[i] == handler)
			break;

	if (i == CRASH_HANDLER_MAX) {
		printk(KERN_ERR "Crash shutdown handle not found\n");
		rc = 1;
	} else {
		/* Shift handles down */
		for (; i < (CRASH_HANDLER_MAX - 1); i++)
			crash_shutdown_handles[i] =
				crash_shutdown_handles[i+1];
		/*
		 * Reset the last entry to NULL now that it has been shifted
		 * down, so that new handles can be added there.
		 */
		crash_shutdown_handles[i] = NULL;
		rc = 0;
	}

	spin_unlock(&crash_handlers_lock);
	return rc;
}
EXPORT_SYMBOL(crash_shutdown_unregister);

void default_machine_crash_shutdown(struct pt_regs *regs)
{
	volatile unsigned int i;
	int (*old_handler)(struct pt_regs *regs);

	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
		is_via_system_reset = 1;

	if (IS_ENABLED(CONFIG_SMP))
		crash_smp_send_stop();
	else
		crash_kexec_prepare();

	crash_save_cpu(regs, crashing_cpu);

	time_to_dump = 1;

	crash_kexec_wait_realmode(crashing_cpu);

	machine_kexec_mask_interrupts();

	/*
	 * Call registered shutdown routines safely. Swap out
	 * __debugger_fault_handler, and replace it on exit.
	 */
	old_handler = __debugger_fault_handler;
	__debugger_fault_handler = handle_fault;
	crash_shutdown_cpu = smp_processor_id();
	for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
		if (setjmp(crash_shutdown_buf) == 0) {
			/*
			 * Insert syncs and delay to ensure
			 * instructions in the dangerous region don't
			 * leak away from this protected region.
			 */
			asm volatile("sync; isync");
			/* dangerous region */
			crash_shutdown_handles[i]();
			asm volatile("sync; isync");
		}
	}
	crash_shutdown_cpu = -1;
	__debugger_fault_handler = old_handler;

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 0);
}

#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Advertise the preferred elfcorehdr size to userspace via the
 * /sys/kernel/crash_elfcorehdr_size sysfs interface.
 */
unsigned int arch_crash_get_elfcorehdr_size(void)
{
	unsigned long phdr_cnt;

	/* A program header for each possible CPU, plus one for vmcoreinfo */
	phdr_cnt = num_possible_cpus() + 1;
	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES;

	return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr));
}
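/*
 * Worked example (illustrative numbers, assuming the 64-bit ELF layout
 * where sizeof(struct elfhdr) is 64 bytes and sizeof(Elf64_Phdr) is 56
 * bytes): with 16 possible CPUs, one vmcoreinfo header, and a hypothetical
 * CONFIG_CRASH_MAX_MEMORY_RANGES of 8192, the advertised size would be
 *
 *	phdr_cnt = 16 + 1 + 8192 = 8209
 *	size     = 64 + 8209 * 56 = 459768 bytes
 *
 * i.e. roughly 450 KiB reserved up front so the elfcorehdr can grow as
 * memory is hotplugged without reloading the kdump kernel.
 */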
/**
 * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace the old
 * elfcorehdr in the kexec segment array with it.
 * @image: the active struct kimage
 * @mn: pointer to the memory hotplug notification data
 */
static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
{
	int ret;
	struct crash_mem *cmem = NULL;
	struct kexec_segment *ksegment;
	void *ptr, *mem, *elfbuf = NULL;
	unsigned long elfsz, memsz, base_addr, size;

	ksegment = &image->segment[image->elfcorehdr_index];
	mem = (void *) ksegment->mem;
	memsz = ksegment->memsz;

	ret = get_crash_memory_ranges(&cmem);
	if (ret) {
		pr_err("Failed to get crash mem range\n");
		return;
	}

	/*
	 * The hot-unplugged memory is still part of the crash memory
	 * ranges; remove it here.
	 */
	if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
		base_addr = PFN_PHYS(mn->start_pfn);
		size = mn->nr_pages * PAGE_SIZE;
		ret = remove_mem_range(&cmem, base_addr, size);
		if (ret) {
			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
			goto out;
		}
	}

	ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
	if (ret) {
		pr_err("Failed to prepare elf header\n");
		goto out;
	}

	/*
	 * The kernel is unlikely to hit this case, since the elfcorehdr kexec
	 * segment (memsz) is built with additional space to accommodate a
	 * growing number of crash memory ranges while loading the kdump
	 * kernel. This check is just a safeguard against the unforeseen.
	 */
	if (elfsz > memsz) {
		pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu\n", elfsz, memsz);
		goto out;
	}

	ptr = __va(mem);
	if (ptr) {
		/* Temporarily invalidate the crash image while it is replaced */
		xchg(&kexec_crash_image, NULL);

		/* Replace the old elfcorehdr with the newly prepared one */
		memcpy(ptr, elfbuf, elfsz);

		/* The crash image is now valid once again */
		xchg(&kexec_crash_image, image);
	}
out:
	kvfree(cmem);
	kvfree(elfbuf);
}

/**
 * get_fdt_index() - Loop through the kexec segment array and find
 * the index of the FDT segment.
 * @image: a pointer to kexec_crash_image
 *
 * Returns the index of the FDT segment in the kexec segment array
 * if found; otherwise -1.
 */
static int get_fdt_index(struct kimage *image)
{
	void *ptr;
	unsigned long mem;
	int i, fdt_index = -1;

	/* Find the FDT segment index in the kexec segment array. */
	for (i = 0; i < image->nr_segments; i++) {
		mem = image->segment[i].mem;
		ptr = __va(mem);

		if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
			fdt_index = i;
			break;
		}
	}

	return fdt_index;
}

/**
 * update_crash_fdt() - update the cpus node of the crash FDT.
 * @image: a pointer to kexec_crash_image
 */
static void update_crash_fdt(struct kimage *image)
{
	void *fdt;
	int fdt_index;

	fdt_index = get_fdt_index(image);
	if (fdt_index < 0) {
		pr_err("Unable to locate FDT segment.\n");
		return;
	}

	fdt = __va((void *)image->segment[fdt_index].mem);

	/* Temporarily invalidate the crash image while it is replaced */
	xchg(&kexec_crash_image, NULL);

	/* Update the FDT to reflect changes in CPU resources */
	if (update_cpus_node(fdt))
		pr_err("Failed to update crash FDT\n");

	/* The crash image is now valid once again */
	xchg(&kexec_crash_image, image);
}

int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
{
#ifdef CONFIG_KEXEC_FILE
	if (image->file_mode)
		return 1;
#endif
	return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
}
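/*
 * Illustrative sketch (assumed shape of the generic caller in
 * kernel/crash_core.c, simplified): the common hotplug notifier serializes
 * against kexec, records the action on the image, and only then calls the
 * arch hook below, roughly:
 *
 *	if (!kexec_trylock())
 *		return;
 *	image = kexec_crash_image;
 *	if (image) {
 *		image->hp_action = hp_action;	// e.g. KEXEC_CRASH_HP_ADD_CPU
 *		arch_crash_handle_hotplug_event(image, arg);
 *	}
 *	kexec_unlock();
 *
 * The exact locking and support checks live in the generic code; this is
 * only a sketch of the call flow into the hook.
 */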
/**
 * arch_crash_handle_hotplug_event() - Handle crash CPU/memory hotplug events
 * and update the necessary kexec segments based on the hotplug event.
 * @image: a pointer to kexec_crash_image
 * @arg: a pointer to the struct memory_notify data for memory hotplug events;
 *       NULL for CPU hotplug events.
 *
 * Update the kdump image based on the type of hotplug event, represented by
 * image->hp_action.
 * CPU add: update the FDT segment to include the newly added CPU.
 * CPU remove: no action is needed, with the assumption that it's okay to
 *             have offline CPUs as part of the FDT.
 * Memory add/remove: recreate the elfcorehdr so that it reflects the updated
 *                    crash memory ranges.
 */
void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
{
	struct memory_notify *mn;

	switch (image->hp_action) {
	case KEXEC_CRASH_HP_REMOVE_CPU:
		return;

	case KEXEC_CRASH_HP_ADD_CPU:
		update_crash_fdt(image);
		break;

	case KEXEC_CRASH_HP_REMOVE_MEMORY:
	case KEXEC_CRASH_HP_ADD_MEMORY:
		mn = (struct memory_notify *)arg;
		update_crash_elfcorehdr(image, mn);
		return;

	default:
		pr_warn_once("Unknown hotplug action\n");
	}
}
#endif /* CONFIG_CRASH_HOTPLUG */