// SPDX-License-Identifier: GPL-2.0
/*
 * S390 version
 *   Copyright IBM Corp. 1999, 2012
 *   Author(s): Hartmut Penner (hp@de.ibm.com),
 *              Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 * Derived from "arch/i386/kernel/setup.c"
 *   Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define pr_fmt(fmt) "setup: " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>

#include <asm/archrandom.h>
#include <asm/boot_data.h>
#include <asm/machine.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/physmem_info.h>
#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/*
 * Some code and data needs to stay below 2 GB, even when the kernel would be
 * relocated above 2 GB, because it has to use 31 bit addresses.
 * Such code and data is part of the .amode31 section.
 */
char __amode31_ref *__samode31 = _samode31;
char __amode31_ref *__eamode31 = _eamode31;
char __amode31_ref *__stext_amode31 = _stext_amode31;
char __amode31_ref *__etext_amode31 = _etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2G which is handled by the AMODE31
 * sections.
 * Because the AMODE31 sections are relocated below 2G at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with new addresses after the relocation. The initial initialization of
 * control registers occurs in head.S and then gets updated again after AMODE31
 * relocation. We must access the relevant AMODE31 tables indirectly via
 * pointers placed in the .amode31.refs linker section. Those pointers get
 * updated automatically during AMODE31 relocation and always contain a valid
 * address within AMODE31 sections.
 */

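/*
 * Tables referenced by CR2, CR5 and CR15. The naming below is an assumption
 * based on common z/Architecture terms: a dispatchable-unit control table
 * (DUCT), an ASN-second-table entry (ASTE), a dispatchable-unit access-list
 * designation (DUALD) and a linkage-stack area.
 */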
static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
	[1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0,
	0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
	0, 0, 0x89000000, 0,
	0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;

unsigned long __bootdata_preserved(max_mappable);
struct physmem_info __bootdata(physmem_info);

struct vm_layout __bootdata_preserved(vm_layout);
EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
struct oldmem_data __bootdata_preserved(oldmem_data);

char __bootdata(boot_rb)[PAGE_SIZE * 2];
bool __bootdata(boot_earlyprintk);
size_t __bootdata(boot_rb_off);
char __bootdata(bootdebug_filter)[128];
bool __bootdata(bootdebug);

unsigned long __bootdata_preserved(VMALLOC_START);
EXPORT_SYMBOL(VMALLOC_START);

unsigned long __bootdata_preserved(VMALLOC_END);
EXPORT_SYMBOL(VMALLOC_END);

struct page *__bootdata_preserved(vmemmap);
EXPORT_SYMBOL(vmemmap);
unsigned long __bootdata_preserved(vmemmap_size);

unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through which is safe
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * On s390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

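/*
 * Illustrative example (an assumption, not taken from the original source):
 * booting with "condev=0x001f" sets console_devno to 0x1f and resets
 * console_irq, as implemented by condev_setup() below.
 */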
static int __init condev_setup(char *str)
{
	int vdev;

	vdev = simple_strtoul(str, &str, 0);
	if (vdev >= 0 && vdev < 65536) {
		console_devno = vdev;
		console_irq = -1;
	}
	return 1;
}

__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
	if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
		add_preferred_console("ttyS", 0, NULL);
	else if (CONSOLE_IS_3270)
		add_preferred_console("tty3270", 0, NULL);
	else if (CONSOLE_IS_VT220)
		add_preferred_console("ttysclp", 0, NULL);
	else if (CONSOLE_IS_HVC)
		add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
	if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
		SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
	if (!strcmp(str, "3215"))
		SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
	if (!strcmp(str, "3270"))
		SET_CONSOLE_3270;
#endif
	set_preferred_console();
	return 1;
}

__setup("conmode=", conmode_setup);

static void __init conmode_default(void)
{
	char query_buffer[1024];
	char *ptr;

	if (machine_is_vm()) {
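		/*
		 * The layout of the CP responses is an assumption derived
		 * from the offsets used in the parsing below: the console
		 * device number starts at column 5 of the "QUERY CONSOLE"
		 * response and the subchannel number follows the
		 * "SUBCHANNEL =" keyword.
		 */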
		cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
		console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
		ptr = strstr(query_buffer, "SUBCHANNEL =");
		console_irq = simple_strtoul(ptr + 13, NULL, 16);
		cpcmd("QUERY TERM", query_buffer, 1024, NULL);
		ptr = strstr(query_buffer, "CONMODE");
		/*
		 * Set the conmode to 3215 so that the device recognition
		 * will set the cu_type of the console to 3215. If the
		 * conmode is 3270 and we don't set it back then both
		 * 3215 and the 3270 driver will try to access the console
		 * device (3215 as console and 3270 as normal tty).
		 */
		cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
		if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
			return;
		}
		if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		} else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
			SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
			SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
			SET_CONSOLE_SCLP;
#endif
		}
	} else if (machine_is_kvm()) {
		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
			SET_CONSOLE_VT220;
		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
			SET_CONSOLE_SCLP;
		else
			SET_CONSOLE_HVC;
	} else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
		SET_CONSOLE_SCLP;
#endif
	}
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
	if (!is_ipl_type_dump())
		return;
	if (oldmem_data.start)
		return;
	strlcat(boot_command_line, " cio_ignore=all,!ipldev,!condev", COMMAND_LINE_SIZE);
	console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
	if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_restart(command);
}

void machine_halt(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_halt();
}

void machine_power_off(void)
{
	if (!in_interrupt() || oops_in_progress)
		/*
		 * Only unblank the console if we are called in enabled
		 * context or a bust_spinlocks cleared the way for us.
		 */
		console_unblank();
	_machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

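/*
 * Allocate a THREAD_SIZE aligned kernel stack from vmalloc space. The
 * allocation is marked with kmemleak_not_leak() so it is not reported as a
 * leak (the reference to it may live in places kmemleak does not scan).
 */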
unsigned long stack_alloc(void)
{
	void *stack;

	stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
			       NUMA_NO_NODE, __builtin_return_address(0));
	kmemleak_not_leak(stack);
	return (unsigned long)stack;
}

void stack_free(unsigned long stack)
{
	vfree((void *)stack);
}

static unsigned long __init stack_alloc_early(void)
{
	unsigned long stack;

	stack = (unsigned long)memblock_alloc_or_panic(THREAD_SIZE, THREAD_SIZE);
	return stack;
}

static void __init setup_lowcore(void)
{
	struct lowcore *lc, *abs_lc;

	/*
	 * Setup lowcore for boot cpu
	 */
	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
	if (!lc)
		panic("%s: Failed to allocate %zu bytes align=%zx\n",
		      __func__, sizeof(*lc), sizeof(*lc));

	lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
	lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
	lc->restart_psw.addr = __pa(restart_int_handler);
	lc->external_new_psw.mask = PSW_KERNEL_BITS;
	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
	lc->svc_new_psw.mask = PSW_KERNEL_BITS;
	lc->svc_new_psw.addr = (unsigned long) system_call;
	lc->program_new_psw.mask = PSW_KERNEL_BITS;
	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
	lc->io_new_psw.mask = PSW_KERNEL_BITS;
	lc->io_new_psw.addr = (unsigned long) io_int_handler;
	lc->clock_comparator = clock_comparator_max;
	lc->current_task = (unsigned long)&init_task;
	lc->lpp = LPP_MAGIC;
	lc->preempt_count = get_lowcore()->preempt_count;
	nmi_alloc_mcesa_early(&lc->mcesad);
	lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
	lc->exit_timer = get_lowcore()->exit_timer;
	lc->user_timer = get_lowcore()->user_timer;
	lc->system_timer = get_lowcore()->system_timer;
	lc->steal_timer = get_lowcore()->steal_timer;
	lc->last_update_timer = get_lowcore()->last_update_timer;
	lc->last_update_clock = get_lowcore()->last_update_clock;
	/*
	 * Allocate the global restart stack which is the same for
	 * all CPUs in case *one* of them does a PSW restart.
	 */
	restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET);
	lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
	lc->kernel_stack = get_lowcore()->kernel_stack;
	/*
	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
	 * restart data to the absolute zero lowcore. This is necessary if
	 * PSW restart is done on an offline CPU that has lowcore zero.
	 */
	lc->restart_stack = (unsigned long) restart_stack;
	lc->restart_fn = (unsigned long) do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;
	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
	arch_spin_lock_setup(0);
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;
	lc->kernel_asce = get_lowcore()->kernel_asce;
	lc->user_asce = get_lowcore()->user_asce;

	system_ctlreg_init_save_area(lc);
	abs_lc = get_abs_lowcore();
	abs_lc->restart_stack = lc->restart_stack;
	abs_lc->restart_fn = lc->restart_fn;
	abs_lc->restart_data = lc->restart_data;
	abs_lc->restart_source = lc->restart_source;
	abs_lc->restart_psw = lc->restart_psw;
	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
	abs_lc->program_new_psw = lc->program_new_psw;
	abs_lc->mcesad = lc->mcesad;
	put_abs_lowcore(abs_lc);

	set_prefix(__pa(lc));
	lowcore_ptr[0] = lc;
	if (abs_lowcore_map(0, lowcore_ptr[0], false))
		panic("Couldn't setup absolute lowcore");
}

static struct resource code_resource = {
	.name = "Kernel code",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
	.name = "Kernel data",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
	.name = "Kernel bss",
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
	&code_resource,
	&data_resource,
	&bss_resource,
};

static void __init setup_resources(void)
{
	struct resource *res, *std_res, *sub_res;
	phys_addr_t start, end;
	int j;
	u64 i;

	code_resource.start = __pa_symbol(_text);
	code_resource.end = __pa_symbol(_etext) - 1;
	data_resource.start = __pa_symbol(_etext);
	data_resource.end = __pa_symbol(_edata) - 1;
	bss_resource.start = __pa_symbol(__bss_start);
	bss_resource.end = __pa_symbol(__bss_stop) - 1;

	for_each_mem_range(i, &start, &end) {
		res = memblock_alloc_or_panic(sizeof(*res), 8);
		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

		res->name = "System RAM";
		res->start = start;
		/*
		 * In memblock, end points to the first byte after the
		 * range while in resources, end points to the last byte in
		 * the range.
		 */
		res->end = end - 1;
		request_resource(&iomem_resource, res);

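		/*
		 * Attach the kernel code/data/bss resources to the RAM
		 * ranges they intersect; a resource that extends past the
		 * end of the current range is split at the range boundary.
		 */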
		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
			std_res = standard_resources[j];
			if (std_res->start < res->start ||
			    std_res->start > res->end)
				continue;
			if (std_res->end > res->end) {
				sub_res = memblock_alloc_or_panic(sizeof(*sub_res), 8);
				*sub_res = *std_res;
				sub_res->end = res->end;
				std_res->start = res->end + 1;
				request_resource(res, sub_res);
			} else {
				request_resource(res, std_res);
			}
		}
	}
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Re-add removed crash kernel memory as reserved memory. This makes
	 * sure it will be mapped with the identity mapping and struct pages
	 * will be created, so it can be resized later on.
	 * However add it later since the crash kernel resource should not be
	 * part of the System RAM resource.
	 */
	if (crashk_res.end) {
		memblock_add_node(crashk_res.start, resource_size(&crashk_res),
				  0, MEMBLOCK_NONE);
		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
		insert_resource(&iomem_resource, &crashk_res);
	}
#endif
}

static void __init setup_memory_end(void)
{
	max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
	pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
			      unsigned long action, void *data)
{
	struct memory_notify *arg = data;

	if (action != MEM_GOING_OFFLINE)
		return NOTIFY_OK;
	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
		return NOTIFY_BAD;
	return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
	.notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Reserve page tables created by decompressor
 */
static void __init reserve_pgtables(void)
{
	unsigned long start, end;
	struct reserved_range *range;

	for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
		memblock_reserve(start, end - start);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
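/*
 * Illustrative usage (standard crashkernel= command line syntax, e.g.
 * "crashkernel=256M" or "crashkernel=256M@1G"); the actual parsing is done
 * by parse_crashkernel() below.
 */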
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
	unsigned long long crash_base, crash_size;
	phys_addr_t low, high;
	int rc;

	rc = parse_crashkernel(boot_command_line, ident_map_size,
			       &crash_size, &crash_base, NULL, NULL, NULL);

	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
	if (rc || crash_size == 0)
		return;

	if (memblock.memory.regions[0].size < crash_size) {
		pr_info("crashkernel reservation failed: %s\n",
			"first memory chunk must be at least crashkernel size");
		return;
	}

	low = crash_base ?: oldmem_data.start;
	high = low + crash_size;
	if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
		crash_base = low;
	} else {
		/* Find suitable area in free memory */
		low = max_t(unsigned long, crash_size, sclp.hsa_size);
		high = crash_base ? crash_base + crash_size : ULONG_MAX;

		if (crash_base && crash_base < low) {
			pr_info("crashkernel reservation failed: %s\n",
				"crash_base too low");
			return;
		}
		low = crash_base ?: low;
		crash_base = memblock_phys_alloc_range(crash_size,
						       KEXEC_CRASH_MEM_ALIGN,
						       low, high);
	}

	if (!crash_base) {
		pr_info("crashkernel reservation failed: %s\n",
			"no suitable area found");
		return;
	}

	if (register_memory_notifier(&kdump_mem_nb)) {
		memblock_phys_free(crash_base, crash_size);
		return;
	}

	if (!oldmem_data.start && machine_is_vm())
		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	memblock_remove(crash_base, crash_size);
	pr_info("Reserving %lluMB of memory at %lluMB "
		"for crashkernel (System RAM: %luMB)\n",
		crash_size >> 20, crash_base >> 20,
		(unsigned long)memblock.memory.total_size >> 20);
	os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
	unsigned long addr, size;

	if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
		return;
	initrd_start = (unsigned long)__va(addr);
	initrd_end = initrd_start + size;
	memblock_reserve(addr, size);
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
	if (ipl_cert_list_addr)
		memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_physmem_info(void)
{
	unsigned long addr, size;

	if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
		memblock_reserve(addr, size);
}

static void __init free_physmem_info(void)
{
	unsigned long addr, size;

	if (get_physmem_reserved(RR_MEM_DETECT_EXT, &addr, &size))
		memblock_phys_free(addr, size);
}

static void __init memblock_add_physmem_info(void)
{
	unsigned long start, end;
	int i;

	pr_debug("physmem info source: %s (%hhd)\n",
		 get_physmem_info_source(), physmem_info.info_source);
	/* keep memblock lists close to the kernel */
	memblock_set_bottom_up(true);
	for_each_physmem_usable_range(i, &start, &end)
		memblock_add(start, end - start);
	for_each_physmem_online_range(i, &start, &end)
		memblock_physmem_add(start, end - start);
	memblock_set_bottom_up(false);
	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}

static void __init setup_high_memory(void)
{
	high_memory = __va(ident_map_size);
}

/*
 * Reserve memory used for lowcore.
 */
static void __init reserve_lowcore(void)
{
	void *lowcore_start = get_lowcore();
	void *lowcore_end = lowcore_start + sizeof(struct lowcore);
	void *start, *end;

	if (absolute_pointer(__identity_base) < lowcore_end) {
		start = max(lowcore_start, (void *)__identity_base);
		end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
		memblock_reserve(__pa(start), __pa(end));
	}
}

/*
 * Reserve memory used for absolute lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
	memblock_reserve(0, STARTUP_NORMAL_OFFSET);
	memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
	memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
	memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
	memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
	memblock_reserve(__pa(_stext), _end - _stext);
}

static void __init setup_memory(void)
{
	phys_addr_t start, end;
	u64 i;

	/*
	 * Init storage key for present memory
	 */
	for_each_mem_range(i, &start, &end)
		storage_key_init_range(start, end);

	psw_set_key(PAGE_DEFAULT_KEY);
}

static void __init relocate_amode31_section(void)
{
	unsigned long amode31_size = __eamode31 - __samode31;
	long amode31_offset, *ptr;

	amode31_offset = AMODE31_START - (unsigned long)__samode31;
	pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);

	/* Move original AMODE31 section to the new one */
	memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
	/* Zero out the old AMODE31 section to catch invalid accesses within it */
	memset(__samode31, 0, amode31_size);

	/* Update all AMODE31 region references */
	for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
		*ptr += amode31_offset;
}

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
	union ctlreg2 cr2;
	union ctlreg5 cr5;
	union ctlreg15 cr15;

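	/*
	 * Let the DUCT point to the ASTE and the access-list designation,
	 * then make CR2/CR5/CR15 point to the relocated AMODE31 copies.
	 * The slot indices and shift amounts appear to mirror the table
	 * alignments (64-byte aligned origins for CR2/CR5, 8-byte
	 * granularity for the linkage-stack address); this description is
	 * an assumption derived from the code below.
	 */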
	__ctl_duct[1] = (unsigned long)__ctl_aste;
	__ctl_duct[2] = (unsigned long)__ctl_aste;
	__ctl_duct[4] = (unsigned long)__ctl_duald;

	/* Update control registers CR2, CR5 and CR15 */
	local_ctl_store(2, &cr2.reg);
	local_ctl_store(5, &cr5.reg);
	local_ctl_store(15, &cr15.reg);
	cr2.ducto = (unsigned long)__ctl_duct >> 6;
	cr5.pasteo = (unsigned long)__ctl_duct >> 6;
	cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
	system_ctl_load(2, &cr2.reg);
	system_ctl_load(5, &cr5.reg);
	system_ctl_load(15, &cr15.reg);
}

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
	struct sysinfo_3_2_2 *vmms;

	vmms = memblock_alloc_or_panic(PAGE_SIZE, PAGE_SIZE);
	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
	memblock_free(vmms, PAGE_SIZE);

	if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
		static_branch_enable(&s390_arch_random_available);
}

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
	union diag318_info diag318_info = {
		.cpnc = CPNC_LINUX,
		.cpvc = 0,
	};

	if (!sclp.has_diag318)
		return;

	diag_stat_inc(DIAG_STAT_X318);
	asm volatile("diag %0,0,0x318" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
	struct ipl_rb_component_entry *ptr, *end;
	char *str;

	if (!early_ipl_comp_list_addr)
		return;
	if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
		pr_info("Linux is running with Secure-IPL enabled\n");
	else
		pr_info("Linux is running with Secure-IPL disabled\n");
	ptr = __va(early_ipl_comp_list_addr);
	end = (void *) ptr + early_ipl_comp_list_size;
	pr_info("The IPL report contains the following components:\n");
	while (ptr < end) {
		if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
			if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
				str = "signed, verified";
			else
				str = "signed, verification failed";
		} else {
			str = "not signed";
		}
		pr_info("%016llx - %016llx (%s)\n",
			ptr->addr, ptr->addr + ptr->len, str);
		ptr++;
	}
}

/*
 * Print avoiding interpretation of % in buf and taking bootdebug option
 * into consideration.
 */
static void __init print_rb_entry(const char *buf)
{
	char fmt[] = KERN_SOH "0boot: %s";
	int level = printk_get_level(buf);

	buf = skip_timestamp(printk_skip_level(buf));
	if (level == KERN_DEBUG[1] && (!bootdebug || !bootdebug_filter_match(buf)))
		return;

	fmt[1] = level;
	printk(fmt, buf);
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
	/*
	 * print what head.S has found out about the machine
	 */
	if (machine_is_vm())
		pr_info("Linux is running as a z/VM "
			"guest operating system in 64-bit mode\n");
	else if (machine_is_kvm())
		pr_info("Linux is running under KVM in 64-bit mode\n");
	else if (machine_is_lpar())
		pr_info("Linux is running natively in 64-bit mode\n");
	else
		pr_info("Linux is running as a guest in 64-bit mode\n");
	/* Print decompressor messages if not already printed */
	if (!boot_earlyprintk)
		boot_rb_foreach(print_rb_entry);

	if (machine_has_relocated_lowcore())
		pr_info("Lowcore relocated to 0x%px\n", get_lowcore());

	log_component_list();

	/* Have one command line that is parsed and saved in /proc/cmdline */
	/* boot_command_line has been already set up in early.c */
	*cmdline_p = boot_command_line;

	ROOT_DEV = Root_RAM0;

	setup_initial_init_mm(_text, _etext, _edata, _end);

	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
		nospec_auto_detect();

	jump_label_init();
	parse_early_param();
#ifdef CONFIG_CRASH_DUMP
	/* Deactivate elfcorehdr= kernel parameter */
	elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

	os_info_init();
	setup_ipl();
	setup_control_program_code();

	/* Do some memory reservations *before* memory is added to memblock */
	reserve_pgtables();
	reserve_lowcore();
	reserve_kernel();
	reserve_initrd();
	reserve_certificate_list();
	reserve_physmem_info();
	memblock_set_current_limit(ident_map_size);
	memblock_allow_resize();

	/* Get information about *all* installed memory */
	memblock_add_physmem_info();

	free_physmem_info();
	setup_memory_end();
	setup_high_memory();
	memblock_dump_all();
	setup_memory();

	relocate_amode31_section();
	setup_cr();
	setup_uv();
	dma_contiguous_reserve(ident_map_size);
	vmcp_cma_reserve();

	reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
	/*
	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
	 * Therefore CPU and device initialization should be done afterwards.
	 */
	smp_save_dump_secondary_cpus();
#endif

	setup_resources();
	setup_lowcore();
	smp_fill_possible_mask();
	cpu_detect_mhz_feature();
	cpu_init();
	numa_setup();
	smp_detect_cpus();
	topology_init_early();
	setup_protection_map();
	/*
	 * Create kernel page tables.
	 */
	paging_init();

	/*
	 * After paging_init created the kernel page table, the new PSWs
	 * in lowcore can now run with DAT enabled.
	 */
#ifdef CONFIG_CRASH_DUMP
	smp_save_dump_ipl_cpu();
#endif

	/* Setup default console */
	conmode_default();
	set_preferred_console();

	apply_alternative_instructions();
	if (IS_ENABLED(CONFIG_EXPOLINE))
		nospec_init_branches();

	/* Setup zfcp/nvme dump support */
	setup_zfcpdump();

	/* Add system specific data to the random pool */
	setup_randomness();
}

void __init arch_cpu_finalize_init(void)
{
	sclp_init();
}