1 // SPDX-License-Identifier: GPL-2.0-or-later
2 #include <linux/acpi.h>
3 #include <linux/cpu.h>
4 #include <linux/delay.h>
5 #include <linux/io.h>
6 #include <linux/kexec.h>
7 #include <linux/memblock.h>
8 #include <linux/pgtable.h>
9 #include <linux/sched/hotplug.h>
10 #include <asm/apic.h>
11 #include <asm/barrier.h>
12 #include <asm/init.h>
13 #include <asm/intel_pt.h>
14 #include <asm/nmi.h>
15 #include <asm/processor.h>
16 #include <asm/reboot.h>
17 
/* Physical address of the Multiprocessor Wakeup Structure mailbox */
static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;

/*
 * Virtual address of the Multiprocessor Wakeup Structure mailbox.
 * Stays NULL until the first acpi_wakeup_cpu() call maps it with memremap().
 */
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;

/*
 * Physical address of the top-level page table built by
 * acpi_mp_setup_reset(); passed to asm_acpi_mp_play_dead().
 */
static u64 acpi_mp_pgd __ro_after_init;
/*
 * Reset vector address recorded by acpi_mp_setup_reset(); passed to
 * asm_acpi_mp_play_dead().
 */
static u64 acpi_mp_reset_vector_paddr __ro_after_init;
26 
acpi_mp_stop_this_cpu(void)27 static void acpi_mp_stop_this_cpu(void)
28 {
29 	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
30 }
31 
acpi_mp_play_dead(void)32 static void acpi_mp_play_dead(void)
33 {
34 	play_dead_common();
35 	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
36 }
37 
acpi_mp_cpu_die(unsigned int cpu)38 static void acpi_mp_cpu_die(unsigned int cpu)
39 {
40 	u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
41 	unsigned long timeout;
42 
43 	/*
44 	 * Use TEST mailbox command to prove that BIOS got control over
45 	 * the CPU before declaring it dead.
46 	 *
47 	 * BIOS has to clear 'command' field of the mailbox.
48 	 */
49 	acpi_mp_wake_mailbox->apic_id = apicid;
50 	smp_store_release(&acpi_mp_wake_mailbox->command,
51 			  ACPI_MP_WAKE_COMMAND_TEST);
52 
53 	/* Don't wait longer than a second. */
54 	timeout = USEC_PER_SEC;
55 	while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
56 		udelay(1);
57 
58 	if (!timeout)
59 		pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
60 }
61 
62 /* The argument is required to match type of x86_mapping_info::alloc_pgt_page */
alloc_pgt_page(void * dummy)63 static void __init *alloc_pgt_page(void *dummy)
64 {
65 	return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
66 }
67 
free_pgt_page(void * pgt,void * dummy)68 static void __init free_pgt_page(void *pgt, void *dummy)
69 {
70 	return memblock_free(pgt, PAGE_SIZE);
71 }
72 
acpi_mp_setup_reset(u64 reset_vector)73 static int __init acpi_mp_setup_reset(u64 reset_vector)
74 {
75 	struct x86_mapping_info info = {
76 		.alloc_pgt_page = alloc_pgt_page,
77 		.free_pgt_page	= free_pgt_page,
78 		.page_flag      = __PAGE_KERNEL_LARGE_EXEC,
79 		.kernpg_flag    = _KERNPG_TABLE_NOENC,
80 	};
81 	unsigned long mstart, mend;
82 	pgd_t *pgd;
83 
84 	pgd = alloc_pgt_page(NULL);
85 	if (!pgd)
86 		return -ENOMEM;
87 
88 	for (int i = 0; i < nr_pfn_mapped; i++) {
89 		mstart = pfn_mapped[i].start << PAGE_SHIFT;
90 		mend   = pfn_mapped[i].end << PAGE_SHIFT;
91 		if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
92 			kernel_ident_mapping_free(&info, pgd);
93 			return -ENOMEM;
94 		}
95 	}
96 
97 	mstart = PAGE_ALIGN_DOWN(reset_vector);
98 	mend = mstart + PAGE_SIZE;
99 	if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
100 		kernel_ident_mapping_free(&info, pgd);
101 		return -ENOMEM;
102 	}
103 
104 	/*
105 	 * Make sure asm_acpi_mp_play_dead() is present in the identity mapping
106 	 * at the same place as in the kernel page tables.
107 	 * asm_acpi_mp_play_dead() switches to the identity mapping and the
108 	 * function must be present at the same spot in the virtual address space
109 	 * before and after switching page tables.
110 	 */
111 	info.offset = __START_KERNEL_map - phys_base;
112 	mstart = PAGE_ALIGN_DOWN(__pa(asm_acpi_mp_play_dead));
113 	mend = mstart + PAGE_SIZE;
114 	if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
115 		kernel_ident_mapping_free(&info, pgd);
116 		return -ENOMEM;
117 	}
118 
119 	smp_ops.play_dead = acpi_mp_play_dead;
120 	smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
121 	smp_ops.cpu_die = acpi_mp_cpu_die;
122 
123 	acpi_mp_reset_vector_paddr = reset_vector;
124 	acpi_mp_pgd = __pa(pgd);
125 
126 	return 0;
127 }
128 
/*
 * Wake up a secondary CPU through the MADT multiprocessor wakeup mailbox.
 *
 * @apicid:   APIC ID of the CPU to wake.
 * @start_ip: address written to the mailbox 'wakeup_vector' field.
 *
 * Return: 0 once the wakeup command has been acknowledged, -EOPNOTSUPP if
 * no mailbox address is known, -ENOMEM if the mailbox cannot be mapped.
 */
static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
{
	if (!acpi_mp_wake_mailbox_paddr) {
		pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
	 *
	 * Wakeup of secondary CPUs is fully serialized in the core code.
	 * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
	 */
	if (!acpi_mp_wake_mailbox) {
		acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
						sizeof(*acpi_mp_wake_mailbox),
						MEMREMAP_WB);
		/*
		 * Bail out instead of dereferencing a NULL mailbox pointer
		 * below.  The pointer stays NULL, so a later call retries the
		 * mapping.
		 */
		if (!acpi_mp_wake_mailbox) {
			pr_err_once("Failed to map MADT wakeup mailbox\n");
			return -ENOMEM;
		}
	}

	/*
	 * Mailbox memory is shared between the firmware and OS. Firmware will
	 * listen on mailbox command address, and once it receives the wakeup
	 * command, the CPU associated with the given apicid will be booted.
	 *
	 * The value of 'apic_id' and 'wakeup_vector' must be visible to the
	 * firmware before the wakeup command is visible.  smp_store_release()
	 * ensures ordering and visibility.
	 */
	acpi_mp_wake_mailbox->apic_id	    = apicid;
	acpi_mp_wake_mailbox->wakeup_vector = start_ip;
	smp_store_release(&acpi_mp_wake_mailbox->command,
			  ACPI_MP_WAKE_COMMAND_WAKEUP);

	/*
	 * Wait for the CPU to wake up.
	 *
	 * The CPU being woken up is essentially in a spin loop waiting to be
	 * woken up. It should not take long for it to wake up and acknowledge
	 * by zeroing out ->command.
	 *
	 * ACPI specification doesn't provide any guidance on how long kernel
	 * has to wait for a wake up acknowledgment. It also doesn't provide
	 * a way to cancel a wake up request if it takes too long.
	 *
	 * In TDX environment, the VMM has control over how long it takes to
	 * wake up secondary. It can postpone scheduling secondary vCPU
	 * indefinitely. Giving up on wake up request and reporting error opens
	 * possible attack vector for VMM: it can wake up a secondary CPU when
	 * kernel doesn't expect it. Wait until positive result of the wake up
	 * request.
	 */
	while (READ_ONCE(acpi_mp_wake_mailbox->command))
		cpu_relax();

	return 0;
}
185 
acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup * mp_wake)186 static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
187 {
188 	cpu_hotplug_disable_offlining();
189 
190 	/*
191 	 * ACPI MADT doesn't allow to offline a CPU after it was onlined. This
192 	 * limits kexec: the second kernel won't be able to use more than one CPU.
193 	 *
194 	 * To prevent a kexec kernel from onlining secondary CPUs invalidate the
195 	 * mailbox address in the ACPI MADT wakeup structure which prevents a
196 	 * kexec kernel to use it.
197 	 *
198 	 * This is safe as the booting kernel has the mailbox address cached
199 	 * already and acpi_wakeup_cpu() uses the cached value to bring up the
200 	 * secondary CPUs.
201 	 *
202 	 * Note: This is a Linux specific convention and not covered by the
203 	 *       ACPI specification.
204 	 */
205 	mp_wake->mailbox_address = 0;
206 }
207 
acpi_parse_mp_wake(union acpi_subtable_headers * header,const unsigned long end)208 int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
209 			      const unsigned long end)
210 {
211 	struct acpi_madt_multiproc_wakeup *mp_wake;
212 
213 	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
214 
215 	/*
216 	 * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
217 	 * entry.  'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
218 	 * than the actual size of the MP wakeup entry in ACPI table because the
219 	 * 'reset_vector' is only available in the V1 MP wakeup structure.
220 	 */
221 	if (!mp_wake)
222 		return -EINVAL;
223 	if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
224 		return -EINVAL;
225 	if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
226 		return -EINVAL;
227 
228 	acpi_table_print_madt_entry(&header->common);
229 
230 	acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
231 
232 	if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
233 	    mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
234 		if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
235 			pr_warn("Failed to setup MADT reset vector\n");
236 			acpi_mp_disable_offlining(mp_wake);
237 		}
238 	} else {
239 		/*
240 		 * CPU offlining requires version 1 of the ACPI MADT wakeup
241 		 * structure.
242 		 */
243 		acpi_mp_disable_offlining(mp_wake);
244 	}
245 
246 	apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
247 
248 	return 0;
249 }
250