1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * relocate_kernel.S - put the kernel image in place to boot
4 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
5 */
6
7#include <linux/linkage.h>
8#include <linux/stringify.h>
9#include <asm/alternative.h>
10#include <asm/page_types.h>
11#include <asm/kexec.h>
12#include <asm/processor-flags.h>
13#include <asm/pgtable_types.h>
14#include <asm/nospec-branch.h>
15#include <asm/unwind_hints.h>
16#include <asm/asm-offsets.h>
17
18/*
19 * Must be relocatable PIC code callable as a C function, in particular
20 * there must be a plain RET and not jump to return thunk.
21 */
22
/*
 * Helper macros.  PTR(x) is x shifted left by three (x * 8); PAGE_ATTR is
 * the OR of the present, writable, accessed and dirty page flags.
 * NOTE(review): neither macro is referenced in the visible part of this
 * file -- confirm whether they are still needed.
 */
23#define PTR(x) (x << 3)
24#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
25
26/*
27 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
28 * into the control page, and the remainder of the page is used as the stack.
29 */
30
31	.section .data..relocate_kernel,"a";
32/*
 * Minimal CPU state: stored by relocate_kernel() before it leaves the
 * caller's stack and page tables, and loaded again by virtual_mapped() on
 * the way back.
 */
33SYM_DATA_LOCAL(saved_rsp, .quad 0)
34SYM_DATA_LOCAL(saved_cr0, .quad 0)
35SYM_DATA_LOCAL(saved_cr3, .quad 0)
36SYM_DATA_LOCAL(saved_cr4, .quad 0)
37	/*
	 * Other data.  The three non-local symbols below are only ever read
	 * by the code in this file; presumably they are filled in from
	 * outside before relocate_kernel() is invoked -- confirm against the
	 * caller.  pa_backup_pages_map keeps the indirection page received in
	 * %rdi so that it can be handed to swap_pages() a second time when
	 * control comes back.
	 */
38SYM_DATA(kexec_va_control_page, .quad 0)
39SYM_DATA(kexec_pa_table_page, .quad 0)
40SYM_DATA(kexec_pa_swap_page, .quad 0)
41SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
42
43	.balign 16
/*
 * GDT loaded by identity_mapped().  The first 8 bytes (.word + .long +
 * .word) take the place of descriptor 0; their leading word is the table
 * limit, which identity_mapped() reads from here and pushes together with
 * this table's address to build the operand for LGDT.  Three segment
 * descriptors follow.
 */
44SYM_DATA_START_LOCAL(kexec_debug_gdt)
45	.word   kexec_debug_gdt_end - kexec_debug_gdt - 1	/* limit: size in bytes - 1 */
46	.long   0	/* filler for the rest of the first slot */
47	.word   0	/* filler for the rest of the first slot */
48	.quad   0x00cf9a000000ffff      /* __KERNEL32_CS */
49	.quad   0x00af9a000000ffff      /* __KERNEL_CS */
50	.quad   0x00cf92000000ffff      /* __KERNEL_DS */
51SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
52
53	.section .text..relocate_kernel,"ax";
54	.code64
/*
 * relocate_kernel - entry point, invoked with the register arguments listed
 * below.
 *
 * Pushes the callee-saved registers and RFLAGS on the incoming stack, loads
 * kexec_pa_table_page into CR3, clears CR4.PGE, records %rsp, CR0, CR3 and
 * CR4 plus the indirection page in the data area, moves the stack to the
 * top of the control page and jumps to identity_mapped() inside that page.
 * There is no RET in this block; the matching pops and the RET live in
 * virtual_mapped().
 */
55SYM_CODE_START_NOALIGN(relocate_kernel)
56	UNWIND_HINT_END_OF_STACK
57	ANNOTATE_NOENDBR
58	/*
59	 * %rdi indirection_page
60	 * %rsi pa_control_page
61	 * %rdx start address
62	 * %rcx preserve_context
63	 * %r8  host_mem_enc_active
64	 */
65
66	/*
	 * Save the CPU context, used for jumping back.  virtual_mapped()
	 * pops these in the reverse order after reloading saved_rsp.
	 */
67	pushq %rbx
68	pushq %rbp
69	pushq %r12
70	pushq %r13
71	pushq %r14
72	pushq %r15
73	pushf
74
75	/* zero out flags, and disable interrupts */
76	pushq $0
77	popfq
78
79	/*
	 * Switch to the identity mapped page tables.  The incoming CR3 is
	 * kept in %rax until it is stored to saved_cr3 below; %r9 keeps the
	 * new page table address for identity_mapped().
	 */
80	movq	%cr3, %rax
81	movq	kexec_pa_table_page(%rip), %r9
82	movq	%r9, %cr3
83
84	/* Leave CR4 in %r13 to enable the right paging mode later. */
85	movq	%cr4, %r13
86
87	/*
	 * Disable global pages immediately to ensure this mapping is RWX.
	 * The modified value goes through %r12 so that %r13 still holds the
	 * unmodified CR4.
	 */
88	movq	%r13, %r12
89	andq	$~(X86_CR4_PGE), %r12
90	movq	%r12, %cr4
91
92	/* Save %rsp and CRs (CR4 as it was on entry, with PGE still intact). */
93	movq	%r13, saved_cr4(%rip)
94	movq    %rsp, saved_rsp(%rip)
95	movq	%rax, saved_cr3(%rip)
96	movq	%cr0, %rax
97	movq	%rax, saved_cr0(%rip)
98
99	/* save indirection list for jumping back */
100	movq	%rdi, pa_backup_pages_map(%rip)
101
102	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
103	movq	%rcx, %r11
104
105	/* setup a new stack at the end of the physical control page */
106	lea	PAGE_SIZE(%rsi), %rsp
107
108	/*
	 * jump to identity mapped page
	 *
	 * Both immediates are differences against the local label 0, so the
	 * add/sub pair nets out to
	 *   %rsi = pa_control_page + (identity_mapped - __relocate_kernel_start)
	 * __relocate_kernel_start is defined outside this file; presumably it
	 * marks the start of what is copied into the control page -- confirm.
	 */
1090:	addq	$identity_mapped - 0b, %rsi
110	subq	$__relocate_kernel_start - 0b, %rsi
111	ANNOTATE_RETPOLINE_SAFE
112	jmp	*%rsi
113SYM_CODE_END(relocate_kernel)
114
/*
 * identity_mapped - continuation of relocate_kernel(), reached through the
 * indirect jump at its end, with the stack at the top of the control page.
 *
 * Loads kexec_debug_gdt, puts CR0 and CR4 into a known state, reloads CR3,
 * optionally executes WBINVD and calls swap_pages().  After that:
 *  - preserve_context == 0: clears the general purpose registers and RETs
 *    to the start address pushed at the top of this function;
 *  - preserve_context != 0: calls the start address on a stack placed at
 *    the end of the swap page and, once that call returns, runs
 *    swap_pages() again and continues in virtual_mapped().
 */
115SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
116	UNWIND_HINT_END_OF_STACK
117	/*
118	 * %rdi	indirection page
119	 * %rdx start address
120	 * %r8 host_mem_enc_active
121	 * %r9 page table page
122	 * %r11 preserve_context
123	 * %r13 original CR4 when relocate_kernel() was invoked
124	 */
125
126	/* store the start address on the stack */
127	pushq   %rdx
128
129	/*
	 * Create a GDTR (16 bits limit, 64 bits addr) on stack: the 8-byte
	 * table address is pushed first, then the 2-byte limit read from the
	 * first word of kexec_debug_gdt, leaving a 10-byte image at (%rsp).
	 */
130	leaq	kexec_debug_gdt(%rip), %rax
131	pushq	%rax
132	pushw	(%rax)
133
134	/* Load the GDT, put the stack back (drop the 10 bytes pushed above) */
135	lgdt	(%rsp)
136	addq	$10, %rsp
137
138	/* Test that we can load segments */
139	movq	%ds, %rax
140	movq	%rax, %ds
141
142	/*
143	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
144	 * below.
145	 */
146	movq	%cr4, %rax
147	andq	$~(X86_CR4_CET), %rax
148	movq	%rax, %cr4
149
150	/*
151	 * Set cr0 to a known state:
152	 *  - Paging enabled
153	 *  - Alignment check disabled
154	 *  - Write protect disabled
155	 *  - No task switch
156	 *  - Don't do FP software emulation.
157	 *  - Protected mode enabled
158	 */
159	movq	%cr0, %rax
160	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
161	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
162	movq	%rax, %cr0
163
164	/*
165	 * Set cr4 to a known state:
166	 *  - physical address extension enabled
167	 *  - 5-level paging, if it was enabled before
168	 *  - Machine check exception on TDX guest, if it was enabled before.
169	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
170	 *
171	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
172	 * PAE is always set in the original CR4.
173	 */
174	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
175	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
176	movq	%r13, %cr4
177
178	/* Flush the TLB (needed?) */
179	movq	%r9, %cr3
180
181	/*
182	 * If SME is active, there could be old encrypted cache line
183	 * entries that will conflict with the now unencrypted memory
184	 * used by kexec. Flush the caches before copying the kernel.
185	 */
186	testq	%r8, %r8
187	jz .Lsme_off
188	wbinvd
189.Lsme_off:
190
	/* %rdi = indirection page, %r11 = preserve_context (see swap_pages) */
191	call	swap_pages
192
193	/*
194	 * To be certain of avoiding problems with self-modifying code
195	 * I need to execute a serializing instruction here.
196	 * So I flush the TLB by reloading %cr3 here, it's handy,
197	 * and not processor dependent.
198	 */
199	movq	%cr3, %rax
200	movq	%rax, %cr3
201
202	testq	%r11, %r11	/* preserve_context */
203	jnz .Lrelocate
204
205	/*
206	 * set all of the registers to known values
207	 * leave %rsp alone
208	 */
209
210	xorl	%eax, %eax
211	xorl	%ebx, %ebx
212	xorl    %ecx, %ecx
213	xorl    %edx, %edx
214	xorl    %esi, %esi
215	xorl    %edi, %edi
216	xorl    %ebp, %ebp
217	xorl	%r8d, %r8d
218	xorl	%r9d, %r9d
219	xorl	%r10d, %r10d
220	xorl	%r11d, %r11d
221	xorl	%r12d, %r12d
222	xorl	%r13d, %r13d
223	xorl	%r14d, %r14d
224	xorl	%r15d, %r15d
225
	/*
	 * The only thing left on the stack is the start address pushed at
	 * the top of this function, so this RET transfers control to it.
	 */
226	ANNOTATE_UNRET_SAFE
227	ret
228	int3
229
230.Lrelocate:
	/* take the start address back off the control page stack */
231	popq	%rdx
232
233	/* Use the swap page for the callee's stack */
234	movq	kexec_pa_swap_page(%rip), %r10
235	leaq	PAGE_SIZE(%r10), %rsp
236
237	/* push the existing entry point onto the callee's stack */
238	pushq	%rdx
239
240	ANNOTATE_RETPOLINE_SAFE
241	call	*%rdx
242
243	/*
	 * get the re-entry point of the peer system
	 *
	 * This pops the stack slot that was filled with the entry point
	 * before the call.  NOTE(review): presumably the callee overwrites
	 * that slot with its own re-entry address before returning -- confirm.
	 */
244	popq	%rbp
245	movq	kexec_pa_swap_page(%rip), %r10
246	movq	pa_backup_pages_map(%rip), %rdi
247	movq	kexec_pa_table_page(%rip), %rax
248	movq	%rax, %cr3
249
250	/*
	 * Find start (and end) of this physical mapping of control page:
	 * take the address of the next instruction, mask it down to its page
	 * base in %r8, and point the stack at that base plus PAGE_SIZE.
	 */
251	leaq	(%rip), %r8
252	ANNOTATE_NOENDBR
253	andq	$PAGE_MASK, %r8
254	lea	PAGE_SIZE(%r8), %rsp
255	movl	$1, %r11d	/* Ensure preserve_context flag is set */
256	call	swap_pages
	/*
	 * Same label arithmetic as at the end of relocate_kernel():
	 *   %rax = kexec_va_control_page +
	 *          (virtual_mapped - __relocate_kernel_start)
	 * The push/ret pair below then acts as a jump to that address.
	 */
257	movq	kexec_va_control_page(%rip), %rax
2580:	addq	$virtual_mapped - 0b, %rax
259	subq	$__relocate_kernel_start - 0b, %rax
260	pushq	%rax
261	ANNOTATE_UNRET_SAFE
262	ret
263	int3
264SYM_CODE_END(identity_mapped)
265
/*
 * virtual_mapped - final stage of the return path, reached through the
 * push/ret at the end of identity_mapped().
 *
 * Reloads %rsp, CR4, CR3 and CR0 from the values stored by
 * relocate_kernel(), optionally reloads the GDT, moves the re-entry point
 * from %rbp to %rax as the return value, then pops RFLAGS and the
 * callee-saved registers that relocate_kernel() pushed and returns on the
 * restored stack.
 */
266SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
267	UNWIND_HINT_END_OF_STACK
268	ANNOTATE_NOENDBR // RET target, above
	/* back onto the stack that relocate_kernel() was entered on */
269	movq	saved_rsp(%rip), %rsp
270	movq	saved_cr4(%rip), %rax
271	movq	%rax, %cr4
	/* both values are loaded into registers before CR3 is written */
272	movq	saved_cr3(%rip), %rax
273	movq	saved_cr0(%rip), %r8
274	movq	%rax, %cr3
275	movq	%r8, %cr0
276
277#ifdef CONFIG_KEXEC_JUMP
278	/*
	 * Saved in save_processor_state.
	 * saved_context is addressed by its absolute symbol value here, not
	 * relative to %rip; it is defined outside this file.
	 */
279	movq    $saved_context, %rax
280	lgdt    saved_context_gdt_desc(%rax)
281#endif
282
283	/* relocate_kernel() returns the re-entry point for next time */
284	movq	%rbp, %rax
285
	/* reverse of the pushes at the top of relocate_kernel() */
286	popf
287	popq	%r15
288	popq	%r14
289	popq	%r13
290	popq	%r12
291	popq	%rbp
292	popq	%rbx
293	ANNOTATE_UNRET_SAFE
294	ret
295	int3
296SYM_CODE_END(virtual_mapped)
297
298	/* Do the copies */
/*
 * swap_pages - walk the indirection list and copy (or swap) pages.
 *
 * In:   %rdi  first list entry (expected to be an indirection record)
 *       %r11  preserve_context; non-zero selects swapping over plain copy
 * Uses: %rbx  pointer to the next list entry to read
 *       %rcx  current list entry, also the REP count
 *       %rdi  current destination, %rsi current source
 *       %rax  source page, %rdx destination page, kept across the copies
 * None of these registers are saved or restored here.
 *
 * The low bits of an entry select its meaning (0x1 destination,
 * 0x2 indirection page, 0x4 done, 0x8 source); the address is the entry
 * with its low 12 bits masked off.
 */
299SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
300	UNWIND_HINT_END_OF_STACK
301	/*
302	 * %rdi indirection page
303	 * %r11 preserve_context
304	 */
305	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
306	xorl	%edi, %edi
307	xorl	%esi, %esi
308	jmp	.Lstart		/* Should start with an indirection record */
309
310.Lloop:	/* top, read another word from the indirection page */
311
312	movq	(%rbx), %rcx
313	addq	$8,	%rbx
314.Lstart:
315	testb	$0x1,	%cl   /* is it a destination page? */
316	jz	.Lnotdest
317	movq	%rcx,	%rdi
318	andq	$0xfffffffffffff000, %rdi
319	jmp	.Lloop
320.Lnotdest:
321	testb	$0x2,	%cl   /* is it an indirection page? */
322	jz	.Lnotind
	/* continue reading entries from the start of that page */
323	movq	%rcx,   %rbx
324	andq	$0xfffffffffffff000, %rbx
325	jmp	.Lloop
326.Lnotind:
327	testb	$0x4,	%cl   /* is it the done indicator? */
328	jz	.Lnotdone
329	jmp	.Ldone
330.Lnotdone:
331	testb	$0x8,	%cl   /* is it the source indicator? */
332	jz	.Lloop	      /* Ignore it otherwise */
333	movq	%rcx,   %rsi  /* For every source page do a copy */
334	andq	$0xfffffffffffff000, %rsi
335
336	movq	%rdi, %rdx    /* Save destination page to %rdx */
337	movq	%rsi, %rax    /* Save source page to %rax */
338
339	testq	%r11, %r11    /* Only actually swap for ::preserve_context */
340	jz	.Lnoswap
341
342	/* copy source page to swap page (512 quadwords = 4096 bytes) */
343	movq	kexec_pa_swap_page(%rip), %rdi
344	movl	$512, %ecx
345	rep ; movsq
346
347	/* copy destination page to source page */
348	movq	%rax, %rdi
349	movq	%rdx, %rsi
350	movl	$512, %ecx
351	rep ; movsq
352
353	/* copy swap page to destination page */
354	movq	%rdx, %rdi
355	movq	kexec_pa_swap_page(%rip), %rsi
356.Lnoswap:
	/*
	 * Final copy into the destination; when the swap was skipped this
	 * is the plain source -> destination copy.  With the direction flag
	 * clear, %rdi is left 4096 bytes further on, so a following source
	 * entry is written right after this destination page.
	 */
357	movl	$512, %ecx
358	rep ; movsq
359
	/* %rsi = saved source page + PAGE_SIZE */
360	lea	PAGE_SIZE(%rax), %rsi
361	jmp	.Lloop
362.Ldone:
363	ANNOTATE_UNRET_SAFE
364	ret
365	int3
366SYM_CODE_END(swap_pages)
367