/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function, in particular
 * there must be a plain RET and not jump to return thunk.
 */

#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/* Minimal CPU state */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/* other data */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)

	.balign 16
SYM_DATA_START_LOCAL(kexec_debug_gdt)
	.word	kexec_debug_gdt_end - kexec_debug_gdt - 1
	.long	0
	.word	0
	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)

	.section .text..relocate_kernel,"ax";
	.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/* Save the CPU context, used for jumping back */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushf

	/* zero out flags, and disable interrupts */
	pushq	$0
	popfq

	/* Switch to the identity mapped page tables */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/* Save %rsp and CRs. */
	movq	%r13, saved_cr4(%rip)
	movq	%rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/* jump to identity mapped page */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)
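
/*
 * The indirect jump above lands in the copy of this code in the control
 * page: %rsi held pa_control_page, and the two 0b-relative additions net
 * out to (identity_mapped - __relocate_kernel_start), i.e. the offset of
 * identity_mapped within the copied section. From this point on execution
 * runs from the identity-mapped control page.
 */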

SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %rdx start address
	 * %r8  host_mem_enc_active
	 * %r9  page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/* store the start address on the stack */
	pushq	%rdx

	/* Create a GDTR (16 bits limit, 64 bits addr) on stack */
	leaq	kexec_debug_gdt(%rip), %rax
	pushq	%rax
	pushw	(%rax)

	/* Load the GDT, put the stack back */
	lgdt	(%rsp)
	addq	$10, %rsp

	/* Test that we can load segments */
	movq	%ds, %rax
	movq	%rax, %ds

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz	.Lsme_off
	wbinvd
.Lsme_off:

	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz	.Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3
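
	/*
	 * preserve_context (kexec jump) path: call the image entry point,
	 * which is expected to return here, then swap the original pages
	 * back so that virtual_mapped() can resume this kernel.
	 */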

.Lrelocate:
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* get the re-entry point of the peer system */
	popq	%rbp
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/* Find start (and end) of this physical mapping of control page */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	movq	saved_rsp(%rip), %rsp
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq	$saved_context, %rax
	lgdt	saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)
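
/*
 * Each 8-byte entry of the indirection list is a page-aligned physical
 * address tagged in its low bits, matching the IND_* flags defined in
 * <linux/kexec.h>: 0x1 destination, 0x2 indirection, 0x4 done and
 * 0x8 source, which are the bits tested below.
 */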

	/* Do the copies */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word from the indirection page */

	movq	(%rbx), %rcx
	addq	$8, %rbx
.Lstart:
	testb	$0x1, %cl	/* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx, %rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lloop
.Lnotdest:
	testb	$0x2, %cl	/* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx, %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4, %cl	/* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8, %cl	/* is it the source indicator? */
	jz	.Lloop		/* Ignore it otherwise */
	movq	%rcx, %rsi	/* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx	/* Save destination page to %rdx */
	movq	%rsi, %rax	/* Save source page to %rax */

	testq	%r11, %r11	/* Only actually swap for ::preserve_context */
	jz	.Lnoswap

	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx
	rep ; movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep ; movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	movl	$512, %ecx
	rep ; movsq

	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)