// SPDX-License-Identifier: GPL-2.0

#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/pgtable.h>

#include <asm/init.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sev.h>

extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
extern unsigned int next_early_pgt;

static inline bool check_la57_support(void)
{
	/*
	 * 5-level paging is detected and enabled at the kernel decompression
	 * stage. Only check whether it has been enabled there.
	 */
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

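	/*
	 * With 5-level paging enabled, the PGD translates virtual address
	 * bits 56:48 and each P4D table holds 512 entries.
	 */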
	__pgtable_l5_enabled = 1;
	pgdir_shift = 48;
	ptrs_per_p4d = 512;

	return true;
}

static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
						    pmdval_t *pmd,
						    unsigned long p2v_offset)
{
	unsigned long paddr, paddr_end;
	int i;

	/* Encrypt the kernel and related (if SME is active) */
	sme_encrypt_kernel(bp);

	/*
	 * Clear the memory encryption mask from the .bss..decrypted section.
	 * The bss section will be memset to zero later in the initialization so
	 * there is no need to zero it after changing the memory encryption
	 * attribute.
	 */
	if (sme_get_me_mask()) {
		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
			 * On SNP, transition the page to shared in the RMP table so that
			 * it is consistent with the page table attribute change.
			 *
			 * __start_bss_decrypted has a virtual address in the high range
			 * mapping (kernel .text). PVALIDATE, by way of
			 * early_snp_set_memory_shared(), requires a valid virtual
			 * address but the kernel is currently running off of the identity
			 * mapping so use the PA to get a *currently* valid virtual address.
			 */
			early_snp_set_memory_shared(paddr, paddr, PTRS_PER_PMD);

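			/*
			 * Subtracting the encryption mask clears the C-bit
			 * from this 2M PMD entry so the range is mapped
			 * decrypted.
			 */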
			i = pmd_index(paddr - p2v_offset);
			pmd[i] -= sme_get_me_mask();
		}
	}

	/*
	 * Return the SME encryption mask (if SME is active) to be used as a
	 * modifier for the initial pgdir entry programmed into CR3.
	 */
	return sme_get_me_mask();
}

/*
 * This code is compiled using PIC codegen because it will execute from the
 * early 1:1 mapping of memory, which deviates from the mapping expected by the
 * linker. Due to this deviation, taking the address of a global variable will
 * produce an ambiguous result when using the plain & operator. Instead,
 * rip_rel_ptr() must be used, which will return the RIP-relative address in
 * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
 * subtracting p2v_offset from the RIP-relative address.
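 *
 * For example, rip_rel_ptr(_text) evaluates to the address of _text in the
 * 1:1 mapping, and subtracting p2v_offset from that value yields the kernel
 * virtual address of _text.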
 */
unsigned long __head __startup_64(unsigned long p2v_offset,
				  struct boot_params *bp)
{
	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
	pgdval_t *pgd;
	p4dval_t *p4d;
	pudval_t *pud;
	pmdval_t *pmd, pmd_entry;
	bool la57;
	int i;

	la57 = check_la57_support();

	/* Is the address too large? */
	if (physaddr >> MAX_PHYSMEM_BITS)
		for (;;);

	/*
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
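	 *
	 * p2v_offset is the offset from a kernel virtual address to its
	 * physical address, so __START_KERNEL_map + p2v_offset works out to
	 * the difference between the actual and the linked load address of
	 * the image.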
	 */
	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
		for (;;);

	va_text = physaddr - p2v_offset;
	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (la57) {
		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

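	/*
	 * The last two entries of level3_kernel_pgt point to level2_kernel_pgt
	 * and level2_fixmap_pgt; the physical addresses they hold need the
	 * same fixup.
	 */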
	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover. These
	 * entries should *NOT* have the global bit set! This also
	 * creates a bunch of nonsense entries but that is fine --
	 * it avoids problems around wraparound.
	 */

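	/*
	 * Two adjacent entries are programmed at each level below so that the
	 * 1:1 mapping still covers the kernel image if it happens to straddle
	 * a PGD, P4D or PUD boundary.
	 */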
	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
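	/*
	 * With the SME mask included in pgtable_flags, the lower-level page
	 * tables referenced by these entries are accessed encrypted when SME
	 * is active.
	 */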

	if (la57) {
		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;

		i = physaddr >> P4D_SHIFT;
		p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
		p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
	} else {
		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
		pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
	}

	i = physaddr >> PUD_SHIFT;
	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

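	/*
	 * Map the kernel image (physical _text .. _end) with 2M pages; the
	 * PMD index wraps modulo PTRS_PER_PMD.
	 */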
	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
		int idx = i + (physaddr >> PMD_SHIFT);

		pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
	}

	/*
	 * Fix up the kernel text+data virtual addresses. Note that we might
	 * write invalid PMDs; when the kernel is relocated, cleanup_highmap()
	 * fixes this up along with the mappings beyond _end.
	 *
	 * Only the region occupied by the kernel image has so far been
	 * checked against the table of usable memory regions provided by the
	 * firmware, so invalidate pages outside that region. A page table
	 * entry that maps to a reserved area of memory would allow processor
	 * speculation into that area, and on some hardware (particularly the
	 * UV platform) even speculative access to some reserved areas is
	 * caught as an error, causing the BIOS to halt the system.
	 */

	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
		pmd[i] &= ~_PAGE_PRESENT;

	/* fixup pages that are part of the kernel image */
	for (; i <= pmd_index(va_end); i++)
		if (pmd[i] & _PAGE_PRESENT)
			pmd[i] += load_delta;

	/* invalidate pages after the kernel image */
	for (; i < PTRS_PER_PMD; i++)
		pmd[i] &= ~_PAGE_PRESENT;

	return sme_postprocess_startup(bp, pmd, p2v_offset);
}