// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

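/*
 * Copy the host's memblock regions into the hyp_memory array shared with
 * the nVHE hypervisor, sorted by base address. Fails if there are more
 * regions than the hypervisor can track.
 */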
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

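/*
 * Reserve the physical memory needed by the nVHE hypervisor in protected
 * mode: its stage-1 page-tables, the host stage-2 page-tables, the VM table,
 * the hyp vmemmap and the FF-A proxy buffers. The allocation is PMD-aligned
 * when possible, falling back to page alignment.
 */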
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

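/*
 * Tear down the EL2 view of the VM (if it was created), clear the handle and
 * drain the memcaches holding pages that were donated to the hypervisor.
 */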
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}

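/*
 * Allocate the EL2 copy of the vCPU state and donate it to the hypervisor
 * via __pkvm_init_vcpu. On success the vCPU is marked as finalized; on
 * failure the pages are returned to the host.
 */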
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

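/*
 * Create the EL2 counterpart of the VM on first use, serialized by the
 * config_lock. A VM that already has a handle is left untouched.
 */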
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

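/*
 * Create the EL2 counterpart of a vCPU on first use, under the config_lock.
 * A vCPU that has already been finalized at EL2 is left untouched.
 */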
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

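/*
 * Per-CPU callback: ask the hypervisor to install the host stage-2 on this
 * CPU. Any failure is reported back through the shared error pointer.
 */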
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

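/*
 * De-privilege the host: enable the protected-mode static key and get every
 * CPU to finalize its own host stage-2 protection.
 */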
static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

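/*
 * Late initcall: once KVM is up, hide the hyp sections from kmemleak and
 * drop the host's privileges, completing the switch to protected mode.
 */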
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

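/* rb-tree comparator: order pkvm_mappings by guest frame number. */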
static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}

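/*
 * Return the node matching @gfn, or the last node visited on the search path
 * when there is no exact match (NULL for an empty tree). Callers are expected
 * to filter the result against their range, as the iteration macro below does.
 */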
static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
 * of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)			\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,	\
							     ((__start) >> PAGE_SHIFT));\
	     __tmp && ({								\
			__map = rb_entry(__tmp, struct pkvm_mapping, node);		\
			__tmp = rb_next(__tmp);						\
			true;								\
		       });								\
	    )										\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))				\
			continue;							\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))				\
			break;								\
		else

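/*
 * Under pKVM the guest stage-2 page-tables live at EL2 and are never walked
 * by the host. The host side of the "pgtable" is just an rb-tree of the
 * gfn->pfn mappings that have been shared with the guest.
 */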
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT;
	pgt->mmu = mmu;

	return 0;
}

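/*
 * Unshare every gfn still mapped into the guest back to the host and free
 * the host-side rb-tree entries.
 */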
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

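/*
 * Share a single page with the guest: ask the hypervisor to map @pfn at @gfn
 * and record the mapping in the rb-tree, reusing the pkvm_mapping object
 * stashed in the memcache. -EPERM from the hypervisor means another vCPU
 * raced us and already mapped the gfn, which is reported as -EAGAIN.
 */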
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			return -EAGAIN;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

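/*
 * The remaining kvm_pgtable operations deal with unlinked tables and eager
 * page splitting, neither of which is supported by the pKVM host backend,
 * so reaching them is a bug.
 */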
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}