// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

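/*
 * Comparator for sort_memblock_regions(): order hyp memblock regions by
 * ascending base address.
 */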
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

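/*
 * Copy the host's memblock regions into the hyp array so the nVHE hypervisor
 * can build its own view of memory. Bail out with -ENOMEM if there are more
 * regions than HYP_MEMBLOCK_REGIONS slots.
 */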
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

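/*
 * Reserve, at boot, the memory the hypervisor will need once the host has
 * been deprivileged: hyp stage-1 and host stage-2 page-tables, the VM table,
 * the hyp vmemmap and the FF-A proxy buffers. Only relevant when KVM runs in
 * protected mode.
 */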
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to a
	 * PAGE_SIZE-aligned allocation if that fails.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

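/*
 * Tear down the EL2 copy of the VM, if one was created, and free the pages
 * handed back to the host through the teardown memcaches.
 */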
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}

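/*
 * Allocate a page-aligned buffer for the vCPU's EL2 state and donate it to
 * the hypervisor. On success the vCPU is marked as finalized; on failure the
 * buffer is freed again as it was never donated.
 */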
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

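/*
 * Host-facing wrappers. The VM's config_lock serializes creation and
 * destruction of the hyp VM and vCPU state against concurrent ioctls.
 */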
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

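/*
 * Per-CPU callback for pkvm_drop_host_privileges(): ask the hypervisor to
 * finalize host protection on this CPU, reporting any failure through *arg.
 */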
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

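/*
 * Deprivilege the host once KVM has fully initialized (hence the
 * device_initcall_sync): after this, the hyp sections and the reserved hyp
 * memory are no longer accessible from the host.
 */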
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

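/* rb-tree comparator for pkvm_mapping nodes, keyed by guest frame number. */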
static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}

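/*
 * Return the node matching @gfn if it is in the tree, otherwise the last node
 * visited on the search path. Either way, this is a suitable starting point
 * for for_each_mapping_in_range_safe(), which skips entries below the start
 * of the range.
 */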
static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop
 * to allow freeing of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)			\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,	\
							     ((__start) >> PAGE_SHIFT));\
	     __tmp && ({								\
				__map = rb_entry(__tmp, struct pkvm_mapping, node);	\
				__tmp = rb_next(__tmp);					\
				true;							\
		       });								\
	    )										\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))				\
			continue;							\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))				\
			break;								\
		else

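/*
 * pKVM implementation of the stage-2 kvm_pgtable operations. Guest stage-2
 * page-tables are owned by the hypervisor, so the host side only tracks which
 * gfns have been shared with the guest, in an rb-tree of pkvm_mapping nodes,
 * and forwards each operation to EL2 through hypercalls.
 */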
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings	= RB_ROOT;
	pgt->mmu		= mmu;

	return 0;
}

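/*
 * Unshare every gfn still mapped in the guest and free the host-side
 * tracking structures.
 */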
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

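/*
 * Share a single page with the guest via the hypervisor and record it in the
 * rb-tree, using the pkvm_mapping that the caller pre-allocated and stashed
 * in the memcache (cache->mapping).
 */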
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			return -EAGAIN;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

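/*
 * Unshare every gfn in the range and drop the corresponding entries from the
 * rb-tree.
 */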
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

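/*
 * The remaining kvm_pgtable operations manipulate page-table pages directly,
 * which the host cannot do for protected guests; they are never expected to
 * be reached on the pKVM path, so warn if they are.
 */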
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}