// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <qperret@google.com>
 */

#include <linux/init.h>
#include <linux/interval_tree_generic.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}

	return 0;
}

void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += pkvm_selftest_pages();
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
}

static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	size_t hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	pkvm_handle_t handle = vcpu->kvm->arch.pkvm.handle;
	void *hyp_vcpu;
	int ret;

	vcpu->arch.pkvm_memcache.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;

	hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vcpu)
		return -ENOMEM;

	ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, vcpu, hyp_vcpu);
	if (!ret)
		vcpu_set_flag(vcpu, VCPU_PKVM_FINALIZED);
	else
		free_pages_exact(hyp_vcpu, hyp_vcpu_sz);

	return ret;
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
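 *
 * Called with host_kvm->arch.config_lock held by pkvm_create_hyp_vm().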
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz;
	void *pgd, *hyp_vm;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	host_kvm->arch.pkvm.handle = ret;
	host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
	kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);

	return 0;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	mutex_lock(&vcpu->kvm->arch.config_lock);
	if (!vcpu_get_flag(vcpu, VCPU_PKVM_FINALIZED))
		ret = __pkvm_create_hyp_vcpu(vcpu);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
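	 * This covers the HYP BSS, data and rodata sections as well as the
	 * physical range reserved in kvm_hyp_reserve().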
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_data_start, __hyp_data_end - __hyp_data_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

static u64 __pkvm_mapping_start(struct pkvm_mapping *m)
{
	return m->gfn * PAGE_SIZE;
}

static u64 __pkvm_mapping_end(struct pkvm_mapping *m)
{
	return (m->gfn + m->nr_pages) * PAGE_SIZE - 1;
}

INTERVAL_TREE_DEFINE(struct pkvm_mapping, node, u64, __subtree_last,
		     __pkvm_mapping_start, __pkvm_mapping_end, static,
		     pkvm_mapping);

/*
 * __tmp starts at iter_first(pkvm_mappings) and is advanced to the next mapping *before*
 * entering the body of the loop, which allows freeing of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
	for (struct pkvm_mapping *__tmp = pkvm_mapping_iter_first(&(__pgt)->pkvm_mappings,	\
								  __start, __end - 1);		\
	     __tmp && ({									\
			__map = __tmp;								\
			__tmp = pkvm_mapping_iter_next(__map, __start, __end - 1);		\
			true;									\
		       });									\
	    )

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings	= RB_ROOT_CACHED;
	pgt->mmu		= mmu;

	return 0;
}

static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 end)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret;

	if (!handle)
		return 0;

	for_each_mapping_in_range_safe(pgt, start, end, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			return ret;
		pkvm_mapping_remove(mapping, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return 0;
}

void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
}

int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE && size != PMD_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);

	/*
	 * Calling stage2_map() on top of existing mappings is either happening because of a race
	 * with another vCPU, or because we're changing between page and block mappings. As per
	 * user_mem_abort(), same-size permission faults are handled in the relax_perms() path.
	 */
	mapping = pkvm_mapping_iter_first(&pgt->pkvm_mappings, addr, addr + size - 1);
	if (mapping) {
		if (size == (mapping->nr_pages * PAGE_SIZE))
			return -EAGAIN;

		/*
		 * Remove _any_ pkvm_mapping overlapping with the range, bigger or smaller.
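		 * They are unshared from the guest before the new range is shared and
		 * tracked below.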
		 */
		ret = __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
		if (ret)
			return ret;
		mapping = NULL;
	}

	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, size / PAGE_SIZE, prot);
	if (WARN_ON(ret))
		return ret;

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	mapping->nr_pages = size / PAGE_SIZE;
	pkvm_mapping_insert(mapping, &pgt->pkvm_mappings);

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	lockdep_assert_held_write(&kvm_s2_mmu_to_kvm(pgt->mmu)->mmu_lock);

	return __pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn,
					mapping->nr_pages);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn),
					  PAGE_SIZE * mapping->nr_pages);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mapping->nr_pages, mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}