1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2008-2015 Nathan Whitehorn
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 /*
31 * Manages physical address maps.
32 *
33 * Since the information managed by this module is also stored by the
34 * logical address mapping module, this module may throw away valid virtual
35 * to physical mappings at almost any time. However, invalidations of
36 * mappings must be done as requested.
37 *
38 * In order to cope with hardware architectures which make virtual to
39 * physical map invalidates expensive, this module may delay invalidate
40 * or reduced protection operations until such time as they are actually
41 * necessary. This module is given full information as to which processors
42 * are currently using which maps, and when physical maps must be made
43 * correct.
44 */
45
46 #include "opt_kstack_pages.h"
47
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/conf.h>
51 #include <sys/queue.h>
52 #include <sys/cpuset.h>
53 #include <sys/kerneldump.h>
54 #include <sys/ktr.h>
55 #include <sys/lock.h>
56 #include <sys/msgbuf.h>
57 #include <sys/malloc.h>
58 #include <sys/mman.h>
59 #include <sys/mutex.h>
60 #include <sys/proc.h>
61 #include <sys/rwlock.h>
62 #include <sys/sched.h>
63 #include <sys/sysctl.h>
64 #include <sys/systm.h>
65 #include <sys/vmmeter.h>
66 #include <sys/smp.h>
67 #include <sys/reboot.h>
68
69 #include <sys/kdb.h>
70
71 #include <dev/ofw/openfirm.h>
72
73 #include <vm/vm.h>
74 #include <vm/pmap.h>
75 #include <vm/vm_param.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_page.h>
78 #include <vm/vm_phys.h>
79 #include <vm/vm_map.h>
80 #include <vm/vm_object.h>
81 #include <vm/vm_extern.h>
82 #include <vm/vm_pageout.h>
83 #include <vm/vm_dumpset.h>
84 #include <vm/vm_radix.h>
85 #include <vm/vm_reserv.h>
86 #include <vm/uma.h>
87
88 #include <machine/_inttypes.h>
89 #include <machine/cpu.h>
90 #include <machine/ifunc.h>
91 #include <machine/platform.h>
92 #include <machine/frame.h>
93 #include <machine/md_var.h>
94 #include <machine/psl.h>
95 #include <machine/bat.h>
96 #include <machine/hid.h>
97 #include <machine/pte.h>
98 #include <machine/sr.h>
99 #include <machine/trap.h>
100 #include <machine/mmuvar.h>
101
102 #include "mmu_oea64.h"
103
104 void moea64_release_vsid(uint64_t vsid);
105 uintptr_t moea64_get_unique_vsid(void);
106
107 #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR)
108 #define ENABLE_TRANS(msr) mtmsr(msr)
109
110 #define VSID_MAKE(sr, hash) ((sr) | (((hash) & 0xfffff) << 4))
111 #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff)
112 #define VSID_HASH_MASK 0x0000007fffffffffULL
113
114 /*
115 * Locking semantics:
116 *
117 * There are two locks of interest: the page locks and the pmap locks, which
118 * protect their individual PVO lists and are locked in that order. The contents
119 * of all PVO entries are protected by the locks of their respective pmaps.
120 * The pmap of any PVO is guaranteed not to change so long as the PVO is linked
121 * into any list.
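* A typical page-centric operation therefore takes the pv lock for the page
* first, and only then the pmap lock of each PVO found on the page's list,
* e.g. (illustrative sketch only):
*
*	PV_PAGE_WR_LOCK(m);
*	LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
*		PMAP_LOCK(pvo->pvo_pmap);
*		...
*		PMAP_UNLOCK(pvo->pvo_pmap);
*	}
*	PV_PAGE_UNLOCK(m);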
122 *
123 */
124
125 #define PV_LOCK_COUNT MAXCPU
126 static struct rwlock __exclusive_cache_line pv_lock[PV_LOCK_COUNT];
127
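/*
* One pv lock covers a superpage-sized span of physical memory, so every
* 4 KB page backing a superpage hashes to the same lock.
*/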
128 #define PV_LOCK_SHIFT HPT_SP_SHIFT
129 #define pa_index(pa) ((pa) >> PV_LOCK_SHIFT)
130
131 /*
132 * Cheap NUMA-izing of the pv locks, to reduce contention across domains.
133 * NUMA domains on POWER9 appear to be indexed as sparse memory spaces, with the
134 * index at (N << 45).
135 */
136 #ifdef __powerpc64__
137 #define PV_LOCK_IDX(pa) ((pa_index(pa) * (((pa) >> 45) + 1)) % PV_LOCK_COUNT)
138 #else
139 #define PV_LOCK_IDX(pa) (pa_index(pa) % PV_LOCK_COUNT)
140 #endif
141 #define PV_LOCKPTR(pa) ((struct rwlock *)(&pv_lock[PV_LOCK_IDX(pa)]))
142
143 #define PV_WR_LOCK(pa) rw_wlock(PV_LOCKPTR(pa))
144 #define PV_RD_LOCK(pa) rw_rlock(PV_LOCKPTR(pa))
145 #define PV_UNLOCK(pa) rw_unlock(PV_LOCKPTR(pa))
146 #define PV_LOCKASSERT(pa) rw_assert(PV_LOCKPTR(pa), RA_LOCKED)
147 #define PV_LOCK_RD_ASSERT(pa) rw_assert(PV_LOCKPTR(pa), RA_RLOCKED)
148 #define PV_LOCK_WR_ASSERT(pa) rw_assert(PV_LOCKPTR(pa), RA_WLOCKED)
149
150 #define PV_PAGE_WR_LOCK(m) PV_WR_LOCK(VM_PAGE_TO_PHYS(m))
151 #define PV_PAGE_RD_LOCK(m) PV_RD_LOCK(VM_PAGE_TO_PHYS(m))
152 #define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m))
153 #define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m))
154
155 struct ofw_map {
156 cell_t om_va;
157 cell_t om_len;
158 uint64_t om_pa;
159 cell_t om_mode;
160 };
161
162 extern unsigned char _etext[];
163 extern unsigned char _end[];
164
165 extern void *slbtrap, *slbtrapend;
166
167 /*
168 * Map of physical memory regions.
169 */
170 static struct mem_region *regions;
171 static struct mem_region *pregions;
172 static struct numa_mem_region *numa_pregions;
173 static int regions_sz, pregions_sz, numapregions_sz;
174
175 u_int phys_avail_count;
176
177 extern void bs_remap_earlyboot(void);
178
179 /*
180 * Lock for the SLB tables.
181 */
182 struct mtx moea64_slb_mutex;
183
184 /*
185 * PTEG data.
186 */
187 u_long moea64_pteg_count;
188 u_long moea64_pteg_mask;
189
190 /*
191 * PVO data.
192 */
193
194 uma_zone_t moea64_pvo_zone; /* zone for pvo entries */
195
196 static struct pvo_entry *moea64_bpvo_pool;
197 static int moea64_bpvo_pool_index = 0;
198 static int moea64_bpvo_pool_size = 0;
199 SYSCTL_INT(_machdep, OID_AUTO, moea64_allocated_bpvo_entries, CTLFLAG_RD,
200 &moea64_bpvo_pool_index, 0, "");
201
202 #define BPVO_POOL_SIZE 327680 /* Sensible historical default value */
203 #define BPVO_POOL_EXPANSION_FACTOR 3
204 #define VSID_NBPW (sizeof(u_int32_t) * 8)
205 #ifdef __powerpc64__
206 #define NVSIDS (NPMAPS * 16)
207 #define VSID_HASHMASK 0xffffffffUL
208 #else
209 #define NVSIDS NPMAPS
210 #define VSID_HASHMASK 0xfffffUL
211 #endif
212 static u_int moea64_vsid_bitmap[NVSIDS / VSID_NBPW];
213
214 static bool moea64_initialized = false;
215
216 #ifdef MOEA64_STATS
217 /*
218 * Statistics.
219 */
220 u_int moea64_pte_valid = 0;
221 u_int moea64_pte_overflow = 0;
222 u_int moea64_pvo_entries = 0;
223 u_int moea64_pvo_enter_calls = 0;
224 u_int moea64_pvo_remove_calls = 0;
225 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_valid, CTLFLAG_RD,
226 &moea64_pte_valid, 0, "");
227 SYSCTL_INT(_machdep, OID_AUTO, moea64_pte_overflow, CTLFLAG_RD,
228 &moea64_pte_overflow, 0, "");
229 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_entries, CTLFLAG_RD,
230 &moea64_pvo_entries, 0, "");
231 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_enter_calls, CTLFLAG_RD,
232 &moea64_pvo_enter_calls, 0, "");
233 SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD,
234 &moea64_pvo_remove_calls, 0, "");
235 #endif
236
237 vm_offset_t moea64_scratchpage_va[2];
238 struct pvo_entry *moea64_scratchpage_pvo[2];
239 struct mtx moea64_scratchpage_mtx;
240
241 uint64_t moea64_large_page_mask = 0;
242 uint64_t moea64_large_page_size = 0;
243 int moea64_large_page_shift = 0;
244 bool moea64_has_lp_4k_16m = false;
245
246 /*
247 * PVO calls.
248 */
249 static int moea64_pvo_enter(struct pvo_entry *pvo,
250 struct pvo_head *pvo_head, struct pvo_entry **oldpvo);
251 static void moea64_pvo_remove_from_pmap(struct pvo_entry *pvo);
252 static void moea64_pvo_remove_from_page(struct pvo_entry *pvo);
253 static void moea64_pvo_remove_from_page_locked(
254 struct pvo_entry *pvo, vm_page_t m);
255 static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t);
256
257 /*
258 * Utility routines.
259 */
260 static bool moea64_query_bit(vm_page_t, uint64_t);
261 static u_int moea64_clear_bit(vm_page_t, uint64_t);
262 static void moea64_kremove(vm_offset_t);
263 static void moea64_syncicache(pmap_t pmap, vm_offset_t va,
264 vm_paddr_t pa, vm_size_t sz);
265 static void moea64_pmap_init_qpages(void *);
266 static void moea64_remove_locked(pmap_t, vm_offset_t,
267 vm_offset_t, struct pvo_dlist *);
268
269 /*
270 * Superpages data and routines.
271 */
272
273 /*
274 * PVO flags (in vaddr) that must match for promotion to succeed.
275 * Note that protection bits are checked separately, as they reside in
276 * another field.
277 */
278 #define PVO_FLAGS_PROMOTE (PVO_WIRED | PVO_MANAGED | PVO_PTEGIDX_VALID)
279
280 #define PVO_IS_SP(pvo) (((pvo)->pvo_vaddr & PVO_LARGE) && \
281 (pvo)->pvo_pmap != kernel_pmap)
282
283 /* Get physical address from PVO. */
284 #define PVO_PADDR(pvo) moea64_pvo_paddr(pvo)
285
286 /* MD page flag indicating that the page is a superpage. */
287 #define MDPG_ATTR_SP 0x40000000
288
289 SYSCTL_DECL(_vm_pmap);
290
291 static SYSCTL_NODE(_vm_pmap, OID_AUTO, sp, CTLFLAG_RD, 0,
292 "SP page mapping counters");
293
294 static u_long sp_demotions;
295 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, demotions, CTLFLAG_RD,
296 &sp_demotions, 0, "SP page demotions");
297
298 static u_long sp_mappings;
299 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, mappings, CTLFLAG_RD,
300 &sp_mappings, 0, "SP page mappings");
301
302 static u_long sp_p_failures;
303 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_failures, CTLFLAG_RD,
304 &sp_p_failures, 0, "SP page promotion failures");
305
306 static u_long sp_p_fail_pa;
307 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_pa, CTLFLAG_RD,
308 &sp_p_fail_pa, 0, "SP page promotion failure: PAs don't match");
309
310 static u_long sp_p_fail_flags;
311 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_flags, CTLFLAG_RD,
312 &sp_p_fail_flags, 0, "SP page promotion failure: page flags don't match");
313
314 static u_long sp_p_fail_prot;
315 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_prot, CTLFLAG_RD,
316 &sp_p_fail_prot, 0,
317 "SP page promotion failure: page protections don't match");
318
319 static u_long sp_p_fail_wimg;
320 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_wimg, CTLFLAG_RD,
321 &sp_p_fail_wimg, 0, "SP page promotion failure: WIMG bits don't match");
322
323 static u_long sp_promotions;
324 SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, promotions, CTLFLAG_RD,
325 &sp_promotions, 0, "SP page promotions");
326
327 static bool moea64_ps_enabled(pmap_t);
328 static void moea64_align_superpage(vm_object_t, vm_ooffset_t,
329 vm_offset_t *, vm_size_t);
330
331 static int moea64_sp_enter(pmap_t pmap, vm_offset_t va,
332 vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind);
333 static struct pvo_entry *moea64_sp_remove(struct pvo_entry *sp,
334 struct pvo_dlist *tofree);
335
336 #if VM_NRESERVLEVEL > 0
337 static void moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m);
338 #endif
339 static void moea64_sp_demote_aligned(struct pvo_entry *sp);
340 static void moea64_sp_demote(struct pvo_entry *pvo);
341
342 static struct pvo_entry *moea64_sp_unwire(struct pvo_entry *sp);
343 static struct pvo_entry *moea64_sp_protect(struct pvo_entry *sp,
344 vm_prot_t prot);
345
346 static int64_t moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit);
347 static int64_t moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m,
348 uint64_t ptebit);
349
350 static __inline bool moea64_sp_pvo_in_range(struct pvo_entry *pvo,
351 vm_offset_t sva, vm_offset_t eva);
352
353 /*
354 * Kernel MMU interface
355 */
356 void moea64_clear_modify(vm_page_t);
357 void moea64_copy_page(vm_page_t, vm_page_t);
358 void moea64_copy_page_dmap(vm_page_t, vm_page_t);
359 void moea64_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
360 vm_page_t *mb, vm_offset_t b_offset, int xfersize);
361 void moea64_copy_pages_dmap(vm_page_t *ma, vm_offset_t a_offset,
362 vm_page_t *mb, vm_offset_t b_offset, int xfersize);
363 int moea64_enter(pmap_t, vm_offset_t, vm_page_t, vm_prot_t,
364 u_int flags, int8_t psind);
365 void moea64_enter_object(pmap_t, vm_offset_t, vm_offset_t, vm_page_t,
366 vm_prot_t);
367 void moea64_enter_quick(pmap_t, vm_offset_t, vm_page_t, vm_prot_t);
368 vm_paddr_t moea64_extract(pmap_t, vm_offset_t);
369 vm_page_t moea64_extract_and_hold(pmap_t, vm_offset_t, vm_prot_t);
370 void moea64_init(void);
371 bool moea64_is_modified(vm_page_t);
372 bool moea64_is_prefaultable(pmap_t, vm_offset_t);
373 bool moea64_is_referenced(vm_page_t);
374 int moea64_ts_referenced(vm_page_t);
375 vm_offset_t moea64_map(vm_offset_t *, vm_paddr_t, vm_paddr_t, int);
376 bool moea64_page_exists_quick(pmap_t, vm_page_t);
377 void moea64_page_init(vm_page_t);
378 int moea64_page_wired_mappings(vm_page_t);
379 int moea64_pinit(pmap_t);
380 void moea64_pinit0(pmap_t);
381 void moea64_protect(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t);
382 void moea64_qenter(vm_offset_t, vm_page_t *, int);
383 void moea64_qremove(vm_offset_t, int);
384 void moea64_release(pmap_t);
385 void moea64_remove(pmap_t, vm_offset_t, vm_offset_t);
386 void moea64_remove_pages(pmap_t);
387 void moea64_remove_all(vm_page_t);
388 void moea64_remove_write(vm_page_t);
389 void moea64_unwire(pmap_t, vm_offset_t, vm_offset_t);
390 void moea64_zero_page(vm_page_t);
391 void moea64_zero_page_dmap(vm_page_t);
392 void moea64_zero_page_area(vm_page_t, int, int);
393 void moea64_activate(struct thread *);
394 void moea64_deactivate(struct thread *);
395 void *moea64_mapdev(vm_paddr_t, vm_size_t);
396 void *moea64_mapdev_attr(vm_paddr_t, vm_size_t, vm_memattr_t);
397 void moea64_unmapdev(void *, vm_size_t);
398 vm_paddr_t moea64_kextract(vm_offset_t);
399 void moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma);
400 void moea64_kenter_attr(vm_offset_t, vm_paddr_t, vm_memattr_t ma);
401 void moea64_kenter(vm_offset_t, vm_paddr_t);
402 int moea64_dev_direct_mapped(vm_paddr_t, vm_size_t);
403 static void moea64_sync_icache(pmap_t, vm_offset_t, vm_size_t);
404 void moea64_dumpsys_map(vm_paddr_t pa, size_t sz,
405 void **va);
406 void moea64_scan_init(void);
407 vm_offset_t moea64_quick_enter_page(vm_page_t m);
408 vm_offset_t moea64_quick_enter_page_dmap(vm_page_t m);
409 void moea64_quick_remove_page(vm_offset_t addr);
410 bool moea64_page_is_mapped(vm_page_t m);
411 static int moea64_map_user_ptr(pmap_t pm,
412 volatile const void *uaddr, void **kaddr, size_t ulen, size_t *klen);
413 static int moea64_decode_kernel_ptr(vm_offset_t addr,
414 int *is_user, vm_offset_t *decoded_addr);
415 static size_t moea64_scan_pmap(struct bitset *dump_bitset);
416 static void *moea64_dump_pmap_init(unsigned blkpgs);
417 #ifdef __powerpc64__
418 static void moea64_page_array_startup(long);
419 #endif
420 static int moea64_mincore(pmap_t, vm_offset_t, vm_paddr_t *);
421
422 static struct pmap_funcs moea64_methods = {
423 .clear_modify = moea64_clear_modify,
424 .copy_page = moea64_copy_page,
425 .copy_pages = moea64_copy_pages,
426 .enter = moea64_enter,
427 .enter_object = moea64_enter_object,
428 .enter_quick = moea64_enter_quick,
429 .extract = moea64_extract,
430 .extract_and_hold = moea64_extract_and_hold,
431 .init = moea64_init,
432 .is_modified = moea64_is_modified,
433 .is_prefaultable = moea64_is_prefaultable,
434 .is_referenced = moea64_is_referenced,
435 .ts_referenced = moea64_ts_referenced,
436 .map = moea64_map,
437 .mincore = moea64_mincore,
438 .page_exists_quick = moea64_page_exists_quick,
439 .page_init = moea64_page_init,
440 .page_wired_mappings = moea64_page_wired_mappings,
441 .pinit = moea64_pinit,
442 .pinit0 = moea64_pinit0,
443 .protect = moea64_protect,
444 .qenter = moea64_qenter,
445 .qremove = moea64_qremove,
446 .release = moea64_release,
447 .remove = moea64_remove,
448 .remove_pages = moea64_remove_pages,
449 .remove_all = moea64_remove_all,
450 .remove_write = moea64_remove_write,
451 .sync_icache = moea64_sync_icache,
452 .unwire = moea64_unwire,
453 .zero_page = moea64_zero_page,
454 .zero_page_area = moea64_zero_page_area,
455 .activate = moea64_activate,
456 .deactivate = moea64_deactivate,
457 .page_set_memattr = moea64_page_set_memattr,
458 .quick_enter_page = moea64_quick_enter_page,
459 .quick_remove_page = moea64_quick_remove_page,
460 .page_is_mapped = moea64_page_is_mapped,
461 #ifdef __powerpc64__
462 .page_array_startup = moea64_page_array_startup,
463 #endif
464 .ps_enabled = moea64_ps_enabled,
465 .align_superpage = moea64_align_superpage,
466
467 /* Internal interfaces */
468 .mapdev = moea64_mapdev,
469 .mapdev_attr = moea64_mapdev_attr,
470 .unmapdev = moea64_unmapdev,
471 .kextract = moea64_kextract,
472 .kenter = moea64_kenter,
473 .kenter_attr = moea64_kenter_attr,
474 .dev_direct_mapped = moea64_dev_direct_mapped,
475 .dumpsys_pa_init = moea64_scan_init,
476 .dumpsys_scan_pmap = moea64_scan_pmap,
477 .dumpsys_dump_pmap_init = moea64_dump_pmap_init,
478 .dumpsys_map_chunk = moea64_dumpsys_map,
479 .map_user_ptr = moea64_map_user_ptr,
480 .decode_kernel_ptr = moea64_decode_kernel_ptr,
481 };
482
483 MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods);
484
485 /*
486 * Get physical address from PVO.
487 *
488 * For superpages, the lower bits are not stored in pvo_pte.pa and must be
489 * obtained from VA.
490 */
491 static __always_inline vm_paddr_t
492 moea64_pvo_paddr(struct pvo_entry *pvo)
493 {
494 vm_paddr_t pa;
495
496 pa = (pvo)->pvo_pte.pa & LPTE_RPGN;
497
498 if (PVO_IS_SP(pvo)) {
499 pa &= ~HPT_SP_MASK; /* This is needed to clear LPTE_LP bits. */
500 pa |= PVO_VADDR(pvo) & HPT_SP_MASK;
501 }
502 return (pa);
503 }
504
505 static struct pvo_head *
506 vm_page_to_pvoh(vm_page_t m)
507 {
508
509 rw_assert(PV_LOCKPTR(VM_PAGE_TO_PHYS(m)), RA_LOCKED);
510 return (&m->md.mdpg_pvoh);
511 }
512
513 static struct pvo_entry *
514 alloc_pvo_entry(int bootstrap)
515 {
516 struct pvo_entry *pvo;
517
518 if (!moea64_initialized || bootstrap) {
519 if (moea64_bpvo_pool_index >= moea64_bpvo_pool_size) {
520 panic("%s: bpvo pool exhausted, index=%d, size=%d, bytes=%zd."
521 "Try setting machdep.moea64_bpvo_pool_size tunable",
522 __func__, moea64_bpvo_pool_index,
523 moea64_bpvo_pool_size,
524 moea64_bpvo_pool_size * sizeof(struct pvo_entry));
525 }
526 pvo = &moea64_bpvo_pool[
527 atomic_fetchadd_int(&moea64_bpvo_pool_index, 1)];
528 bzero(pvo, sizeof(*pvo));
529 pvo->pvo_vaddr = PVO_BOOTSTRAP;
530 } else
531 pvo = uma_zalloc(moea64_pvo_zone, M_NOWAIT | M_ZERO);
532
533 return (pvo);
534 }
535
536 static void
537 init_pvo_entry(struct pvo_entry *pvo, pmap_t pmap, vm_offset_t va)
538 {
539 uint64_t vsid;
540 uint64_t hash;
541 int shift;
542
543 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
544
545 pvo->pvo_pmap = pmap;
546 va &= ~ADDR_POFF;
547 pvo->pvo_vaddr |= va;
548 vsid = va_to_vsid(pmap, va);
549 pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
550 | (vsid << 16);
551
552 if (pmap == kernel_pmap && (pvo->pvo_vaddr & PVO_LARGE) != 0)
553 shift = moea64_large_page_shift;
554 else
555 shift = ADDR_PIDX_SHFT;
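/*
* Hash the VSID against the virtual page index to pick a PTEG; each PTEG
* holds eight PTE slots, hence the shift by 3 below.
*/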
556 hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift);
557 pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3;
558 }
559
560 static void
561 free_pvo_entry(struct pvo_entry *pvo)
562 {
563
564 if (!(pvo->pvo_vaddr & PVO_BOOTSTRAP))
565 uma_zfree(moea64_pvo_zone, pvo);
566 }
567
568 void
569 moea64_pte_from_pvo(const struct pvo_entry *pvo, struct lpte *lpte)
570 {
571
572 lpte->pte_hi = moea64_pte_vpn_from_pvo_vpn(pvo);
573 lpte->pte_hi |= LPTE_VALID;
574
575 if (pvo->pvo_vaddr & PVO_LARGE)
576 lpte->pte_hi |= LPTE_BIG;
577 if (pvo->pvo_vaddr & PVO_WIRED)
578 lpte->pte_hi |= LPTE_WIRED;
579 if (pvo->pvo_vaddr & PVO_HID)
580 lpte->pte_hi |= LPTE_HID;
581
582 lpte->pte_lo = pvo->pvo_pte.pa; /* Includes WIMG bits */
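/* PP bits: LPTE_BW allows read/write access, LPTE_BR is read-only. */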
583 if (pvo->pvo_pte.prot & VM_PROT_WRITE)
584 lpte->pte_lo |= LPTE_BW;
585 else
586 lpte->pte_lo |= LPTE_BR;
587
588 if (!(pvo->pvo_pte.prot & VM_PROT_EXECUTE))
589 lpte->pte_lo |= LPTE_NOEXEC;
590 }
591
592 static __inline uint64_t
593 moea64_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
594 {
595 uint64_t pte_lo;
596 int i;
597
598 if (ma != VM_MEMATTR_DEFAULT) {
599 switch (ma) {
600 case VM_MEMATTR_UNCACHEABLE:
601 return (LPTE_I | LPTE_G);
602 case VM_MEMATTR_CACHEABLE:
603 return (LPTE_M);
604 case VM_MEMATTR_WRITE_COMBINING:
605 case VM_MEMATTR_WRITE_BACK:
606 case VM_MEMATTR_PREFETCHABLE:
607 return (LPTE_I);
608 case VM_MEMATTR_WRITE_THROUGH:
609 return (LPTE_W | LPTE_M);
610 }
611 }
612
613 /*
614 * Assume the page is cache inhibited and access is guarded unless
615 * it's in our available memory array.
616 */
617 pte_lo = LPTE_I | LPTE_G;
618 for (i = 0; i < pregions_sz; i++) {
619 if ((pa >= pregions[i].mr_start) &&
620 (pa < (pregions[i].mr_start + pregions[i].mr_size))) {
621 pte_lo &= ~(LPTE_I | LPTE_G);
622 pte_lo |= LPTE_M;
623 break;
624 }
625 }
626
627 return pte_lo;
628 }
629
630 /*
631 * Quick sort callout for comparing memory regions.
632 */
633 static int om_cmp(const void *a, const void *b);
634
635 static int
636 om_cmp(const void *a, const void *b)
637 {
638 const struct ofw_map *mapa;
639 const struct ofw_map *mapb;
640
641 mapa = a;
642 mapb = b;
643 if (mapa->om_pa < mapb->om_pa)
644 return (-1);
645 else if (mapa->om_pa > mapb->om_pa)
646 return (1);
647 else
648 return (0);
649 }
650
651 static void
652 moea64_add_ofw_mappings(phandle_t mmu, size_t sz)
653 {
654 struct ofw_map translations[sz/(4*sizeof(cell_t))]; /*>= 4 cells per */
655 pcell_t acells, trans_cells[sz/sizeof(cell_t)];
656 struct pvo_entry *pvo;
657 register_t msr;
658 vm_offset_t off;
659 vm_paddr_t pa_base;
660 int i, j;
661
662 bzero(translations, sz);
663 OF_getencprop(OF_finddevice("/"), "#address-cells", &acells,
664 sizeof(acells));
665 if (OF_getencprop(mmu, "translations", trans_cells, sz) == -1)
666 panic("moea64_bootstrap: can't get ofw translations");
667
668 CTR0(KTR_PMAP, "moea64_add_ofw_mappings: translations");
669 sz /= sizeof(cell_t);
670 for (i = 0, j = 0; i < sz; j++) {
671 translations[j].om_va = trans_cells[i++];
672 translations[j].om_len = trans_cells[i++];
673 translations[j].om_pa = trans_cells[i++];
674 if (acells == 2) {
675 translations[j].om_pa <<= 32;
676 translations[j].om_pa |= trans_cells[i++];
677 }
678 translations[j].om_mode = trans_cells[i++];
679 }
680 KASSERT(i == sz, ("Translations map has incorrect cell count (%d/%zd)",
681 i, sz));
682
683 sz = j;
684 qsort(translations, sz, sizeof (*translations), om_cmp);
685
686 for (i = 0; i < sz; i++) {
687 pa_base = translations[i].om_pa;
688 #ifndef __powerpc64__
689 if ((translations[i].om_pa >> 32) != 0)
690 panic("OFW translations above 32-bit boundary!");
691 #endif
692
693 if (pa_base % PAGE_SIZE)
694 panic("OFW translation not page-aligned (phys)!");
695 if (translations[i].om_va % PAGE_SIZE)
696 panic("OFW translation not page-aligned (virt)!");
697
698 CTR3(KTR_PMAP, "translation: pa=%#zx va=%#x len=%#x",
699 pa_base, translations[i].om_va, translations[i].om_len);
700
701 /* Now enter the pages for this mapping */
702
703 DISABLE_TRANS(msr);
704 for (off = 0; off < translations[i].om_len; off += PAGE_SIZE) {
705 /* If this address is direct-mapped, skip remapping */
706 if (hw_direct_map &&
707 translations[i].om_va == PHYS_TO_DMAP(pa_base) &&
708 moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT)
709 == LPTE_M)
710 continue;
711
712 PMAP_LOCK(kernel_pmap);
713 pvo = moea64_pvo_find_va(kernel_pmap,
714 translations[i].om_va + off);
715 PMAP_UNLOCK(kernel_pmap);
716 if (pvo != NULL)
717 continue;
718
719 moea64_kenter(translations[i].om_va + off,
720 pa_base + off);
721 }
722 ENABLE_TRANS(msr);
723 }
724 }
725
726 #ifdef __powerpc64__
727 static void
728 moea64_probe_large_page(void)
729 {
730 uint16_t pvr = mfpvr() >> 16;
731
732 switch (pvr) {
733 case IBM970:
734 case IBM970FX:
735 case IBM970MP:
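/*
* The 970 ("G5") may come up with large pages disabled through HID4;
* clear the disable bit before advertising 16 MB pages.
*/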
736 powerpc_sync(); isync();
737 mtspr(SPR_HID4, mfspr(SPR_HID4) & ~HID4_970_DISABLE_LG_PG);
738 powerpc_sync(); isync();
739
740 /* FALLTHROUGH */
741 default:
742 if (moea64_large_page_size == 0) {
743 moea64_large_page_size = 0x1000000; /* 16 MB */
744 moea64_large_page_shift = 24;
745 }
746 }
747
748 moea64_large_page_mask = moea64_large_page_size - 1;
749 }
750
751 static void
752 moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
753 {
754 struct slb *cache;
755 struct slb entry;
756 uint64_t esid, slbe;
757 uint64_t i;
758
759 cache = PCPU_GET(aim.slb);
760 esid = va >> ADDR_SR_SHFT;
761 slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
762
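/* Nothing to do if this ESID is already in the per-CPU SLB cache. */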
763 for (i = 0; i < 64; i++) {
764 if (cache[i].slbe == (slbe | i))
765 return;
766 }
767
768 entry.slbe = slbe;
769 entry.slbv = KERNEL_VSID(esid) << SLBV_VSID_SHIFT;
770 if (large)
771 entry.slbv |= SLBV_L;
772
773 slb_insert_kernel(entry.slbe, entry.slbv);
774 }
775 #endif
776
777 static int
778 moea64_kenter_large(vm_offset_t va, vm_paddr_t pa, uint64_t attr, int bootstrap)
779 {
780 struct pvo_entry *pvo;
781 uint64_t pte_lo;
782 int error;
783
784 pte_lo = LPTE_M;
785 pte_lo |= attr;
786
787 pvo = alloc_pvo_entry(bootstrap);
788 pvo->pvo_vaddr |= PVO_WIRED | PVO_LARGE;
789 init_pvo_entry(pvo, kernel_pmap, va);
790
791 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE |
792 VM_PROT_EXECUTE;
793 pvo->pvo_pte.pa = pa | pte_lo;
794 error = moea64_pvo_enter(pvo, NULL, NULL);
795 if (error != 0)
796 panic("Error %d inserting large page\n", error);
797 return (0);
798 }
799
800 static void
801 moea64_setup_direct_map(vm_offset_t kernelstart,
802 vm_offset_t kernelend)
803 {
804 register_t msr;
805 vm_paddr_t pa, pkernelstart, pkernelend;
806 vm_offset_t size, off;
807 uint64_t pte_lo;
808 int i;
809
810 if (moea64_large_page_size == 0)
811 hw_direct_map = 0;
812
813 DISABLE_TRANS(msr);
814 if (hw_direct_map) {
815 PMAP_LOCK(kernel_pmap);
816 for (i = 0; i < pregions_sz; i++) {
817 for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
818 pregions[i].mr_size; pa += moea64_large_page_size) {
819 pte_lo = LPTE_M;
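/*
* Map large pages that are not fully backed by this region as
* guarded, so speculative accesses cannot touch the unbacked part.
*/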
820 if (pa & moea64_large_page_mask) {
821 pa &= moea64_large_page_mask;
822 pte_lo |= LPTE_G;
823 }
824 if (pa + moea64_large_page_size >
825 pregions[i].mr_start + pregions[i].mr_size)
826 pte_lo |= LPTE_G;
827
828 moea64_kenter_large(PHYS_TO_DMAP(pa), pa, pte_lo, 1);
829 }
830 }
831 PMAP_UNLOCK(kernel_pmap);
832 }
833
834 /*
835 * Make sure the kernel and BPVO pool stay mapped on systems either
836 * without a direct map or on which the kernel is not already executing
837 * out of the direct-mapped region.
838 */
839 if (kernelstart < DMAP_BASE_ADDRESS) {
840 /*
841 * For pre-dmap execution, we need to use identity mapping
842 * because we will be operating with the mmu on but in the
843 * wrong address configuration until we __restartkernel().
844 */
845 for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
846 pa += PAGE_SIZE)
847 moea64_kenter(pa, pa);
848 } else if (!hw_direct_map) {
849 pkernelstart = kernelstart & ~DMAP_BASE_ADDRESS;
850 pkernelend = kernelend & ~DMAP_BASE_ADDRESS;
851 for (pa = pkernelstart & ~PAGE_MASK; pa < pkernelend;
852 pa += PAGE_SIZE)
853 moea64_kenter(pa | DMAP_BASE_ADDRESS, pa);
854 }
855
856 if (!hw_direct_map) {
857 size = moea64_bpvo_pool_size*sizeof(struct pvo_entry);
858 off = (vm_offset_t)(moea64_bpvo_pool);
859 for (pa = off; pa < off + size; pa += PAGE_SIZE)
860 moea64_kenter(pa, pa);
861
862 /* Map exception vectors */
863 for (pa = EXC_RSVD; pa < EXC_LAST; pa += PAGE_SIZE)
864 moea64_kenter(pa | DMAP_BASE_ADDRESS, pa);
865 }
866 ENABLE_TRANS(msr);
867
868 /*
869 * Allow user to override unmapped_buf_allowed for testing.
870 * XXXKIB Only direct map implementation was tested.
871 */
872 if (!TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed",
873 &unmapped_buf_allowed))
874 unmapped_buf_allowed = hw_direct_map;
875 }
876
877 /* Quick sort callout for comparing physical addresses. */
878 static int
879 pa_cmp(const void *a, const void *b)
880 {
881 const vm_paddr_t *pa = a, *pb = b;
882
883 if (*pa < *pb)
884 return (-1);
885 else if (*pa > *pb)
886 return (1);
887 else
888 return (0);
889 }
890
891 void
892 moea64_early_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
893 {
894 int i, j;
895 vm_size_t physsz, hwphyssz;
896 vm_paddr_t kernelphysstart, kernelphysend;
897 int rm_pavail;
898
899 /* Level 0 reservations consist of 4096 pages (16MB superpage). */
900 vm_level_0_order = VM_LEVEL_0_ORDER_HPT;
901
902 #ifndef __powerpc64__
903 /* We don't have a direct map since there is no BAT */
904 hw_direct_map = 0;
905
906 /* Make sure battable is zero, since we have no BAT */
907 for (i = 0; i < 16; i++) {
908 battable[i].batu = 0;
909 battable[i].batl = 0;
910 }
911 #else
912 /* Install trap handlers for SLBs */
913 bcopy(&slbtrap, (void *)EXC_DSE,(size_t)&slbtrapend - (size_t)&slbtrap);
914 bcopy(&slbtrap, (void *)EXC_ISE,(size_t)&slbtrapend - (size_t)&slbtrap);
915 __syncicache((void *)EXC_DSE, 0x80);
916 __syncicache((void *)EXC_ISE, 0x80);
917 #endif
918
919 kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
920 kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
921
922 /* Get physical memory regions from firmware */
923 mem_regions(&pregions, &pregions_sz, ®ions, ®ions_sz);
924 CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
925
926 if (PHYS_AVAIL_ENTRIES < regions_sz)
927 panic("moea64_bootstrap: phys_avail too small");
928
929 phys_avail_count = 0;
930 physsz = 0;
931 hwphyssz = 0;
932 TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
933 for (i = 0, j = 0; i < regions_sz; i++, j += 2) {
934 CTR3(KTR_PMAP, "region: %#zx - %#zx (%#zx)",
935 regions[i].mr_start, regions[i].mr_start +
936 regions[i].mr_size, regions[i].mr_size);
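/* Honor a hw.physmem limit, truncating the last usable region. */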
937 if (hwphyssz != 0 &&
938 (physsz + regions[i].mr_size) >= hwphyssz) {
939 if (physsz < hwphyssz) {
940 phys_avail[j] = regions[i].mr_start;
941 phys_avail[j + 1] = regions[i].mr_start +
942 hwphyssz - physsz;
943 physsz = hwphyssz;
944 phys_avail_count++;
945 dump_avail[j] = phys_avail[j];
946 dump_avail[j + 1] = phys_avail[j + 1];
947 }
948 break;
949 }
950 phys_avail[j] = regions[i].mr_start;
951 phys_avail[j + 1] = regions[i].mr_start + regions[i].mr_size;
952 phys_avail_count++;
953 physsz += regions[i].mr_size;
954 dump_avail[j] = phys_avail[j];
955 dump_avail[j + 1] = phys_avail[j + 1];
956 }
957
958 /* Check for overlap with the kernel and exception vectors */
959 rm_pavail = 0;
960 for (j = 0; j < 2*phys_avail_count; j+=2) {
961 if (phys_avail[j] < EXC_LAST)
962 phys_avail[j] += EXC_LAST;
963
964 if (phys_avail[j] >= kernelphysstart &&
965 phys_avail[j+1] <= kernelphysend) {
966 phys_avail[j] = phys_avail[j+1] = ~0;
967 rm_pavail++;
968 continue;
969 }
970
971 if (kernelphysstart >= phys_avail[j] &&
972 kernelphysstart < phys_avail[j+1]) {
973 if (kernelphysend < phys_avail[j+1]) {
974 phys_avail[2*phys_avail_count] =
975 (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
976 phys_avail[2*phys_avail_count + 1] =
977 phys_avail[j+1];
978 phys_avail_count++;
979 }
980
981 phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
982 }
983
984 if (kernelphysend >= phys_avail[j] &&
985 kernelphysend < phys_avail[j+1]) {
986 if (kernelphysstart > phys_avail[j]) {
987 phys_avail[2*phys_avail_count] = phys_avail[j];
988 phys_avail[2*phys_avail_count + 1] =
989 kernelphysstart & ~PAGE_MASK;
990 phys_avail_count++;
991 }
992
993 phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
994 PAGE_SIZE;
995 }
996 }
997
998 /* Remove physical available regions marked for removal (~0) */
999 if (rm_pavail) {
1000 qsort(phys_avail, 2*phys_avail_count, sizeof(phys_avail[0]),
1001 pa_cmp);
1002 phys_avail_count -= rm_pavail;
1003 for (i = 2*phys_avail_count;
1004 i < 2*(phys_avail_count + rm_pavail); i+=2)
1005 phys_avail[i] = phys_avail[i+1] = 0;
1006 }
1007
1008 physmem = btoc(physsz);
1009
1010 #ifdef PTEGCOUNT
1011 moea64_pteg_count = PTEGCOUNT;
1012 #else
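/*
* Size the hashed page table at roughly one PTEG per two pages of
* physical memory, rounded to a power of two.
*/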
1013 moea64_pteg_count = 0x1000;
1014
1015 while (moea64_pteg_count < physmem)
1016 moea64_pteg_count <<= 1;
1017
1018 moea64_pteg_count >>= 1;
1019 #endif /* PTEGCOUNT */
1020 }
1021
1022 void
1023 moea64_mid_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
1024 {
1025 int i;
1026
1027 /*
1028 * Set PTEG mask
1029 */
1030 moea64_pteg_mask = moea64_pteg_count - 1;
1031
1032 /*
1033 * Initialize SLB table lock and page locks
1034 */
1035 mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF);
1036 for (i = 0; i < PV_LOCK_COUNT; i++)
1037 rw_init(&pv_lock[i], "pv lock");
1038
1039 /*
1040 * Initialise the bootstrap pvo pool.
1041 */
1042 TUNABLE_INT_FETCH("machdep.moea64_bpvo_pool_size", &moea64_bpvo_pool_size);
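/*
* Without a direct map, the bootstrap pool must be large enough to map
* the vm_page array page by page (with some slack); otherwise the
* historical default suffices.
*/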
1043 if (moea64_bpvo_pool_size == 0) {
1044 if (!hw_direct_map)
1045 moea64_bpvo_pool_size = ((ptoa((uintmax_t)physmem) * sizeof(struct vm_page)) /
1046 (PAGE_SIZE * PAGE_SIZE)) * BPVO_POOL_EXPANSION_FACTOR;
1047 else
1048 moea64_bpvo_pool_size = BPVO_POOL_SIZE;
1049 }
1050
1051 if (boothowto & RB_VERBOSE) {
1052 printf("mmu_oea64: bpvo pool entries = %d, bpvo pool size = %zu MB\n",
1053 moea64_bpvo_pool_size,
1054 moea64_bpvo_pool_size*sizeof(struct pvo_entry) / 1048576);
1055 }
1056
1057 moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc(
1058 moea64_bpvo_pool_size*sizeof(struct pvo_entry), PAGE_SIZE);
1059 moea64_bpvo_pool_index = 0;
1060
1061 /* Place at address usable through the direct map */
1062 if (hw_direct_map)
1063 moea64_bpvo_pool = (struct pvo_entry *)
1064 PHYS_TO_DMAP((uintptr_t)moea64_bpvo_pool);
1065
1066 /*
1067 * Make sure kernel vsid is allocated as well as VSID 0.
1068 */
1069 #ifndef __powerpc64__
1070 moea64_vsid_bitmap[(KERNEL_VSIDBITS & (NVSIDS - 1)) / VSID_NBPW]
1071 |= 1 << (KERNEL_VSIDBITS % VSID_NBPW);
1072 moea64_vsid_bitmap[0] |= 1;
1073 #endif
1074
1075 /*
1076 * Initialize the kernel pmap (which is statically allocated).
1077 */
1078 #ifdef __powerpc64__
1079 for (i = 0; i < 64; i++) {
1080 pcpup->pc_aim.slb[i].slbv = 0;
1081 pcpup->pc_aim.slb[i].slbe = 0;
1082 }
1083 #else
1084 for (i = 0; i < 16; i++)
1085 kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
1086 #endif
1087
1088 kernel_pmap->pmap_phys = kernel_pmap;
1089 CPU_FILL(&kernel_pmap->pm_active);
1090 RB_INIT(&kernel_pmap->pmap_pvo);
1091
1092 PMAP_LOCK_INIT(kernel_pmap);
1093
1094 /*
1095 * Now map in all the other buffers we allocated earlier
1096 */
1097
1098 moea64_setup_direct_map(kernelstart, kernelend);
1099 }
1100
1101 void
1102 moea64_late_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
1103 {
1104 ihandle_t mmui;
1105 phandle_t chosen;
1106 phandle_t mmu;
1107 ssize_t sz;
1108 int i;
1109 vm_offset_t pa, va;
1110 void *dpcpu;
1111
1112 /*
1113 * Set up the Open Firmware pmap and add its mappings if not in real
1114 * mode.
1115 */
1116
1117 chosen = OF_finddevice("/chosen");
1118 if (chosen != -1 && OF_getencprop(chosen, "mmu", &mmui, 4) != -1) {
1119 mmu = OF_instance_to_package(mmui);
1120 if (mmu == -1 ||
1121 (sz = OF_getproplen(mmu, "translations")) == -1)
1122 sz = 0;
1123 if (sz > 6144 /* tmpstksz - 2 KB headroom */)
1124 panic("moea64_bootstrap: too many ofw translations");
1125
1126 if (sz > 0)
1127 moea64_add_ofw_mappings(mmu, sz);
1128 }
1129
1130 /*
1131 * Calculate the last available physical address.
1132 */
1133 Maxmem = 0;
1134 for (i = 0; phys_avail[i + 1] != 0; i += 2)
1135 Maxmem = MAX(Maxmem, powerpc_btop(phys_avail[i + 1]));
1136
1137 /*
1138 * Initialize MMU.
1139 */
1140 pmap_cpu_bootstrap(0);
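/* Enable instruction and data translation now that the HPT is usable. */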
1141 mtmsr(mfmsr() | PSL_DR | PSL_IR);
1142 pmap_bootstrapped++;
1143
1144 /*
1145 * Set the start and end of kva.
1146 */
1147 virtual_avail = VM_MIN_KERNEL_ADDRESS;
1148 virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS;
1149
1150 /*
1151 * Map the entire KVA range into the SLB. We must not fault there.
1152 */
1153 #ifdef __powerpc64__
1154 for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
1155 moea64_bootstrap_slb_prefault(va, 0);
1156 #endif
1157
1158 /*
1159 * Remap any early IO mappings (console framebuffer, etc.)
1160 */
1161 bs_remap_earlyboot();
1162
1163 /*
1164 * Figure out how far we can extend virtual_end into segment 16
1165 * without running into existing mappings. Segment 16 is guaranteed
1166 * to contain neither RAM nor devices (at least on Apple hardware),
1167 * but will generally contain some OFW mappings we should not
1168 * step on.
1169 */
1170
1171 #ifndef __powerpc64__ /* KVA is in high memory on PPC64 */
1172 PMAP_LOCK(kernel_pmap);
1173 while (virtual_end < VM_MAX_KERNEL_ADDRESS &&
1174 moea64_pvo_find_va(kernel_pmap, virtual_end+1) == NULL)
1175 virtual_end += PAGE_SIZE;
1176 PMAP_UNLOCK(kernel_pmap);
1177 #endif
1178
1179 /*
1180 * Allocate a kernel stack with a guard page for thread0 and map it
1181 * into the kernel page map.
1182 */
1183 pa = moea64_bootstrap_alloc(kstack_pages * PAGE_SIZE, PAGE_SIZE);
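/* Guard pages below the stack are left unmapped so overflows fault. */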
1184 va = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
1185 virtual_avail = va + kstack_pages * PAGE_SIZE;
1186 CTR2(KTR_PMAP, "moea64_bootstrap: kstack0 at %#x (%#x)", pa, va);
1187 thread0.td_kstack = va;
1188 thread0.td_kstack_pages = kstack_pages;
1189 for (i = 0; i < kstack_pages; i++) {
1190 moea64_kenter(va, pa);
1191 pa += PAGE_SIZE;
1192 va += PAGE_SIZE;
1193 }
1194
1195 /*
1196 * Allocate virtual address space for the message buffer.
1197 */
1198 pa = msgbuf_phys = moea64_bootstrap_alloc(msgbufsize, PAGE_SIZE);
1199 msgbufp = (struct msgbuf *)virtual_avail;
1200 va = virtual_avail;
1201 virtual_avail += round_page(msgbufsize);
1202 while (va < virtual_avail) {
1203 moea64_kenter(va, pa);
1204 pa += PAGE_SIZE;
1205 va += PAGE_SIZE;
1206 }
1207
1208 /*
1209 * Allocate virtual address space for the dynamic percpu area.
1210 */
1211 pa = moea64_bootstrap_alloc(DPCPU_SIZE, PAGE_SIZE);
1212 dpcpu = (void *)virtual_avail;
1213 va = virtual_avail;
1214 virtual_avail += DPCPU_SIZE;
1215 while (va < virtual_avail) {
1216 moea64_kenter(va, pa);
1217 pa += PAGE_SIZE;
1218 va += PAGE_SIZE;
1219 }
1220 dpcpu_init(dpcpu, curcpu);
1221
1222 crashdumpmap = (caddr_t)virtual_avail;
1223 virtual_avail += MAXDUMPPGS * PAGE_SIZE;
1224
1225 /*
1226 * Allocate some things for page zeroing. We put this directly
1227 * in the page table and use MOEA64_PTE_REPLACE to avoid any
1228 * of the PVO book-keeping or other parts of the VM system
1229 * from even knowing that this hack exists.
1230 */
1231
1232 if (!hw_direct_map) {
1233 mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL,
1234 MTX_DEF);
1235 for (i = 0; i < 2; i++) {
1236 moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE;
1237 virtual_end -= PAGE_SIZE;
1238
1239 moea64_kenter(moea64_scratchpage_va[i], 0);
1240
1241 PMAP_LOCK(kernel_pmap);
1242 moea64_scratchpage_pvo[i] = moea64_pvo_find_va(
1243 kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]);
1244 PMAP_UNLOCK(kernel_pmap);
1245 }
1246 }
1247
1248 numa_mem_regions(&numa_pregions, &numapregions_sz);
1249 }
1250
1251 static void
1252 moea64_pmap_init_qpages(void *dummy __unused)
1253 {
1254 struct pcpu *pc;
1255 int i;
1256
1257 if (hw_direct_map)
1258 return;
1259
1260 CPU_FOREACH(i) {
1261 pc = pcpu_find(i);
1262 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
1263 if (pc->pc_qmap_addr == 0)
1264 panic("pmap_init_qpages: unable to allocate KVA");
1265 PMAP_LOCK(kernel_pmap);
1266 pc->pc_aim.qmap_pvo =
1267 moea64_pvo_find_va(kernel_pmap, pc->pc_qmap_addr);
1268 PMAP_UNLOCK(kernel_pmap);
1269 mtx_init(&pc->pc_aim.qmap_lock, "qmap lock", NULL, MTX_DEF);
1270 }
1271 }
1272
1273 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, moea64_pmap_init_qpages, NULL);
1274
1275 /*
1276 * Activate a user pmap. This mostly involves setting some non-CPU
1277 * state.
1278 */
1279 void
1280 moea64_activate(struct thread *td)
1281 {
1282 pmap_t pm;
1283
1284 pm = &td->td_proc->p_vmspace->vm_pmap;
1285 CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
1286
1287 #ifdef __powerpc64__
1288 PCPU_SET(aim.userslb, pm->pm_slb);
1289 __asm __volatile("slbmte %0, %1; isync" ::
1290 "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), "r"(USER_SLB_SLBE));
1291 #else
1292 PCPU_SET(curpmap, pm->pmap_phys);
1293 mtsrin(USER_SR << ADDR_SR_SHFT, td->td_pcb->pcb_cpu.aim.usr_vsid);
1294 #endif
1295 }
1296
1297 void
1298 moea64_deactivate(struct thread *td)
1299 {
1300 pmap_t pm;
1301
1302 __asm __volatile("isync; slbie %0" :: "r"(USER_ADDR));
1303
1304 pm = &td->td_proc->p_vmspace->vm_pmap;
1305 CPU_CLR(PCPU_GET(cpuid), &pm->pm_active);
1306 #ifdef __powerpc64__
1307 PCPU_SET(aim.userslb, NULL);
1308 #else
1309 PCPU_SET(curpmap, NULL);
1310 #endif
1311 }
1312
1313 void
1314 moea64_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
1315 {
1316 struct pvo_entry key, *pvo;
1317 vm_page_t m;
1318 int64_t refchg;
1319
1320 key.pvo_vaddr = sva;
1321 PMAP_LOCK(pm);
1322 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
1323 pvo != NULL && PVO_VADDR(pvo) < eva;
1324 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
1325 if (PVO_IS_SP(pvo)) {
1326 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
1327 pvo = moea64_sp_unwire(pvo);
1328 continue;
1329 } else {
1330 CTR1(KTR_PMAP, "%s: demote before unwire",
1331 __func__);
1332 moea64_sp_demote(pvo);
1333 }
1334 }
1335
1336 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
1337 panic("moea64_unwire: pvo %p is missing PVO_WIRED",
1338 pvo);
1339 pvo->pvo_vaddr &= ~PVO_WIRED;
1340 refchg = moea64_pte_replace(pvo, 0 /* No invalidation */);
1341 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
1342 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
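/*
* A negative return means no valid PTE was found in the table;
* conservatively treat the page as modified.
*/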
1343 if (refchg < 0)
1344 refchg = LPTE_CHG;
1345 m = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
1346
1347 refchg |= atomic_readandclear_32(&m->md.mdpg_attrs);
1348 if (refchg & LPTE_CHG)
1349 vm_page_dirty(m);
1350 if (refchg & LPTE_REF)
1351 vm_page_aflag_set(m, PGA_REFERENCED);
1352 }
1353 pm->pm_stats.wired_count--;
1354 }
1355 PMAP_UNLOCK(pm);
1356 }
1357
1358 static int
1359 moea64_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
1360 {
1361 struct pvo_entry *pvo;
1362 vm_paddr_t pa;
1363 vm_page_t m;
1364 int val;
1365 bool managed;
1366
1367 PMAP_LOCK(pmap);
1368
1369 pvo = moea64_pvo_find_va(pmap, addr);
1370 if (pvo != NULL) {
1371 pa = PVO_PADDR(pvo);
1372 m = PHYS_TO_VM_PAGE(pa);
1373 managed = (pvo->pvo_vaddr & PVO_MANAGED) == PVO_MANAGED;
1374 if (PVO_IS_SP(pvo))
1375 val = MINCORE_INCORE | MINCORE_PSIND(1);
1376 else
1377 val = MINCORE_INCORE;
1378 } else {
1379 PMAP_UNLOCK(pmap);
1380 return (0);
1381 }
1382
1383 PMAP_UNLOCK(pmap);
1384
1385 if (m == NULL)
1386 return (0);
1387
1388 if (managed) {
1389 if (moea64_is_modified(m))
1390 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
1391
1392 if (moea64_is_referenced(m))
1393 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
1394 }
1395
1396 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
1397 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
1398 managed) {
1399 *pap = pa;
1400 }
1401
1402 return (val);
1403 }
1404
1405 /*
1406 * This goes through and sets the physical address of our
1407 * special scratch PTE to the PA we want to zero or copy. Because
1408 * of locking issues (this can get called in pvo_enter() by
1409 * the UMA allocator), we can't use most other utility functions here
1410 */
1411
1412 static __inline
1413 void moea64_set_scratchpage_pa(int which, vm_paddr_t pa)
1414 {
1415 struct pvo_entry *pvo;
1416
1417 KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!"));
1418 mtx_assert(&moea64_scratchpage_mtx, MA_OWNED);
1419
1420 pvo = moea64_scratchpage_pvo[which];
1421 PMAP_LOCK(pvo->pvo_pmap);
1422 pvo->pvo_pte.pa =
1423 moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa;
1424 moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
1425 PMAP_UNLOCK(pvo->pvo_pmap);
1426 isync();
1427 }
1428
1429 void
1430 moea64_copy_page(vm_page_t msrc, vm_page_t mdst)
1431 {
1432 mtx_lock(&moea64_scratchpage_mtx);
1433
1434 moea64_set_scratchpage_pa(0, VM_PAGE_TO_PHYS(msrc));
1435 moea64_set_scratchpage_pa(1, VM_PAGE_TO_PHYS(mdst));
1436
1437 bcopy((void *)moea64_scratchpage_va[0],
1438 (void *)moea64_scratchpage_va[1], PAGE_SIZE);
1439
1440 mtx_unlock(&moea64_scratchpage_mtx);
1441 }
1442
1443 void
1444 moea64_copy_page_dmap(vm_page_t msrc, vm_page_t mdst)
1445 {
1446 vm_offset_t dst;
1447 vm_offset_t src;
1448
1449 dst = VM_PAGE_TO_PHYS(mdst);
1450 src = VM_PAGE_TO_PHYS(msrc);
1451
1452 bcopy((void *)PHYS_TO_DMAP(src), (void *)PHYS_TO_DMAP(dst),
1453 PAGE_SIZE);
1454 }
1455
1456 inline void
1457 moea64_copy_pages_dmap(vm_page_t *ma, vm_offset_t a_offset,
1458 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1459 {
1460 void *a_cp, *b_cp;
1461 vm_offset_t a_pg_offset, b_pg_offset;
1462 int cnt;
1463
1464 while (xfersize > 0) {
1465 a_pg_offset = a_offset & PAGE_MASK;
1466 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1467 a_cp = (char *)(uintptr_t)PHYS_TO_DMAP(
1468 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT])) +
1469 a_pg_offset;
1470 b_pg_offset = b_offset & PAGE_MASK;
1471 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1472 b_cp = (char *)(uintptr_t)PHYS_TO_DMAP(
1473 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT])) +
1474 b_pg_offset;
1475 bcopy(a_cp, b_cp, cnt);
1476 a_offset += cnt;
1477 b_offset += cnt;
1478 xfersize -= cnt;
1479 }
1480 }
1481
1482 void
1483 moea64_copy_pages(vm_page_t *ma, vm_offset_t a_offset,
1484 vm_page_t *mb, vm_offset_t b_offset, int xfersize)
1485 {
1486 void *a_cp, *b_cp;
1487 vm_offset_t a_pg_offset, b_pg_offset;
1488 int cnt;
1489
1490 mtx_lock(&moea64_scratchpage_mtx);
1491 while (xfersize > 0) {
1492 a_pg_offset = a_offset & PAGE_MASK;
1493 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
1494 moea64_set_scratchpage_pa(0,
1495 VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
1496 a_cp = (char *)moea64_scratchpage_va[0] + a_pg_offset;
1497 b_pg_offset = b_offset & PAGE_MASK;
1498 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
1499 moea64_set_scratchpage_pa(1,
1500 VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
1501 b_cp = (char *)moea64_scratchpage_va[1] + b_pg_offset;
1502 bcopy(a_cp, b_cp, cnt);
1503 a_offset += cnt;
1504 b_offset += cnt;
1505 xfersize -= cnt;
1506 }
1507 mtx_unlock(&moea64_scratchpage_mtx);
1508 }
1509
1510 void
1511 moea64_zero_page_area(vm_page_t m, int off, int size)
1512 {
1513 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1514
1515 if (size + off > PAGE_SIZE)
1516 panic("moea64_zero_page: size + off > PAGE_SIZE");
1517
1518 if (hw_direct_map) {
1519 bzero((caddr_t)(uintptr_t)PHYS_TO_DMAP(pa) + off, size);
1520 } else {
1521 mtx_lock(&moea64_scratchpage_mtx);
1522 moea64_set_scratchpage_pa(0, pa);
1523 bzero((caddr_t)moea64_scratchpage_va[0] + off, size);
1524 mtx_unlock(&moea64_scratchpage_mtx);
1525 }
1526 }
1527
1528 /*
1529 * Zero a page of physical memory by temporarily mapping it
1530 */
1531 void
1532 moea64_zero_page(vm_page_t m)
1533 {
1534 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1535 vm_offset_t va;
1536
1537 mtx_lock(&moea64_scratchpage_mtx);
1538
1539 moea64_set_scratchpage_pa(0, pa);
1540 va = moea64_scratchpage_va[0];
1541
1542 bzero((void *)va, PAGE_SIZE);
1543
1544 mtx_unlock(&moea64_scratchpage_mtx);
1545 }
1546
1547 void
1548 moea64_zero_page_dmap(vm_page_t m)
1549 {
1550 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1551 vm_offset_t va;
1552
1553 va = PHYS_TO_DMAP(pa);
1554 bzero((void *)va, PAGE_SIZE);
1555 }
1556
1557 vm_offset_t
1558 moea64_quick_enter_page(vm_page_t m)
1559 {
1560 struct pvo_entry *pvo;
1561 vm_paddr_t pa = VM_PAGE_TO_PHYS(m);
1562
1563 /*
1564 * MOEA64_PTE_REPLACE does some locking, so we can't just grab
1565 * a critical section and access the PCPU data like on i386.
1566 * Instead, pin the thread and grab the PCPU lock to prevent
1567 * a preempting thread from using the same PCPU data.
1568 */
1569 sched_pin();
1570
1571 mtx_assert(PCPU_PTR(aim.qmap_lock), MA_NOTOWNED);
1572 pvo = PCPU_GET(aim.qmap_pvo);
1573
1574 mtx_lock(PCPU_PTR(aim.qmap_lock));
1575 pvo->pvo_pte.pa = moea64_calc_wimg(pa, pmap_page_get_memattr(m)) |
1576 (uint64_t)pa;
1577 moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
1578 isync();
1579
1580 return (PCPU_GET(qmap_addr));
1581 }
1582
1583 vm_offset_t
1584 moea64_quick_enter_page_dmap(vm_page_t m)
1585 {
1586
1587 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
1588 }
1589
1590 void
1591 moea64_quick_remove_page(vm_offset_t addr)
1592 {
1593
1594 mtx_assert(PCPU_PTR(aim.qmap_lock), MA_OWNED);
1595 KASSERT(PCPU_GET(qmap_addr) == addr,
1596 ("moea64_quick_remove_page: invalid address"));
1597 mtx_unlock(PCPU_PTR(aim.qmap_lock));
1598 sched_unpin();
1599 }
1600
1601 bool
1602 moea64_page_is_mapped(vm_page_t m)
1603 {
1604 return (!LIST_EMPTY(&(m)->md.mdpg_pvoh));
1605 }
1606
1607 /*
1608 * Map the given physical page at the specified virtual address in the
1609 * target pmap with the protection requested. If specified the page
1610 * will be wired down.
1611 */
1612
1613 int
1614 moea64_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
1615 vm_prot_t prot, u_int flags, int8_t psind)
1616 {
1617 struct pvo_entry *pvo, *oldpvo, *tpvo;
1618 struct pvo_head *pvo_head;
1619 uint64_t pte_lo;
1620 int error;
1621 vm_paddr_t pa;
1622
1623 if ((m->oflags & VPO_UNMANAGED) == 0) {
1624 if ((flags & PMAP_ENTER_QUICK_LOCKED) == 0)
1625 VM_PAGE_OBJECT_BUSY_ASSERT(m);
1626 else
1627 VM_OBJECT_ASSERT_LOCKED(m->object);
1628 }
1629
1630 if (psind > 0)
1631 return (moea64_sp_enter(pmap, va, m, prot, flags, psind));
1632
1633 pvo = alloc_pvo_entry(0);
1634 if (pvo == NULL)
1635 return (KERN_RESOURCE_SHORTAGE);
1636 pvo->pvo_pmap = NULL; /* to be filled in later */
1637 pvo->pvo_pte.prot = prot;
1638
1639 pa = VM_PAGE_TO_PHYS(m);
1640 pte_lo = moea64_calc_wimg(pa, pmap_page_get_memattr(m));
1641 pvo->pvo_pte.pa = pa | pte_lo;
1642
1643 if ((flags & PMAP_ENTER_WIRED) != 0)
1644 pvo->pvo_vaddr |= PVO_WIRED;
1645
1646 if ((m->oflags & VPO_UNMANAGED) != 0 || !moea64_initialized) {
1647 pvo_head = NULL;
1648 } else {
1649 pvo_head = &m->md.mdpg_pvoh;
1650 pvo->pvo_vaddr |= PVO_MANAGED;
1651 }
1652
1653 PV_WR_LOCK(pa);
1654 PMAP_LOCK(pmap);
1655 if (pvo->pvo_pmap == NULL)
1656 init_pvo_entry(pvo, pmap, va);
1657
1658 if (moea64_ps_enabled(pmap) &&
1659 (tpvo = moea64_pvo_find_va(pmap, va & ~HPT_SP_MASK)) != NULL &&
1660 PVO_IS_SP(tpvo)) {
1661 /* Demote SP before entering a regular page */
1662 CTR2(KTR_PMAP, "%s: demote before enter: va=%#jx",
1663 __func__, (uintmax_t)va);
1664 moea64_sp_demote_aligned(tpvo);
1665 }
1666
1667 if (prot & VM_PROT_WRITE)
1668 if (pmap_bootstrapped &&
1669 (m->oflags & VPO_UNMANAGED) == 0)
1670 vm_page_aflag_set(m, PGA_WRITEABLE);
1671
1672 error = moea64_pvo_enter(pvo, pvo_head, &oldpvo);
1673 if (error == EEXIST) {
1674 if (oldpvo->pvo_vaddr == pvo->pvo_vaddr &&
1675 oldpvo->pvo_pte.pa == pvo->pvo_pte.pa &&
1676 oldpvo->pvo_pte.prot == prot) {
1677 /* Identical mapping already exists */
1678 error = 0;
1679
1680 /* If not in page table, reinsert it */
1681 if (moea64_pte_synch(oldpvo) < 0) {
1682 STAT_MOEA64(moea64_pte_overflow--);
1683 moea64_pte_insert(oldpvo);
1684 }
1685
1686 /* Then just clean up and go home */
1687 PMAP_UNLOCK(pmap);
1688 PV_UNLOCK(pa);
1689 free_pvo_entry(pvo);
1690 pvo = NULL;
1691 goto out;
1692 } else {
1693 /* Otherwise, need to kill it first */
1694 KASSERT(oldpvo->pvo_pmap == pmap, ("pmap of old "
1695 "mapping does not match new mapping"));
1696 moea64_pvo_remove_from_pmap(oldpvo);
1697 moea64_pvo_enter(pvo, pvo_head, NULL);
1698 }
1699 }
1700 PMAP_UNLOCK(pmap);
1701 PV_UNLOCK(pa);
1702
1703 /* Free any dead pages */
1704 if (error == EEXIST) {
1705 moea64_pvo_remove_from_page(oldpvo);
1706 free_pvo_entry(oldpvo);
1707 }
1708
1709 out:
1710 /*
1711 * Flush the page from the instruction cache if this page is
1712 * mapped executable and cacheable.
1713 */
1714 if (pmap != kernel_pmap && (m->a.flags & PGA_EXECUTABLE) == 0 &&
1715 (pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
1716 vm_page_aflag_set(m, PGA_EXECUTABLE);
1717 moea64_syncicache(pmap, va, pa, PAGE_SIZE);
1718 }
1719
1720 #if VM_NRESERVLEVEL > 0
1721 /*
1722 * Try to promote pages.
1723 *
1724 * If the VA of the entered page is not aligned with its PA,
1725 * don't try page promotion as it is not possible.
1726 * This reduces the number of promotion failures dramatically.
1727 *
1728 * Ignore VM_PROT_NO_PROMOTE unless PMAP_ENTER_QUICK_LOCKED.
1729 */
1730 if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL &&
1731 (pvo->pvo_vaddr & PVO_MANAGED) != 0 &&
1732 (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) &&
1733 ((prot & VM_PROT_NO_PROMOTE) == 0 ||
1734 (flags & PMAP_ENTER_QUICK_LOCKED) == 0) &&
1735 (m->flags & PG_FICTITIOUS) == 0 &&
1736 vm_reserv_level_iffullpop(m) == 0)
1737 moea64_sp_promote(pmap, va, m);
1738 #endif
1739
1740 return (KERN_SUCCESS);
1741 }
1742
1743 static void
1744 moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
1745 vm_size_t sz)
1746 {
1747
1748 /*
1749 * This is much trickier than on older systems because
1750 * we can't sync the icache on physical addresses directly
1751 * without a direct map. Instead we check a couple of cases
1752 * where the memory is already mapped in and, failing that,
1753 * use the same trick we use for page zeroing to create
1754 * a temporary mapping for this physical address.
1755 */
1756
1757 if (!pmap_bootstrapped) {
1758 /*
1759 * If PMAP is not bootstrapped, we are likely to be
1760 * in real mode.
1761 */
1762 __syncicache((void *)(uintptr_t)pa, sz);
1763 } else if (pmap == kernel_pmap) {
1764 __syncicache((void *)va, sz);
1765 } else if (hw_direct_map) {
1766 __syncicache((void *)(uintptr_t)PHYS_TO_DMAP(pa), sz);
1767 } else {
1768 /* Use the scratch page to set up a temp mapping */
1769
1770 mtx_lock(&moea64_scratchpage_mtx);
1771
1772 moea64_set_scratchpage_pa(1, pa & ~ADDR_POFF);
1773 __syncicache((void *)(moea64_scratchpage_va[1] +
1774 (va & ADDR_POFF)), sz);
1775
1776 mtx_unlock(&moea64_scratchpage_mtx);
1777 }
1778 }
1779
1780 /*
1781 * Maps a sequence of resident pages belonging to the same object.
1782 * The sequence begins with the given page m_start. This page is
1783 * mapped at the given virtual address start. Each subsequent page is
1784 * mapped at a virtual address that is offset from start by the same
1785 * amount as the page is offset from m_start within the object. The
1786 * last page in the sequence is the page with the largest offset from
1787 * m_start that can be mapped at a virtual address less than the given
1788 * virtual address end. Not every virtual page between start and end
1789 * is mapped; only those for which a resident page exists with the
1790 * corresponding offset from m_start are mapped.
1791 */
1792 void
1793 moea64_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
1794 vm_page_t m_start, vm_prot_t prot)
1795 {
1796 struct pctrie_iter pages;
1797 vm_page_t m;
1798 vm_offset_t va;
1799 int8_t psind;
1800
1801 VM_OBJECT_ASSERT_LOCKED(m_start->object);
1802
1803 vm_page_iter_limit_init(&pages, m_start->object,
1804 m_start->pindex + atop(end - start));
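	/*
	 * Walk the resident pages with a radix iterator.  Enter a 16MB
	 * (psind == 1) mapping when the page backs a fully populated
	 * superpage (m->psind == 1), superpages are enabled for this pmap,
	 * the virtual address is superpage-aligned, and the whole superpage
	 * fits below 'end'; otherwise enter a 4KB mapping.  After a
	 * superpage, skip ahead by a superpage's worth of pages.
	 */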
1805 m = vm_radix_iter_lookup(&pages, m_start->pindex);
1806 while (m != NULL) {
1807 va = start + ptoa(m->pindex - m_start->pindex);
1808 if ((va & HPT_SP_MASK) == 0 && va + HPT_SP_SIZE <= end &&
1809 m->psind == 1 && moea64_ps_enabled(pm))
1810 psind = 1;
1811 else
1812 psind = 0;
1813 moea64_enter(pm, va, m, prot &
1814 (VM_PROT_READ | VM_PROT_EXECUTE),
1815 PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, psind);
1816 if (psind == 1)
1817 m = vm_radix_iter_jump(&pages, HPT_SP_SIZE / PAGE_SIZE);
1818 else
1819 m = vm_radix_iter_step(&pages);
1820 }
1821 }
1822
1823 void
1824 moea64_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m,
1825 vm_prot_t prot)
1826 {
1827
1828 moea64_enter(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE |
1829 VM_PROT_NO_PROMOTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED,
1830 0);
1831 }
1832
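/*
 * Extract the physical page address associated with the given pmap and
 * virtual address pair.  Returns 0 if no mapping exists.
 */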
1833 vm_paddr_t
1834 moea64_extract(pmap_t pm, vm_offset_t va)
1835 {
1836 struct pvo_entry *pvo;
1837 vm_paddr_t pa;
1838
1839 PMAP_LOCK(pm);
1840 pvo = moea64_pvo_find_va(pm, va);
1841 if (pvo == NULL)
1842 pa = 0;
1843 else
1844 pa = PVO_PADDR(pvo) | (va - PVO_VADDR(pvo));
1845 PMAP_UNLOCK(pm);
1846
1847 return (pa);
1848 }
1849
1850 /*
1851 * Atomically extract and hold the physical page with the given
1852 * pmap and virtual address pair if that mapping permits the given
1853 * protection.
1854 */
1855 vm_page_t
1856 moea64_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
1857 {
1858 struct pvo_entry *pvo;
1859 vm_page_t m;
1860
1861 m = NULL;
1862 PMAP_LOCK(pmap);
1863 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1864 if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) {
1865 m = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
1866 if (!vm_page_wire_mapped(m))
1867 m = NULL;
1868 }
1869 PMAP_UNLOCK(pmap);
1870 return (m);
1871 }
1872
1873 static void *
1874 moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
1875 uint8_t *flags, int wait)
1876 {
1877 struct pvo_entry *pvo;
1878 vm_offset_t va;
1879 vm_page_t m;
1880 int needed_lock;
1881
1882 /*
1883 * This entire routine is a horrible hack to avoid bothering kmem
1884 * for new KVA addresses. Because this can get called from inside
1885 * kmem allocation routines, calling kmem for a new address here
1886 * can lead to multiply locking non-recursive mutexes.
1887 */
1888
1889 *flags = UMA_SLAB_PRIV;
1890 needed_lock = !PMAP_LOCKED(kernel_pmap);
1891
1892 m = vm_page_alloc_noobj_domain(domain, malloc2vm_flags(wait) |
1893 VM_ALLOC_WIRED);
1894 if (m == NULL)
1895 return (NULL);
1896
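	/*
	 * Map the page at a kernel virtual address equal to its physical
	 * address, so no KVA allocation is needed; the wired PVO is entered
	 * by hand into the kernel pmap below.
	 */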
1897 va = VM_PAGE_TO_PHYS(m);
1898
1899 pvo = alloc_pvo_entry(1 /* bootstrap */);
1900
1901 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE;
1902 pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | LPTE_M;
1903
1904 if (needed_lock)
1905 PMAP_LOCK(kernel_pmap);
1906
1907 init_pvo_entry(pvo, kernel_pmap, va);
1908 pvo->pvo_vaddr |= PVO_WIRED;
1909
1910 moea64_pvo_enter(pvo, NULL, NULL);
1911
1912 if (needed_lock)
1913 PMAP_UNLOCK(kernel_pmap);
1914
1915 return (void *)va;
1916 }
1917
1918 extern int elf32_nxstack;
1919
1920 void
1921 moea64_init(void)
1922 {
1923
1924 CTR0(KTR_PMAP, "moea64_init");
1925
1926 moea64_pvo_zone = uma_zcreate("UPVO entry", sizeof (struct pvo_entry),
1927 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
1928 UMA_ZONE_VM | UMA_ZONE_NOFREE);
1929
1930 /* Are large page mappings enabled? */
1931 superpages_enabled = 1;
1932 TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
1933 if (superpages_enabled) {
1934 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
1935 ("moea64_init: can't assign to pagesizes[1]"));
1936
1937 if (moea64_large_page_size == 0) {
1938 printf("mmu_oea64: HW does not support large pages. "
1939 "Disabling superpages...\n");
1940 superpages_enabled = 0;
1941 } else if (!moea64_has_lp_4k_16m) {
1942 printf("mmu_oea64: "
1943 "HW does not support mixed 4KB/16MB page sizes. "
1944 "Disabling superpages...\n");
1945 superpages_enabled = 0;
1946 } else
1947 pagesizes[1] = HPT_SP_SIZE;
1948 }
1949
1950 if (!hw_direct_map) {
1951 uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc);
1952 }
1953
1954 #ifdef COMPAT_FREEBSD32
1955 elf32_nxstack = 1;
1956 #endif
1957
1958 moea64_initialized = true;
1959 }
1960
1961 bool
1962 moea64_is_referenced(vm_page_t m)
1963 {
1964
1965 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1966 ("moea64_is_referenced: page %p is not managed", m));
1967
1968 return (moea64_query_bit(m, LPTE_REF));
1969 }
1970
1971 bool
1972 moea64_is_modified(vm_page_t m)
1973 {
1974
1975 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1976 ("moea64_is_modified: page %p is not managed", m));
1977
1978 /*
1979 * If the page is not busied then this check is racy.
1980 */
1981 if (!pmap_page_is_write_mapped(m))
1982 return (false);
1983
1984 return (moea64_query_bit(m, LPTE_CHG));
1985 }
1986
1987 bool
1988 moea64_is_prefaultable(pmap_t pmap, vm_offset_t va)
1989 {
1990 struct pvo_entry *pvo;
1991 bool rv = true;
1992
1993 PMAP_LOCK(pmap);
1994 pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF);
1995 if (pvo != NULL)
1996 rv = false;
1997 PMAP_UNLOCK(pmap);
1998 return (rv);
1999 }
2000
2001 void
2002 moea64_clear_modify(vm_page_t m)
2003 {
2004
2005 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2006 ("moea64_clear_modify: page %p is not managed", m));
2007 vm_page_assert_busied(m);
2008
2009 if (!pmap_page_is_write_mapped(m))
2010 return;
2011 moea64_clear_bit(m, LPTE_CHG);
2012 }
2013
2014 /*
2015 * Clear the write and modified bits in each of the given page's mappings.
2016 */
2017 void
2018 moea64_remove_write(vm_page_t m)
2019 {
2020 struct pvo_entry *pvo;
2021 int64_t refchg, ret;
2022 pmap_t pmap;
2023
2024 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2025 ("moea64_remove_write: page %p is not managed", m));
2026 vm_page_assert_busied(m);
2027
2028 if (!pmap_page_is_write_mapped(m))
2029 return;
2030
2031 powerpc_sync();
2032 PV_PAGE_WR_LOCK(m);
2033 refchg = 0;
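	/*
	 * Strip write access from every mapping of the page, demoting any
	 * 16MB mappings first, and accumulate the REF/CHG bits returned by
	 * the PTE update so they can be pushed back to the vm_page below.
	 * A negative return from moea64_pte_replace() means the PTE had
	 * been evicted, so pessimistically assume the page was modified.
	 */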
2034 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2035 pmap = pvo->pvo_pmap;
2036 PMAP_LOCK(pmap);
2037 if (!(pvo->pvo_vaddr & PVO_DEAD) &&
2038 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2039 if (PVO_IS_SP(pvo)) {
2040 CTR1(KTR_PMAP, "%s: demote before remwr",
2041 __func__);
2042 moea64_sp_demote(pvo);
2043 }
2044 pvo->pvo_pte.prot &= ~VM_PROT_WRITE;
2045 ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
2046 if (ret < 0)
2047 ret = LPTE_CHG;
2048 refchg |= ret;
2049 if (pvo->pvo_pmap == kernel_pmap)
2050 isync();
2051 }
2052 PMAP_UNLOCK(pmap);
2053 }
2054 if ((refchg | atomic_readandclear_32(&m->md.mdpg_attrs)) & LPTE_CHG)
2055 vm_page_dirty(m);
2056 vm_page_aflag_clear(m, PGA_WRITEABLE);
2057 PV_PAGE_UNLOCK(m);
2058 }
2059
2060 /*
2061 * moea64_ts_referenced:
2062 *
2063 * Return a count of reference bits for a page, clearing those bits.
2064 * It is not necessary for every reference bit to be cleared, but it
2065 * is necessary that 0 only be returned when there are truly no
2066 * reference bits set.
2067 *
2068 * XXX: The exact number of bits to check and clear is a matter that
2069 * should be tested and standardized at some point in the future for
2070 * optimal aging of shared pages.
2071 */
2072 int
2073 moea64_ts_referenced(vm_page_t m)
2074 {
2075
2076 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2077 ("moea64_ts_referenced: page %p is not managed", m));
2078 return (moea64_clear_bit(m, LPTE_REF));
2079 }
2080
2081 /*
2082 * Modify the WIMG settings of all mappings for a page.
2083 */
2084 void
2085 moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2086 {
2087 struct pvo_entry *pvo;
2088 int64_t refchg;
2089 pmap_t pmap;
2090 uint64_t lo;
2091
2092 CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x",
2093 __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma);
2094
2095 if (m->md.mdpg_cache_attrs == ma)
2096 return;
2097
2098 if ((m->oflags & VPO_UNMANAGED) != 0) {
2099 m->md.mdpg_cache_attrs = ma;
2100 return;
2101 }
2102
2103 lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), ma);
2104
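	/*
	 * Rewrite the WIMG bits of every mapping of the page.  Superpage
	 * mappings are demoted first; each PTE is replaced with an
	 * invalidating update so the new attributes take effect, and any
	 * REF/CHG state is folded back into the vm_page.
	 */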
2105 PV_PAGE_WR_LOCK(m);
2106 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2107 pmap = pvo->pvo_pmap;
2108 PMAP_LOCK(pmap);
2109 if (!(pvo->pvo_vaddr & PVO_DEAD)) {
2110 if (PVO_IS_SP(pvo)) {
2111 CTR1(KTR_PMAP,
2112 "%s: demote before set_memattr", __func__);
2113 moea64_sp_demote(pvo);
2114 }
2115 pvo->pvo_pte.pa &= ~LPTE_WIMG;
2116 pvo->pvo_pte.pa |= lo;
2117 refchg = moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
2118 if (refchg < 0)
2119 refchg = (pvo->pvo_pte.prot & VM_PROT_WRITE) ?
2120 LPTE_CHG : 0;
2121 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2122 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2123 refchg |=
2124 atomic_readandclear_32(&m->md.mdpg_attrs);
2125 if (refchg & LPTE_CHG)
2126 vm_page_dirty(m);
2127 if (refchg & LPTE_REF)
2128 vm_page_aflag_set(m, PGA_REFERENCED);
2129 }
2130 if (pvo->pvo_pmap == kernel_pmap)
2131 isync();
2132 }
2133 PMAP_UNLOCK(pmap);
2134 }
2135 m->md.mdpg_cache_attrs = ma;
2136 PV_PAGE_UNLOCK(m);
2137 }
2138
2139 /*
2140 * Map a wired page into kernel virtual address space.
2141 */
2142 void
2143 moea64_kenter_attr(vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
2144 {
2145 int error;
2146 struct pvo_entry *pvo, *oldpvo;
2147
2148 do {
2149 pvo = alloc_pvo_entry(0);
2150 if (pvo == NULL)
2151 vm_wait(NULL);
2152 } while (pvo == NULL);
2153 pvo->pvo_pte.prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
2154 pvo->pvo_pte.pa = (pa & ~ADDR_POFF) | moea64_calc_wimg(pa, ma);
2155 pvo->pvo_vaddr |= PVO_WIRED;
2156
2157 PMAP_LOCK(kernel_pmap);
2158 oldpvo = moea64_pvo_find_va(kernel_pmap, va);
2159 if (oldpvo != NULL)
2160 moea64_pvo_remove_from_pmap(oldpvo);
2161 init_pvo_entry(pvo, kernel_pmap, va);
2162 error = moea64_pvo_enter(pvo, NULL, NULL);
2163 PMAP_UNLOCK(kernel_pmap);
2164
2165 /* Free any dead pages */
2166 if (oldpvo != NULL) {
2167 moea64_pvo_remove_from_page(oldpvo);
2168 free_pvo_entry(oldpvo);
2169 }
2170
2171 if (error != 0)
2172 panic("moea64_kenter: failed to enter va %#zx pa %#jx: %d", va,
2173 (uintmax_t)pa, error);
2174 }
2175
2176 void
2177 moea64_kenter(vm_offset_t va, vm_paddr_t pa)
2178 {
2179
2180 moea64_kenter_attr(va, pa, VM_MEMATTR_DEFAULT);
2181 }
2182
2183 /*
2184 * Extract the physical page address associated with the given kernel virtual
2185 * address.
2186 */
2187 vm_paddr_t
2188 moea64_kextract(vm_offset_t va)
2189 {
2190 struct pvo_entry *pvo;
2191 vm_paddr_t pa;
2192
2193 /*
2194 * Shortcut the direct-mapped case when applicable. We never put
2195 * anything but 1:1 (or 62-bit aliased) mappings below
2196 * VM_MIN_KERNEL_ADDRESS.
2197 */
2198 if (va < VM_MIN_KERNEL_ADDRESS)
2199 return (va & ~DMAP_BASE_ADDRESS);
2200
2201 PMAP_LOCK(kernel_pmap);
2202 pvo = moea64_pvo_find_va(kernel_pmap, va);
2203 KASSERT(pvo != NULL, ("moea64_kextract: no addr found for %#" PRIxPTR,
2204 va));
2205 pa = PVO_PADDR(pvo) | (va - PVO_VADDR(pvo));
2206 PMAP_UNLOCK(kernel_pmap);
2207 return (pa);
2208 }
2209
2210 /*
2211 * Remove a wired page from kernel virtual address space.
2212 */
2213 void
2214 moea64_kremove(vm_offset_t va)
2215 {
2216 moea64_remove(kernel_pmap, va, va + PAGE_SIZE);
2217 }
2218
2219 /*
2220 * Provide a kernel pointer corresponding to a given userland pointer.
2221 * The returned pointer is valid until the next time this function is
2222 * called in this thread. This is used internally in copyin/copyout.
2223 */
2224 static int
2225 moea64_map_user_ptr(pmap_t pm, volatile const void *uaddr,
2226 void **kaddr, size_t ulen, size_t *klen)
2227 {
2228 size_t l;
2229 #ifdef __powerpc64__
2230 struct slb *slb;
2231 #endif
2232 register_t slbv;
2233
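	/*
	 * Rebase the user address into the dedicated USER_ADDR kernel
	 * segment and clamp the length so the window never crosses a
	 * segment boundary.
	 */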
2234 *kaddr = (char *)USER_ADDR + ((uintptr_t)uaddr & ~SEGMENT_MASK);
2235 l = ((char *)USER_ADDR + SEGMENT_LENGTH) - (char *)(*kaddr);
2236 if (l > ulen)
2237 l = ulen;
2238 if (klen)
2239 *klen = l;
2240 else if (l != ulen)
2241 return (EFAULT);
2242
2243 #ifdef __powerpc64__
2244 /* Try lockless look-up first */
2245 slb = user_va_to_slb_entry(pm, (vm_offset_t)uaddr);
2246
2247 if (slb == NULL) {
2248 /* If it isn't there, we need to pre-fault the VSID */
2249 PMAP_LOCK(pm);
2250 slbv = va_to_vsid(pm, (vm_offset_t)uaddr) << SLBV_VSID_SHIFT;
2251 PMAP_UNLOCK(pm);
2252 } else {
2253 slbv = slb->slbv;
2254 }
2255
2256 /* Mark segment no-execute */
2257 slbv |= SLBV_N;
2258 #else
2259 slbv = va_to_vsid(pm, (vm_offset_t)uaddr);
2260
2261 /* Mark segment no-execute */
2262 slbv |= SR_N;
2263 #endif
2264
2265 /* If we have already set this VSID, we can just return */
2266 if (curthread->td_pcb->pcb_cpu.aim.usr_vsid == slbv)
2267 return (0);
2268
2269 __asm __volatile("isync");
2270 curthread->td_pcb->pcb_cpu.aim.usr_segm =
2271 (uintptr_t)uaddr >> ADDR_SR_SHFT;
2272 curthread->td_pcb->pcb_cpu.aim.usr_vsid = slbv;
2273 #ifdef __powerpc64__
2274 __asm __volatile ("slbie %0; slbmte %1, %2; isync" ::
2275 "r"(USER_ADDR), "r"(slbv), "r"(USER_SLB_SLBE));
2276 #else
2277 __asm __volatile("mtsr %0,%1; isync" :: "n"(USER_SR), "r"(slbv));
2278 #endif
2279
2280 return (0);
2281 }
2282
2283 /*
2284 * Figure out where a given kernel pointer (usually in a fault) points
2285 * to from the VM's perspective, potentially remapping into userland's
2286 * address space.
2287 */
2288 static int
2289 moea64_decode_kernel_ptr(vm_offset_t addr, int *is_user,
2290 vm_offset_t *decoded_addr)
2291 {
2292 vm_offset_t user_sr;
2293
2294 if ((addr >> ADDR_SR_SHFT) == (USER_ADDR >> ADDR_SR_SHFT)) {
2295 user_sr = curthread->td_pcb->pcb_cpu.aim.usr_segm;
2296 addr &= ADDR_PIDX | ADDR_POFF;
2297 addr |= user_sr << ADDR_SR_SHFT;
2298 *decoded_addr = addr;
2299 *is_user = 1;
2300 } else {
2301 *decoded_addr = addr;
2302 *is_user = 0;
2303 }
2304
2305 return (0);
2306 }
2307
2308 /*
2309 * Map a range of physical addresses into kernel virtual address space.
2310 *
2311 * The value passed in *virt is a suggested virtual address for the mapping.
2312 * Architectures which can support a direct-mapped physical to virtual region
2313 * can return the appropriate address within that region, leaving '*virt'
2314 * unchanged. Other architectures should map the pages starting at '*virt' and
2315 * update '*virt' with the first usable address after the mapped region.
2316 */
2317 vm_offset_t
2318 moea64_map(vm_offset_t *virt, vm_paddr_t pa_start,
2319 vm_paddr_t pa_end, int prot)
2320 {
2321 vm_offset_t sva, va;
2322
2323 if (hw_direct_map) {
2324 /*
2325 * Check if every page in the region is covered by the direct
2326 * map. The direct map covers all of physical memory. Use
2327 * moea64_calc_wimg() as a shortcut to see if the page is in
2328 * physical memory as a way to see if the direct map covers it.
2329 */
2330 for (va = pa_start; va < pa_end; va += PAGE_SIZE)
2331 if (moea64_calc_wimg(va, VM_MEMATTR_DEFAULT) != LPTE_M)
2332 break;
2333 if (va == pa_end)
2334 return (PHYS_TO_DMAP(pa_start));
2335 }
2336 sva = *virt;
2337 va = sva;
2338 /* XXX respect prot argument */
2339 for (; pa_start < pa_end; pa_start += PAGE_SIZE, va += PAGE_SIZE)
2340 moea64_kenter(va, pa_start);
2341 *virt = va;
2342
2343 return (sva);
2344 }
2345
2346 /*
2347 * Returns true if the pmap's pv is one of the first
2348 * 16 pvs linked to from this page. This count may
2349 * be changed upwards or downwards in the future; it
2350 * is only necessary that true be returned for a small
2351 * subset of pmaps for proper page aging.
2352 */
2353 bool
2354 moea64_page_exists_quick(pmap_t pmap, vm_page_t m)
2355 {
2356 int loops;
2357 struct pvo_entry *pvo;
2358 bool rv;
2359
2360 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2361 ("moea64_page_exists_quick: page %p is not managed", m));
2362 loops = 0;
2363 rv = false;
2364 PV_PAGE_RD_LOCK(m);
2365 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
2366 if (!(pvo->pvo_vaddr & PVO_DEAD) && pvo->pvo_pmap == pmap) {
2367 rv = true;
2368 break;
2369 }
2370 if (++loops >= 16)
2371 break;
2372 }
2373 PV_PAGE_UNLOCK(m);
2374 return (rv);
2375 }
2376
2377 void
2378 moea64_page_init(vm_page_t m)
2379 {
2380
2381 m->md.mdpg_attrs = 0;
2382 m->md.mdpg_cache_attrs = VM_MEMATTR_DEFAULT;
2383 LIST_INIT(&m->md.mdpg_pvoh);
2384 }
2385
2386 /*
2387 * Return the number of managed mappings to the given physical page
2388 * that are wired.
2389 */
2390 int
2391 moea64_page_wired_mappings(vm_page_t m)
2392 {
2393 struct pvo_entry *pvo;
2394 int count;
2395
2396 count = 0;
2397 if ((m->oflags & VPO_UNMANAGED) != 0)
2398 return (count);
2399 PV_PAGE_RD_LOCK(m);
2400 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink)
2401 if ((pvo->pvo_vaddr & (PVO_DEAD | PVO_WIRED)) == PVO_WIRED)
2402 count++;
2403 PV_PAGE_UNLOCK(m);
2404 return (count);
2405 }
2406
2407 static uintptr_t moea64_vsidcontext;
2408
2409 uintptr_t
2410 moea64_get_unique_vsid(void) {
2411 u_int entropy;
2412 register_t hash;
2413 uint32_t mask;
2414 int i;
2415
2416 entropy = 0;
2417 __asm __volatile("mftb %0" : "=r"(entropy));
2418
2419 mtx_lock(&moea64_slb_mutex);
2420 for (i = 0; i < NVSIDS; i += VSID_NBPW) {
2421 u_int n;
2422
2423 /*
2424 * Create a new value by multiplying by a prime and adding in
2425 * entropy from the timebase register. This is to make the
2426 * VSID more random so that the PT hash function collides
2427 * less often. (Note that the prime causes gcc to do shifts
2428 * instead of a multiply.)
2429 */
2430 moea64_vsidcontext = (moea64_vsidcontext * 0x1105) + entropy;
2431 hash = moea64_vsidcontext & (NVSIDS - 1);
2432 if (hash == 0) /* 0 is special, avoid it */
2433 continue;
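		/*
		 * The VSID bitmap is an array of 32-bit words: 'n' selects
		 * the word and 'mask' the bit within it (VSID_NBPW bits per
		 * word).
		 */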
2434 n = hash >> 5;
2435 mask = 1 << (hash & (VSID_NBPW - 1));
2436 hash = (moea64_vsidcontext & VSID_HASHMASK);
2437 if (moea64_vsid_bitmap[n] & mask) { /* collision? */
2438 /* anything free in this bucket? */
2439 if (moea64_vsid_bitmap[n] == 0xffffffff) {
2440 entropy = (moea64_vsidcontext >> 20);
2441 continue;
2442 }
2443 i = ffs(~moea64_vsid_bitmap[n]) - 1;
2444 mask = 1 << i;
2445 hash &= rounddown2(VSID_HASHMASK, VSID_NBPW);
2446 hash |= i;
2447 }
2448 if (hash == VSID_VRMA) /* also special, avoid this too */
2449 continue;
2450 KASSERT(!(moea64_vsid_bitmap[n] & mask),
2451 ("Allocating in-use VSID %#zx\n", hash));
2452 moea64_vsid_bitmap[n] |= mask;
2453 mtx_unlock(&moea64_slb_mutex);
2454 return (hash);
2455 }
2456
2457 mtx_unlock(&moea64_slb_mutex);
2458 panic("%s: out of segments",__func__);
2459 }
2460
2461 #ifdef __powerpc64__
2462 int
2463 moea64_pinit(pmap_t pmap)
2464 {
2465
2466 RB_INIT(&pmap->pmap_pvo);
2467
2468 pmap->pm_slb_tree_root = slb_alloc_tree();
2469 pmap->pm_slb = slb_alloc_user_cache();
2470 pmap->pm_slb_len = 0;
2471
2472 return (1);
2473 }
2474 #else
2475 int
2476 moea64_pinit(pmap_t pmap)
2477 {
2478 int i;
2479 uint32_t hash;
2480
2481 RB_INIT(&pmap->pmap_pvo);
2482
2483 if (pmap_bootstrapped)
2484 pmap->pmap_phys = (pmap_t)moea64_kextract((vm_offset_t)pmap);
2485 else
2486 pmap->pmap_phys = pmap;
2487
2488 /*
2489 * Allocate some segment registers for this pmap.
2490 */
2491 hash = moea64_get_unique_vsid();
2492
2493 for (i = 0; i < 16; i++)
2494 pmap->pm_sr[i] = VSID_MAKE(i, hash);
2495
2496 KASSERT(pmap->pm_sr[0] != 0, ("moea64_pinit: pm_sr[0] = 0"));
2497
2498 return (1);
2499 }
2500 #endif
2501
2502 /*
2503 * Initialize the pmap associated with process 0.
2504 */
2505 void
2506 moea64_pinit0(pmap_t pm)
2507 {
2508
2509 PMAP_LOCK_INIT(pm);
2510 moea64_pinit(pm);
2511 bzero(&pm->pm_stats, sizeof(pm->pm_stats));
2512 }
2513
2514 /*
2515 * Set the physical protection on the specified range of this map as requested.
2516 */
2517 static void
2518 moea64_pvo_protect( pmap_t pm, struct pvo_entry *pvo, vm_prot_t prot)
2519 {
2520 struct vm_page *pg;
2521 vm_prot_t oldprot;
2522 int32_t refchg;
2523
2524 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2525
2526 /*
2527 * Change the protection of the page.
2528 */
2529 oldprot = pvo->pvo_pte.prot;
2530 pvo->pvo_pte.prot = prot;
2531 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2532
2533 /*
2534 * If the PVO is in the page table, update mapping
2535 */
2536 refchg = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
2537 if (refchg < 0)
2538 refchg = (oldprot & VM_PROT_WRITE) ? LPTE_CHG : 0;
2539
2540 if (pm != kernel_pmap && pg != NULL &&
2541 (pg->a.flags & PGA_EXECUTABLE) == 0 &&
2542 (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
2543 if ((pg->oflags & VPO_UNMANAGED) == 0)
2544 vm_page_aflag_set(pg, PGA_EXECUTABLE);
2545 moea64_syncicache(pm, PVO_VADDR(pvo),
2546 PVO_PADDR(pvo), PAGE_SIZE);
2547 }
2548
2549 /*
2550 * Update vm about the REF/CHG bits if the page is managed and we have
2551 * removed write access.
2552 */
2553 if (pg != NULL && (pvo->pvo_vaddr & PVO_MANAGED) &&
2554 (oldprot & VM_PROT_WRITE)) {
2555 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2556 if (refchg & LPTE_CHG)
2557 vm_page_dirty(pg);
2558 if (refchg & LPTE_REF)
2559 vm_page_aflag_set(pg, PGA_REFERENCED);
2560 }
2561 }
2562
2563 void
2564 moea64_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2565 vm_prot_t prot)
2566 {
2567 struct pvo_entry *pvo, key;
2568
2569 CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
2570 sva, eva, prot);
2571
2572 KASSERT(pm == &curproc->p_vmspace->vm_pmap || pm == kernel_pmap,
2573 ("moea64_protect: non current pmap"));
2574
2575 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
2576 moea64_remove(pm, sva, eva);
2577 return;
2578 }
2579
2580 PMAP_LOCK(pm);
2581 key.pvo_vaddr = sva;
2582 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2583 pvo != NULL && PVO_VADDR(pvo) < eva;
2584 pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
2585 if (PVO_IS_SP(pvo)) {
2586 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
2587 pvo = moea64_sp_protect(pvo, prot);
2588 continue;
2589 } else {
2590 CTR1(KTR_PMAP, "%s: demote before protect",
2591 __func__);
2592 moea64_sp_demote(pvo);
2593 }
2594 }
2595 moea64_pvo_protect(pm, pvo, prot);
2596 }
2597 PMAP_UNLOCK(pm);
2598 }
2599
2600 /*
2601 * Map a list of wired pages into kernel virtual address space. This is
2602 * intended for temporary mappings which do not need page modification or
2603 * references recorded. Existing mappings in the region are overwritten.
2604 */
2605 void
2606 moea64_qenter(vm_offset_t va, vm_page_t *m, int count)
2607 {
2608 while (count-- > 0) {
2609 moea64_kenter(va, VM_PAGE_TO_PHYS(*m));
2610 va += PAGE_SIZE;
2611 m++;
2612 }
2613 }
2614
2615 /*
2616 * Remove page mappings from kernel virtual address space. Intended for
2617 * temporary mappings entered by moea64_qenter.
2618 */
2619 void
2620 moea64_qremove(vm_offset_t va, int count)
2621 {
2622 while (count-- > 0) {
2623 moea64_kremove(va);
2624 va += PAGE_SIZE;
2625 }
2626 }
2627
2628 void
2629 moea64_release_vsid(uint64_t vsid)
2630 {
2631 int idx, mask;
2632
2633 mtx_lock(&moea64_slb_mutex);
2634 idx = vsid & (NVSIDS-1);
2635 mask = 1 << (idx % VSID_NBPW);
2636 idx /= VSID_NBPW;
2637 KASSERT(moea64_vsid_bitmap[idx] & mask,
2638 ("Freeing unallocated VSID %#jx", vsid));
2639 moea64_vsid_bitmap[idx] &= ~mask;
2640 mtx_unlock(&moea64_slb_mutex);
2641 }
2642
2643 void
2644 moea64_release(pmap_t pmap)
2645 {
2646
2647 /*
2648 * Free segment registers' VSIDs
2649 */
2650 #ifdef __powerpc64__
2651 slb_free_tree(pmap);
2652 slb_free_user_cache(pmap->pm_slb);
2653 #else
2654 KASSERT(pmap->pm_sr[0] != 0, ("moea64_release: pm_sr[0] = 0"));
2655
2656 moea64_release_vsid(VSID_TO_HASH(pmap->pm_sr[0]));
2657 #endif
2658 }
2659
2660 /*
2661 * Remove all pages mapped by the specified pmap
2662 */
2663 void
2664 moea64_remove_pages(pmap_t pm)
2665 {
2666 struct pvo_entry *pvo, *tpvo;
2667 struct pvo_dlist tofree;
2668
2669 SLIST_INIT(&tofree);
2670
2671 PMAP_LOCK(pm);
2672 RB_FOREACH_SAFE(pvo, pvo_tree, &pm->pmap_pvo, tpvo) {
2673 if (pvo->pvo_vaddr & PVO_WIRED)
2674 continue;
2675
2676 /*
2677 * For locking reasons, remove this from the page table and
2678 * pmap, but save delinking from the vm_page for a second
2679 * pass
2680 */
2681 moea64_pvo_remove_from_pmap(pvo);
2682 SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink);
2683 }
2684 PMAP_UNLOCK(pm);
2685
2686 while (!SLIST_EMPTY(&tofree)) {
2687 pvo = SLIST_FIRST(&tofree);
2688 SLIST_REMOVE_HEAD(&tofree, pvo_dlink);
2689 moea64_pvo_remove_from_page(pvo);
2690 free_pvo_entry(pvo);
2691 }
2692 }
2693
2694 static void
2695 moea64_remove_locked(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
2696 struct pvo_dlist *tofree)
2697 {
2698 struct pvo_entry *pvo, *tpvo, key;
2699
2700 PMAP_LOCK_ASSERT(pm, MA_OWNED);
2701
2702 key.pvo_vaddr = sva;
2703 for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
2704 pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
2705 if (PVO_IS_SP(pvo)) {
2706 if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
2707 tpvo = moea64_sp_remove(pvo, tofree);
2708 continue;
2709 } else {
2710 CTR1(KTR_PMAP, "%s: demote before remove",
2711 __func__);
2712 moea64_sp_demote(pvo);
2713 }
2714 }
2715 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
2716
2717 /*
2718 * For locking reasons, remove this from the page table and
2719 * pmap, but save delinking from the vm_page for a second
2720 * pass
2721 */
2722 moea64_pvo_remove_from_pmap(pvo);
2723 SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
2724 }
2725 }
2726
2727 /*
2728 * Remove the given range of addresses from the specified map.
2729 */
2730 void
2731 moea64_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
2732 {
2733 struct pvo_entry *pvo;
2734 struct pvo_dlist tofree;
2735
2736 /*
2737 * Perform an unsynchronized read. This is, however, safe.
2738 */
2739 if (pm->pm_stats.resident_count == 0)
2740 return;
2741
2742 SLIST_INIT(&tofree);
2743 PMAP_LOCK(pm);
2744 moea64_remove_locked(pm, sva, eva, &tofree);
2745 PMAP_UNLOCK(pm);
2746
2747 while (!SLIST_EMPTY(&tofree)) {
2748 pvo = SLIST_FIRST(&tofree);
2749 SLIST_REMOVE_HEAD(&tofree, pvo_dlink);
2750 moea64_pvo_remove_from_page(pvo);
2751 free_pvo_entry(pvo);
2752 }
2753 }
2754
2755 /*
2756 * Remove physical page from all pmaps in which it resides. moea64_pvo_remove()
2757 * will reflect changes in pte's back to the vm_page.
2758 */
2759 void
2760 moea64_remove_all(vm_page_t m)
2761 {
2762 struct pvo_entry *pvo, *next_pvo;
2763 struct pvo_head freequeue;
2764 int wasdead;
2765 pmap_t pmap;
2766
2767 LIST_INIT(&freequeue);
2768
2769 PV_PAGE_WR_LOCK(m);
2770 LIST_FOREACH_SAFE(pvo, vm_page_to_pvoh(m), pvo_vlink, next_pvo) {
2771 pmap = pvo->pvo_pmap;
2772 PMAP_LOCK(pmap);
2773 wasdead = (pvo->pvo_vaddr & PVO_DEAD);
2774 if (!wasdead) {
2775 if (PVO_IS_SP(pvo)) {
2776 CTR1(KTR_PMAP, "%s: demote before remove_all",
2777 __func__);
2778 moea64_sp_demote(pvo);
2779 }
2780 moea64_pvo_remove_from_pmap(pvo);
2781 }
2782 moea64_pvo_remove_from_page_locked(pvo, m);
2783 if (!wasdead)
2784 LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink);
2785 PMAP_UNLOCK(pmap);
2786
2787 }
2788 KASSERT(!pmap_page_is_mapped(m), ("Page still has mappings"));
2789 KASSERT((m->a.flags & PGA_WRITEABLE) == 0, ("Page still writable"));
2790 PV_PAGE_UNLOCK(m);
2791
2792 /* Clean up UMA allocations */
2793 LIST_FOREACH_SAFE(pvo, &freequeue, pvo_vlink, next_pvo)
2794 free_pvo_entry(pvo);
2795 }
2796
2797 /*
2798 * Allocate a physical page of memory directly from the phys_avail map.
2799 * Can only be called from moea64_bootstrap before avail start and end are
2800 * calculated.
2801 */
2802 vm_offset_t
2803 moea64_bootstrap_alloc(vm_size_t size, vm_size_t align)
2804 {
2805 vm_offset_t s, e;
2806 int i, j;
2807
2808 size = round_page(size);
2809 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
2810 if (align != 0)
2811 s = roundup2(phys_avail[i], align);
2812 else
2813 s = phys_avail[i];
2814 e = s + size;
2815
2816 if (s < phys_avail[i] || e > phys_avail[i + 1])
2817 continue;
2818
2819 if (s + size > platform_real_maxaddr())
2820 continue;
2821
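		/*
		 * Carve the allocation out of this phys_avail entry: trim it
		 * from the front or the back when the allocation touches an
		 * edge, otherwise split the entry in two.
		 */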
2822 if (s == phys_avail[i]) {
2823 phys_avail[i] += size;
2824 } else if (e == phys_avail[i + 1]) {
2825 phys_avail[i + 1] -= size;
2826 } else {
2827 for (j = phys_avail_count * 2; j > i; j -= 2) {
2828 phys_avail[j] = phys_avail[j - 2];
2829 phys_avail[j + 1] = phys_avail[j - 1];
2830 }
2831
2832 phys_avail[i + 3] = phys_avail[i + 1];
2833 phys_avail[i + 1] = s;
2834 phys_avail[i + 2] = e;
2835 phys_avail_count++;
2836 }
2837
2838 return (s);
2839 }
2840 panic("moea64_bootstrap_alloc: could not allocate memory");
2841 }
2842
2843 static int
2844 moea64_pvo_enter(struct pvo_entry *pvo, struct pvo_head *pvo_head,
2845 struct pvo_entry **oldpvop)
2846 {
2847 struct pvo_entry *old_pvo;
2848 int err;
2849
2850 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2851
2852 STAT_MOEA64(moea64_pvo_enter_calls++);
2853
2854 /*
2855 * Add to pmap list
2856 */
2857 old_pvo = RB_INSERT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2858
2859 if (old_pvo != NULL) {
2860 if (oldpvop != NULL)
2861 *oldpvop = old_pvo;
2862 return (EEXIST);
2863 }
2864
2865 if (pvo_head != NULL) {
2866 LIST_INSERT_HEAD(pvo_head, pvo, pvo_vlink);
2867 }
2868
2869 if (pvo->pvo_vaddr & PVO_WIRED)
2870 pvo->pvo_pmap->pm_stats.wired_count++;
2871 pvo->pvo_pmap->pm_stats.resident_count++;
2872
2873 /*
2874 * Insert it into the hardware page table
2875 */
2876 err = moea64_pte_insert(pvo);
2877 if (err != 0) {
2878 panic("moea64_pvo_enter: overflow");
2879 }
2880
2881 STAT_MOEA64(moea64_pvo_entries++);
2882
2883 if (pvo->pvo_pmap == kernel_pmap)
2884 isync();
2885
2886 #ifdef __powerpc64__
2887 /*
2888 * Make sure all our bootstrap mappings are in the SLB as soon
2889 * as virtual memory is switched on.
2890 */
2891 if (!pmap_bootstrapped)
2892 moea64_bootstrap_slb_prefault(PVO_VADDR(pvo),
2893 pvo->pvo_vaddr & PVO_LARGE);
2894 #endif
2895
2896 return (0);
2897 }
2898
2899 static void
2900 moea64_pvo_remove_from_pmap(struct pvo_entry *pvo)
2901 {
2902 struct vm_page *pg;
2903 int32_t refchg;
2904
2905 KASSERT(pvo->pvo_pmap != NULL, ("Trying to remove PVO with no pmap"));
2906 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
2907 KASSERT(!(pvo->pvo_vaddr & PVO_DEAD), ("Trying to remove dead PVO"));
2908
2909 /*
2910 * If there is an active pte entry, we need to deactivate it
2911 */
2912 refchg = moea64_pte_unset(pvo);
2913 if (refchg < 0) {
2914 /*
2915 * If it was evicted from the page table, be pessimistic and
2916 * dirty the page.
2917 */
2918 if (pvo->pvo_pte.prot & VM_PROT_WRITE)
2919 refchg = LPTE_CHG;
2920 else
2921 refchg = 0;
2922 }
2923
2924 /*
2925 * Update our statistics.
2926 */
2927 pvo->pvo_pmap->pm_stats.resident_count--;
2928 if (pvo->pvo_vaddr & PVO_WIRED)
2929 pvo->pvo_pmap->pm_stats.wired_count--;
2930
2931 /*
2932 * Remove this PVO from the pmap list.
2933 */
2934 RB_REMOVE(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo);
2935
2936 /*
2937 * Mark this for the next sweep
2938 */
2939 pvo->pvo_vaddr |= PVO_DEAD;
2940
2941 /* Send RC bits to VM */
2942 if ((pvo->pvo_vaddr & PVO_MANAGED) &&
2943 (pvo->pvo_pte.prot & VM_PROT_WRITE)) {
2944 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2945 if (pg != NULL) {
2946 refchg |= atomic_readandclear_32(&pg->md.mdpg_attrs);
2947 if (refchg & LPTE_CHG)
2948 vm_page_dirty(pg);
2949 if (refchg & LPTE_REF)
2950 vm_page_aflag_set(pg, PGA_REFERENCED);
2951 }
2952 }
2953 }
2954
2955 static inline void
2956 moea64_pvo_remove_from_page_locked(struct pvo_entry *pvo,
2957 vm_page_t m)
2958 {
2959
2960 KASSERT(pvo->pvo_vaddr & PVO_DEAD, ("Trying to delink live page"));
2961
2962 /* Use NULL pmaps as a sentinel for races in page deletion */
2963 if (pvo->pvo_pmap == NULL)
2964 return;
2965 pvo->pvo_pmap = NULL;
2966
2967 /*
2968 * Update vm about page writeability/executability if managed
2969 */
2970 PV_LOCKASSERT(PVO_PADDR(pvo));
2971 if (pvo->pvo_vaddr & PVO_MANAGED) {
2972 if (m != NULL) {
2973 LIST_REMOVE(pvo, pvo_vlink);
2974 if (LIST_EMPTY(vm_page_to_pvoh(m)))
2975 vm_page_aflag_clear(m,
2976 PGA_WRITEABLE | PGA_EXECUTABLE);
2977 }
2978 }
2979
2980 STAT_MOEA64(moea64_pvo_entries--);
2981 STAT_MOEA64(moea64_pvo_remove_calls++);
2982 }
2983
2984 static void
2985 moea64_pvo_remove_from_page(struct pvo_entry *pvo)
2986 {
2987 vm_page_t pg = NULL;
2988
2989 if (pvo->pvo_vaddr & PVO_MANAGED)
2990 pg = PHYS_TO_VM_PAGE(PVO_PADDR(pvo));
2991
2992 PV_WR_LOCK(PVO_PADDR(pvo));
2993 moea64_pvo_remove_from_page_locked(pvo, pg);
2994 PV_UNLOCK(PVO_PADDR(pvo));
2995 }
2996
2997 static struct pvo_entry *
2998 moea64_pvo_find_va(pmap_t pm, vm_offset_t va)
2999 {
3000 struct pvo_entry key;
3001
3002 PMAP_LOCK_ASSERT(pm, MA_OWNED);
3003
3004 key.pvo_vaddr = va & ~ADDR_POFF;
3005 return (RB_FIND(pvo_tree, &pm->pmap_pvo, &key));
3006 }
3007
3008 static bool
3009 moea64_query_bit(vm_page_t m, uint64_t ptebit)
3010 {
3011 struct pvo_entry *pvo;
3012 int64_t ret;
3013 bool rv;
3014 vm_page_t sp;
3015
3016 /*
3017 * See if this bit is stored in the page already.
3018 *
3019 * For superpages, the bit is stored in the first vm page.
3020 */
3021 if ((m->md.mdpg_attrs & ptebit) != 0 ||
3022 ((sp = PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK)) != NULL &&
3023 (sp->md.mdpg_attrs & (ptebit | MDPG_ATTR_SP)) ==
3024 (ptebit | MDPG_ATTR_SP)))
3025 return (true);
3026
3027 /*
3028 * Examine each PTE. Sync so that any pending REF/CHG bits are
3029 * flushed to the PTEs.
3030 */
3031 rv = false;
3032 powerpc_sync();
3033 PV_PAGE_RD_LOCK(m);
3034 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
3035 if (PVO_IS_SP(pvo)) {
3036 ret = moea64_sp_query(pvo, ptebit);
3037 /*
3038 * If SP was not demoted, check its REF/CHG bits here.
3039 */
3040 if (ret != -1) {
3041 if ((ret & ptebit) != 0) {
3042 rv = true;
3043 break;
3044 }
3045 continue;
3046 }
3047 /* else, fallthrough */
3048 }
3049
3050 ret = 0;
3051
3052 /*
3053 * See if this pvo has a valid PTE. if so, fetch the
3054 * REF/CHG bits from the valid PTE. If the appropriate
3055 * ptebit is set, return success.
3056 */
3057 PMAP_LOCK(pvo->pvo_pmap);
3058 if (!(pvo->pvo_vaddr & PVO_DEAD))
3059 ret = moea64_pte_synch(pvo);
3060 PMAP_UNLOCK(pvo->pvo_pmap);
3061
3062 if (ret > 0) {
3063 atomic_set_32(&m->md.mdpg_attrs,
3064 ret & (LPTE_CHG | LPTE_REF));
3065 if (ret & ptebit) {
3066 rv = true;
3067 break;
3068 }
3069 }
3070 }
3071 PV_PAGE_UNLOCK(m);
3072
3073 return (rv);
3074 }
3075
3076 static u_int
3077 moea64_clear_bit(vm_page_t m, u_int64_t ptebit)
3078 {
3079 u_int count;
3080 struct pvo_entry *pvo;
3081 int64_t ret;
3082
3083 /*
3084 * Sync so that any pending REF/CHG bits are flushed to the PTEs (so
3085 * we can reset the right ones).
3086 */
3087 powerpc_sync();
3088
3089 /*
3090 * For each pvo entry, clear the pte's ptebit.
3091 */
3092 count = 0;
3093 PV_PAGE_WR_LOCK(m);
3094 LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
3095 if (PVO_IS_SP(pvo)) {
3096 if ((ret = moea64_sp_clear(pvo, m, ptebit)) != -1) {
3097 count += ret;
3098 continue;
3099 }
3100 }
3101 ret = 0;
3102
3103 PMAP_LOCK(pvo->pvo_pmap);
3104 if (!(pvo->pvo_vaddr & PVO_DEAD))
3105 ret = moea64_pte_clear(pvo, ptebit);
3106 PMAP_UNLOCK(pvo->pvo_pmap);
3107
3108 if (ret > 0 && (ret & ptebit))
3109 count++;
3110 }
3111 atomic_clear_32(&m->md.mdpg_attrs, ptebit);
3112 PV_PAGE_UNLOCK(m);
3113
3114 return (count);
3115 }
3116
3117 int
3118 moea64_dev_direct_mapped(vm_paddr_t pa, vm_size_t size)
3119 {
3120 struct pvo_entry *pvo, key;
3121 vm_offset_t ppa;
3122 int error = 0;
3123
3124 if (hw_direct_map && mem_valid(pa, size) == 0)
3125 return (0);
3126
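	/*
	 * Without a usable direct map, walk the kernel PVOs that shadow the
	 * DMAP region and verify that every page of the range is mapped
	 * 1:1; any hole or mismatched physical address means the range is
	 * not direct mapped.
	 */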
3127 PMAP_LOCK(kernel_pmap);
3128 ppa = pa & ~ADDR_POFF;
3129 key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa;
3130 for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
3131 ppa < pa + size; ppa += PAGE_SIZE,
3132 pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {
3133 if (pvo == NULL || PVO_PADDR(pvo) != ppa) {
3134 error = EFAULT;
3135 break;
3136 }
3137 }
3138 PMAP_UNLOCK(kernel_pmap);
3139
3140 return (error);
3141 }
3142
3143 /*
3144 * Map a set of physical memory pages into the kernel virtual
3145 * address space. Return a pointer to where it is mapped. This
3146 * routine is intended to be used for mapping device memory,
3147 * NOT real memory.
3148 */
3149 void *
3150 moea64_mapdev_attr(vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
3151 {
3152 vm_offset_t va, tmpva, ppa, offset;
3153
3154 ppa = trunc_page(pa);
3155 offset = pa & PAGE_MASK;
3156 size = roundup2(offset + size, PAGE_SIZE);
3157
3158 va = kva_alloc(size);
3159
3160 if (!va)
3161 panic("moea64_mapdev: Couldn't alloc kernel virtual memory");
3162
3163 for (tmpva = va; size > 0;) {
3164 moea64_kenter_attr(tmpva, ppa, ma);
3165 size -= PAGE_SIZE;
3166 tmpva += PAGE_SIZE;
3167 ppa += PAGE_SIZE;
3168 }
3169
3170 return ((void *)(va + offset));
3171 }
3172
3173 void *
3174 moea64_mapdev(vm_paddr_t pa, vm_size_t size)
3175 {
3176
3177 return moea64_mapdev_attr(pa, size, VM_MEMATTR_DEFAULT);
3178 }
3179
3180 void
3181 moea64_unmapdev(void *p, vm_size_t size)
3182 {
3183 vm_offset_t base, offset, va;
3184
3185 va = (vm_offset_t)p;
3186 base = trunc_page(va);
3187 offset = va & PAGE_MASK;
3188 size = roundup2(offset + size, PAGE_SIZE);
3189
3190 moea64_qremove(base, atop(size));
3191 kva_free(base, size);
3192 }
3193
3194 void
3195 moea64_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3196 {
3197 struct pvo_entry *pvo;
3198 vm_offset_t lim;
3199 vm_paddr_t pa;
3200 vm_size_t len;
3201
3202 if (__predict_false(pm == NULL))
3203 pm = &curthread->td_proc->p_vmspace->vm_pmap;
3204
3205 PMAP_LOCK(pm);
3206 while (sz > 0) {
3207 lim = round_page(va+1);
3208 len = MIN(lim - va, sz);
3209 pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF);
3210 if (pvo != NULL && !(pvo->pvo_pte.pa & LPTE_I)) {
3211 pa = PVO_PADDR(pvo) | (va & ADDR_POFF);
3212 moea64_syncicache(pm, va, pa, len);
3213 }
3214 va += len;
3215 sz -= len;
3216 }
3217 PMAP_UNLOCK(pm);
3218 }
3219
3220 void
3221 moea64_dumpsys_map(vm_paddr_t pa, size_t sz, void **va)
3222 {
3223
3224 *va = (void *)(uintptr_t)pa;
3225 }
3226
3227 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
3228
3229 void
3230 moea64_scan_init(void)
3231 {
3232 struct pvo_entry *pvo;
3233 vm_offset_t va;
3234 int i;
3235
3236 if (!do_minidump) {
3237 /* Initialize phys. segments for dumpsys(). */
3238 memset(&dump_map, 0, sizeof(dump_map));
3239 mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
3240 for (i = 0; i < pregions_sz; i++) {
3241 dump_map[i].pa_start = pregions[i].mr_start;
3242 dump_map[i].pa_size = pregions[i].mr_size;
3243 }
3244 return;
3245 }
3246
3247 /* Virtual segments for minidumps: */
3248 memset(&dump_map, 0, sizeof(dump_map));
3249
3250 /* 1st: kernel .data and .bss. */
3251 dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
3252 dump_map[0].pa_size = round_page((uintptr_t)_end) -
3253 dump_map[0].pa_start;
3254
3255 /* 2nd: msgbuf and tables (see pmap_bootstrap()). */
3256 dump_map[1].pa_start = (vm_paddr_t)(uintptr_t)msgbufp->msg_ptr;
3257 dump_map[1].pa_size = round_page(msgbufp->msg_size);
3258
3259 /* 3rd: kernel VM. */
3260 va = dump_map[1].pa_start + dump_map[1].pa_size;
3261 /* Find start of next chunk (from va). */
3262 while (va < virtual_end) {
3263 /* Don't dump the buffer cache. */
3264 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
3265 va = kmi.buffer_eva;
3266 continue;
3267 }
3268 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
3269 if (pvo != NULL && !(pvo->pvo_vaddr & PVO_DEAD))
3270 break;
3271 va += PAGE_SIZE;
3272 }
3273 if (va < virtual_end) {
3274 dump_map[2].pa_start = va;
3275 va += PAGE_SIZE;
3276 /* Find last page in chunk. */
3277 while (va < virtual_end) {
3278 /* Don't run into the buffer cache. */
3279 if (va == kmi.buffer_sva)
3280 break;
3281 pvo = moea64_pvo_find_va(kernel_pmap, va & ~ADDR_POFF);
3282 if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD))
3283 break;
3284 va += PAGE_SIZE;
3285 }
3286 dump_map[2].pa_size = va - dump_map[2].pa_start;
3287 }
3288 }
3289
3290 #ifdef __powerpc64__
3291
3292 static size_t
3293 moea64_scan_pmap(struct bitset *dump_bitset)
3294 {
3295 struct pvo_entry *pvo;
3296 vm_paddr_t pa, pa_end;
3297 vm_offset_t va, pgva, kstart, kend, kstart_lp, kend_lp;
3298 uint64_t lpsize;
3299
3300 lpsize = moea64_large_page_size;
3301 kstart = trunc_page((vm_offset_t)_etext);
3302 kend = round_page((vm_offset_t)_end);
3303 kstart_lp = kstart & ~moea64_large_page_mask;
3304 kend_lp = (kend + moea64_large_page_mask) & ~moea64_large_page_mask;
3305
3306 CTR4(KTR_PMAP, "moea64_scan_pmap: kstart=0x%016lx, kend=0x%016lx, "
3307 "kstart_lp=0x%016lx, kend_lp=0x%016lx",
3308 kstart, kend, kstart_lp, kend_lp);
3309
3310 PMAP_LOCK(kernel_pmap);
3311 RB_FOREACH(pvo, pvo_tree, &kernel_pmap->pmap_pvo) {
3312 va = pvo->pvo_vaddr;
3313
3314 if (va & PVO_DEAD)
3315 continue;
3316
3317 /* Skip DMAP (except kernel area) */
3318 if (va >= DMAP_BASE_ADDRESS && va <= DMAP_MAX_ADDRESS) {
3319 if (va & PVO_LARGE) {
3320 pgva = va & ~moea64_large_page_mask;
3321 if (pgva < kstart_lp || pgva >= kend_lp)
3322 continue;
3323 } else {
3324 pgva = trunc_page(va);
3325 if (pgva < kstart || pgva >= kend)
3326 continue;
3327 }
3328 }
3329
3330 pa = PVO_PADDR(pvo);
3331
3332 if (va & PVO_LARGE) {
3333 pa_end = pa + lpsize;
3334 for (; pa < pa_end; pa += PAGE_SIZE) {
3335 if (vm_phys_is_dumpable(pa))
3336 vm_page_dump_add(dump_bitset, pa);
3337 }
3338 } else {
3339 if (vm_phys_is_dumpable(pa))
3340 vm_page_dump_add(dump_bitset, pa);
3341 }
3342 }
3343 PMAP_UNLOCK(kernel_pmap);
3344
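	/* Return the size of the hashed page table: 8 PTEs per PTEG. */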
3345 return (sizeof(struct lpte) * moea64_pteg_count * 8);
3346 }
3347
3348 static struct dump_context dump_ctx;
3349
3350 static void *
3351 moea64_dump_pmap_init(unsigned blkpgs)
3352 {
3353 dump_ctx.ptex = 0;
3354 dump_ctx.ptex_end = moea64_pteg_count * 8;
3355 dump_ctx.blksz = blkpgs * PAGE_SIZE;
3356 return (&dump_ctx);
3357 }
3358
3359 #else
3360
3361 static size_t
3362 moea64_scan_pmap(struct bitset *dump_bitset __unused)
3363 {
3364 return (0);
3365 }
3366
3367 static void *
3368 moea64_dump_pmap_init(unsigned blkpgs)
3369 {
3370 return (NULL);
3371 }
3372
3373 #endif
3374
3375 #ifdef __powerpc64__
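/*
 * Map a physically contiguous range into the kernel, using 16MB large
 * pages whenever both the virtual and physical addresses are large-page
 * aligned and enough pages remain; otherwise fall back to 4KB mappings.
 */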
3376 static void
3377 moea64_map_range(vm_offset_t va, vm_paddr_t pa, vm_size_t npages)
3378 {
3379
3380 for (; npages > 0; --npages) {
3381 if (moea64_large_page_size != 0 &&
3382 (pa & moea64_large_page_mask) == 0 &&
3383 (va & moea64_large_page_mask) == 0 &&
3384 npages >= (moea64_large_page_size >> PAGE_SHIFT)) {
3385 PMAP_LOCK(kernel_pmap);
3386 moea64_kenter_large(va, pa, 0, 0);
3387 PMAP_UNLOCK(kernel_pmap);
3388 pa += moea64_large_page_size;
3389 va += moea64_large_page_size;
3390 npages -= (moea64_large_page_size >> PAGE_SHIFT) - 1;
3391 } else {
3392 moea64_kenter(va, pa);
3393 pa += PAGE_SIZE;
3394 va += PAGE_SIZE;
3395 }
3396 }
3397 }
3398
3399 static void
3400 moea64_page_array_startup(long pages)
3401 {
3402 long dom_pages[MAXMEMDOM];
3403 vm_paddr_t pa;
3404 vm_offset_t va, vm_page_base;
3405 vm_size_t needed, size;
3406 int domain;
3407 int i;
3408
3409 vm_page_base = 0xd000000000000000ULL;
3410
3411 /* Short-circuit single-domain systems. */
3412 if (vm_ndomains == 1) {
3413 size = round_page(pages * sizeof(struct vm_page));
3414 pa = vm_phys_early_alloc(0, size);
3415 vm_page_base = moea64_map(&vm_page_base,
3416 pa, pa + size, VM_PROT_READ | VM_PROT_WRITE);
3417 vm_page_array_size = pages;
3418 vm_page_array = (vm_page_t)vm_page_base;
3419 return;
3420 }
3421
3422 for (i = 0; i < MAXMEMDOM; i++)
3423 dom_pages[i] = 0;
3424
3425 /* Now get the number of pages required per domain. */
3426 for (i = 0; i < vm_phys_nsegs; i++) {
3427 domain = vm_phys_segs[i].domain;
3428 KASSERT(domain < MAXMEMDOM,
3429 ("Invalid vm_phys_segs NUMA domain %d!\n", domain));
3430 /* Get size of vm_page_array needed for this segment. */
3431 size = btoc(vm_phys_segs[i].end - vm_phys_segs[i].start);
3432 dom_pages[domain] += size;
3433 }
3434
3435 for (i = 0; phys_avail[i + 1] != 0; i+= 2) {
3436 domain = vm_phys_domain(phys_avail[i]);
3437 KASSERT(domain < MAXMEMDOM,
3438 ("Invalid phys_avail NUMA domain %d!\n", domain));
3439 size = btoc(phys_avail[i + 1] - phys_avail[i]);
3440 dom_pages[domain] += size;
3441 }
3442
3443 /*
3444 * Map in chunks that can get us all 16MB pages. There will be some
3445 * overlap between domains, but that's acceptable for now.
3446 */
3447 vm_page_array_size = 0;
3448 va = vm_page_base;
3449 for (i = 0; i < MAXMEMDOM && vm_page_array_size < pages; i++) {
3450 if (dom_pages[i] == 0)
3451 continue;
3452 size = ulmin(pages - vm_page_array_size, dom_pages[i]);
3453 size = round_page(size * sizeof(struct vm_page));
3454 needed = size;
3455 size = roundup2(size, moea64_large_page_size);
3456 pa = vm_phys_early_alloc(i, size);
3457 vm_page_array_size += size / sizeof(struct vm_page);
3458 moea64_map_range(va, pa, size >> PAGE_SHIFT);
3459 /* Scoot up domain 0, to reduce the domain page overlap. */
3460 if (i == 0)
3461 vm_page_base += size - needed;
3462 va += size;
3463 }
3464 vm_page_array = (vm_page_t)vm_page_base;
3465 vm_page_array_size = pages;
3466 }
3467 #endif
3468
3469 static int64_t
3470 moea64_null_method(void)
3471 {
3472 return (0);
3473 }
3474
3475 static int64_t moea64_pte_replace_default(struct pvo_entry *pvo, int flags)
3476 {
3477 int64_t refchg;
3478
3479 refchg = moea64_pte_unset(pvo);
3480 moea64_pte_insert(pvo);
3481
3482 return (refchg);
3483 }
3484
3485 struct moea64_funcs *moea64_ops;
3486
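/*
 * The low-level PTE operations are resolved through ifuncs: each call
 * dispatches to the implementation registered in moea64_ops by the MMU
 * backend, falling back to the supplied default when none is set.
 */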
3487 #define DEFINE_OEA64_IFUNC(ret, func, args, def) \
3488 DEFINE_IFUNC(, ret, moea64_##func, args) { \
3489 moea64_##func##_t f; \
3490 if (moea64_ops == NULL) \
3491 return ((moea64_##func##_t)def); \
3492 f = moea64_ops->func; \
3493 return (f != NULL ? f : (moea64_##func##_t)def);\
3494 }
3495
3496 void
3497 moea64_install(void)
3498 {
3499 #ifdef __powerpc64__
3500 if (hw_direct_map == -1) {
3501 moea64_probe_large_page();
3502
3503 /* Use a direct map if we have large page support */
3504 if (moea64_large_page_size > 0)
3505 hw_direct_map = 1;
3506 else
3507 hw_direct_map = 0;
3508 }
3509 #endif
3510
3511 /*
3512 * Default to non-DMAP, and switch over to DMAP functions once we know
3513 * we have DMAP.
3514 */
3515 if (hw_direct_map) {
3516 moea64_methods.quick_enter_page = moea64_quick_enter_page_dmap;
3517 moea64_methods.quick_remove_page = NULL;
3518 moea64_methods.copy_page = moea64_copy_page_dmap;
3519 moea64_methods.zero_page = moea64_zero_page_dmap;
3520 moea64_methods.copy_pages = moea64_copy_pages_dmap;
3521 }
3522 }
3523
3524 DEFINE_OEA64_IFUNC(int64_t, pte_replace, (struct pvo_entry *, int),
3525 moea64_pte_replace_default)
3526 DEFINE_OEA64_IFUNC(int64_t, pte_insert, (struct pvo_entry *), moea64_null_method)
3527 DEFINE_OEA64_IFUNC(int64_t, pte_unset, (struct pvo_entry *), moea64_null_method)
3528 DEFINE_OEA64_IFUNC(int64_t, pte_clear, (struct pvo_entry *, uint64_t),
3529 moea64_null_method)
3530 DEFINE_OEA64_IFUNC(int64_t, pte_synch, (struct pvo_entry *), moea64_null_method)
3531 DEFINE_OEA64_IFUNC(int64_t, pte_insert_sp, (struct pvo_entry *), moea64_null_method)
3532 DEFINE_OEA64_IFUNC(int64_t, pte_unset_sp, (struct pvo_entry *), moea64_null_method)
3533 DEFINE_OEA64_IFUNC(int64_t, pte_replace_sp, (struct pvo_entry *), moea64_null_method)
3534
3535 /* Superpage functions */
3536
3537 /* MMU interface */
3538
3539 static bool
3540 moea64_ps_enabled(pmap_t pmap)
3541 {
3542 return (superpages_enabled);
3543 }
3544
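/*
 * Adjust the given virtual address hint so that it shares its offset
 * within a 16MB superpage with the backing object, making later
 * promotion to a large page possible; small or already-aligned requests
 * are left untouched.
 */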
3545 static void
3546 moea64_align_superpage(vm_object_t object, vm_ooffset_t offset,
3547 vm_offset_t *addr, vm_size_t size)
3548 {
3549 vm_offset_t sp_offset;
3550
3551 if (size < HPT_SP_SIZE)
3552 return;
3553
3554 CTR4(KTR_PMAP, "%s: offs=%#jx, addr=%p, size=%#jx",
3555 __func__, (uintmax_t)offset, addr, (uintmax_t)size);
3556
3557 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3558 offset += ptoa(object->pg_color);
3559 sp_offset = offset & HPT_SP_MASK;
3560 if (size - ((HPT_SP_SIZE - sp_offset) & HPT_SP_MASK) < HPT_SP_SIZE ||
3561 (*addr & HPT_SP_MASK) == sp_offset)
3562 return;
3563 if ((*addr & HPT_SP_MASK) < sp_offset)
3564 *addr = (*addr & ~HPT_SP_MASK) + sp_offset;
3565 else
3566 *addr = ((*addr + HPT_SP_MASK) & ~HPT_SP_MASK) + sp_offset;
3567 }
3568
3569 /* Helpers */
3570
3571 static __inline void
3572 moea64_pvo_cleanup(struct pvo_dlist *tofree)
3573 {
3574 struct pvo_entry *pvo;
3575
3576 /* clean up */
3577 while (!SLIST_EMPTY(tofree)) {
3578 pvo = SLIST_FIRST(tofree);
3579 SLIST_REMOVE_HEAD(tofree, pvo_dlink);
3580 if (pvo->pvo_vaddr & PVO_DEAD)
3581 moea64_pvo_remove_from_page(pvo);
3582 free_pvo_entry(pvo);
3583 }
3584 }
3585
3586 static __inline uint16_t
3587 pvo_to_vmpage_flags(struct pvo_entry *pvo)
3588 {
3589 uint16_t flags;
3590
3591 flags = 0;
3592 if ((pvo->pvo_pte.prot & VM_PROT_WRITE) != 0)
3593 flags |= PGA_WRITEABLE;
3594 if ((pvo->pvo_pte.prot & VM_PROT_EXECUTE) != 0)
3595 flags |= PGA_EXECUTABLE;
3596
3597 return (flags);
3598 }
3599
3600 /*
3601 * Check if the given pvo and its superpage are in sva-eva range.
3602 */
3603 static __inline bool
3604 moea64_sp_pvo_in_range(struct pvo_entry *pvo, vm_offset_t sva, vm_offset_t eva)
3605 {
3606 vm_offset_t spva;
3607
3608 spva = PVO_VADDR(pvo) & ~HPT_SP_MASK;
3609 if (spva >= sva && spva + HPT_SP_SIZE <= eva) {
3610 /*
3611 * Because this function is intended to be called from loops
3612 * that iterate over ordered pvo entries, if the condition
3613 * above is true then the pvo must be the first of its
3614 * superpage.
3615 */
3616 KASSERT(PVO_VADDR(pvo) == spva,
3617 ("%s: unexpected unaligned superpage pvo", __func__));
3618 return (true);
3619 }
3620 return (false);
3621 }
3622
3623 /*
3624 * Update vm about the REF/CHG bits if the superpage is managed and
3625 * has (or had) write access.
3626 */
3627 static void
3628 moea64_sp_refchg_process(struct pvo_entry *sp, vm_page_t m,
3629 int64_t sp_refchg, vm_prot_t prot)
3630 {
3631 vm_page_t m_end;
3632 int64_t refchg;
3633
3634 if ((sp->pvo_vaddr & PVO_MANAGED) != 0 && (prot & VM_PROT_WRITE) != 0) {
3635 for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++) {
3636 refchg = sp_refchg |
3637 atomic_readandclear_32(&m->md.mdpg_attrs);
3638 if (refchg & LPTE_CHG)
3639 vm_page_dirty(m);
3640 if (refchg & LPTE_REF)
3641 vm_page_aflag_set(m, PGA_REFERENCED);
3642 }
3643 }
3644 }
3645
3646 /* Superpage ops */
3647
3648 static int
3649 moea64_sp_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
3650 vm_prot_t prot, u_int flags, int8_t psind)
3651 {
3652 struct pvo_entry *pvo, **pvos;
3653 struct pvo_head *pvo_head;
3654 vm_offset_t sva;
3655 vm_page_t sm;
3656 vm_paddr_t pa, spa;
3657 bool sync;
3658 struct pvo_dlist tofree;
3659 int error __diagused, i;
3660 uint16_t aflags;
3661
3662 KASSERT((va & HPT_SP_MASK) == 0, ("%s: va %#jx unaligned",
3663 __func__, (uintmax_t)va));
3664 KASSERT(psind == 1, ("%s: invalid psind: %d", __func__, psind));
3665 KASSERT(m->psind == 1, ("%s: invalid m->psind: %d",
3666 __func__, m->psind));
3667 KASSERT(pmap != kernel_pmap,
3668 ("%s: function called with kernel pmap", __func__));
3669
3670 CTR5(KTR_PMAP, "%s: va=%#jx, pa=%#jx, prot=%#x, flags=%#x, psind=1",
3671 __func__, (uintmax_t)va, (uintmax_t)VM_PAGE_TO_PHYS(m),
3672 prot, flags);
3673
3674 SLIST_INIT(&tofree);
3675
3676 sva = va;
3677 sm = m;
3678 spa = pa = VM_PAGE_TO_PHYS(sm);
3679
3680 /* Try to allocate all PVOs first, to make failure handling easier. */
3681 pvos = malloc(HPT_SP_PAGES * sizeof(struct pvo_entry *), M_TEMP,
3682 M_NOWAIT);
3683 if (pvos == NULL) {
3684 CTR1(KTR_PMAP, "%s: failed to alloc pvo array", __func__);
3685 return (KERN_RESOURCE_SHORTAGE);
3686 }
3687
3688 for (i = 0; i < HPT_SP_PAGES; i++) {
3689 pvos[i] = alloc_pvo_entry(0);
3690 if (pvos[i] == NULL) {
3691 CTR1(KTR_PMAP, "%s: failed to alloc pvo", __func__);
3692 for (i = i - 1; i >= 0; i--)
3693 free_pvo_entry(pvos[i]);
3694 free(pvos, M_TEMP);
3695 return (KERN_RESOURCE_SHORTAGE);
3696 }
3697 }
3698
3699 PV_WR_LOCK(spa);
3700 PMAP_LOCK(pmap);
3701
3702 /* Note: moea64_remove_locked() also clears cached REF/CHG bits. */
3703 moea64_remove_locked(pmap, va, va + HPT_SP_SIZE, &tofree);
3704
3705 /* Enter pages */
3706 for (i = 0; i < HPT_SP_PAGES;
3707 i++, va += PAGE_SIZE, pa += PAGE_SIZE, m++) {
3708 pvo = pvos[i];
3709
3710 pvo->pvo_pte.prot = prot;
3711 pvo->pvo_pte.pa = (pa & ~HPT_SP_MASK) | LPTE_LP_4K_16M |
3712 moea64_calc_wimg(pa, pmap_page_get_memattr(m));
3713
3714 if ((flags & PMAP_ENTER_WIRED) != 0)
3715 pvo->pvo_vaddr |= PVO_WIRED;
3716 pvo->pvo_vaddr |= PVO_LARGE;
3717
3718 if ((m->oflags & VPO_UNMANAGED) != 0)
3719 pvo_head = NULL;
3720 else {
3721 pvo_head = &m->md.mdpg_pvoh;
3722 pvo->pvo_vaddr |= PVO_MANAGED;
3723 }
3724
3725 init_pvo_entry(pvo, pmap, va);
3726
3727 error = moea64_pvo_enter(pvo, pvo_head, NULL);
3728 /*
3729 * All superpage PVOs were previously removed, so no errors
3730 * should occur while inserting the new ones.
3731 */
3732 KASSERT(error == 0, ("%s: unexpected error "
3733 "when inserting superpage PVO: %d",
3734 __func__, error));
3735 }
3736
3737 PMAP_UNLOCK(pmap);
3738 PV_UNLOCK(spa);
3739
3740 sync = (sm->a.flags & PGA_EXECUTABLE) == 0;
3741 /* Note: moea64_pvo_cleanup() also clears page prot. flags. */
3742 moea64_pvo_cleanup(&tofree);
3743 pvo = pvos[0];
3744
3745 /* Set vm page flags */
3746 aflags = pvo_to_vmpage_flags(pvo);
3747 if (aflags != 0)
3748 for (m = sm; m < &sm[HPT_SP_PAGES]; m++)
3749 vm_page_aflag_set(m, aflags);
3750
3751 /*
3752 * Flush the page from the instruction cache if this page is
3753 * mapped executable and cacheable.
3754 */
3755 if (sync && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0)
3756 moea64_syncicache(pmap, sva, spa, HPT_SP_SIZE);
3757
3758 atomic_add_long(&sp_mappings, 1);
3759 CTR3(KTR_PMAP, "%s: SP success for va %#jx in pmap %p",
3760 __func__, (uintmax_t)sva, pmap);
3761
3762 free(pvos, M_TEMP);
3763 return (KERN_SUCCESS);
3764 }
3765
3766 #if VM_NRESERVLEVEL > 0
3767 static void
3768 moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m)
3769 {
3770 struct pvo_entry *first, *pvo;
3771 vm_paddr_t pa, pa_end;
3772 vm_offset_t sva, va_end;
3773 int64_t sp_refchg;
3774
3775 /* This CTR may generate a lot of output. */
3776 /* CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)va); */
3777
3778 va &= ~HPT_SP_MASK;
3779 sva = va;
3780 /* Get superpage */
3781 pa = VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK;
3782 m = PHYS_TO_VM_PAGE(pa);
3783
3784 PMAP_LOCK(pmap);
3785
3786 /*
3787 * Check if all pages meet promotion criteria.
3788 *
3789 * XXX In some cases the loop below may be executed for each or most
3790 * of the entered pages of a superpage, which can be expensive
3791 	 * (although it has not been profiled) and may need some optimization.
3792 *
3793 * Some cases where this seems to happen are:
3794 * - When a superpage is first entered read-only and later becomes
3795 * read-write.
3796 * - When some of the superpage's virtual addresses map to previously
3797 * wired/cached pages while others map to pages allocated from a
3798 * different physical address range. A common scenario where this
3799 * happens is when mmap'ing a file that is already present in FS
3800 * block cache and doesn't fill a superpage.
3801 */
3802 first = pvo = moea64_pvo_find_va(pmap, sva);
3803 for (pa_end = pa + HPT_SP_SIZE;
3804 pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) {
3805 if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
3806 CTR3(KTR_PMAP,
3807 "%s: NULL or dead PVO: pmap=%p, va=%#jx",
3808 __func__, pmap, (uintmax_t)va);
3809 goto error;
3810 }
3811 if (PVO_PADDR(pvo) != pa) {
3812 CTR5(KTR_PMAP, "%s: PAs don't match: "
3813 "pmap=%p, va=%#jx, pvo_pa=%#jx, exp_pa=%#jx",
3814 __func__, pmap, (uintmax_t)va,
3815 (uintmax_t)PVO_PADDR(pvo), (uintmax_t)pa);
3816 atomic_add_long(&sp_p_fail_pa, 1);
3817 goto error;
3818 }
3819 if ((first->pvo_vaddr & PVO_FLAGS_PROMOTE) !=
3820 (pvo->pvo_vaddr & PVO_FLAGS_PROMOTE)) {
3821 CTR5(KTR_PMAP, "%s: PVO flags don't match: "
3822 "pmap=%p, va=%#jx, pvo_flags=%#jx, exp_flags=%#jx",
3823 __func__, pmap, (uintmax_t)va,
3824 (uintmax_t)(pvo->pvo_vaddr & PVO_FLAGS_PROMOTE),
3825 (uintmax_t)(first->pvo_vaddr & PVO_FLAGS_PROMOTE));
3826 atomic_add_long(&sp_p_fail_flags, 1);
3827 goto error;
3828 }
3829 if (first->pvo_pte.prot != pvo->pvo_pte.prot) {
3830 CTR5(KTR_PMAP, "%s: PVO protections don't match: "
3831 "pmap=%p, va=%#jx, pvo_prot=%#x, exp_prot=%#x",
3832 __func__, pmap, (uintmax_t)va,
3833 pvo->pvo_pte.prot, first->pvo_pte.prot);
3834 atomic_add_long(&sp_p_fail_prot, 1);
3835 goto error;
3836 }
3837 if ((first->pvo_pte.pa & LPTE_WIMG) !=
3838 (pvo->pvo_pte.pa & LPTE_WIMG)) {
3839 CTR5(KTR_PMAP, "%s: WIMG bits don't match: "
3840 "pmap=%p, va=%#jx, pvo_wimg=%#jx, exp_wimg=%#jx",
3841 __func__, pmap, (uintmax_t)va,
3842 (uintmax_t)(pvo->pvo_pte.pa & LPTE_WIMG),
3843 (uintmax_t)(first->pvo_pte.pa & LPTE_WIMG));
3844 atomic_add_long(&sp_p_fail_wimg, 1);
3845 goto error;
3846 }
3847
3848 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo);
3849 }
3850
3851 /* All OK, promote. */
3852
3853 /*
3854 * Handle superpage REF/CHG bits. If REF or CHG is set in
3855 * any page, then it must be set in the superpage.
3856 *
3857 * Instead of querying each page, we take advantage of two facts:
3858 * 1- If a page is being promoted, it was referenced.
3859 * 2- If promoted pages are writable, they were modified.
3860 */
3861 sp_refchg = LPTE_REF |
3862 ((first->pvo_pte.prot & VM_PROT_WRITE) != 0 ? LPTE_CHG : 0);
3863
3864 /* Promote pages */
3865
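	/*
	 * For each constituent PVO, keep the low attribute bits, clear
	 * the 4KB page index within the 16MB frame and set the
	 * large-page size encoding and PVO_LARGE; the hardware PTEs
	 * are then replaced in a single pass.
	 */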
3866 for (pvo = first, va_end = PVO_VADDR(pvo) + HPT_SP_SIZE;
3867 pvo != NULL && PVO_VADDR(pvo) < va_end;
3868 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
3869 pvo->pvo_pte.pa &= ADDR_POFF | ~HPT_SP_MASK;
3870 pvo->pvo_pte.pa |= LPTE_LP_4K_16M;
3871 pvo->pvo_vaddr |= PVO_LARGE;
3872 }
3873 moea64_pte_replace_sp(first);
3874
3875 /* Send REF/CHG bits to VM */
3876 moea64_sp_refchg_process(first, m, sp_refchg, first->pvo_pte.prot);
3877
3878 /* Use first page to cache REF/CHG bits */
3879 atomic_set_32(&m->md.mdpg_attrs, sp_refchg | MDPG_ATTR_SP);
3880
3881 PMAP_UNLOCK(pmap);
3882
3883 atomic_add_long(&sp_mappings, 1);
3884 atomic_add_long(&sp_promotions, 1);
3885 CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
3886 __func__, (uintmax_t)sva, pmap);
3887 return;
3888
3889 error:
3890 atomic_add_long(&sp_p_failures, 1);
3891 PMAP_UNLOCK(pmap);
3892 }
3893 #endif
3894
3895 static void
3896 moea64_sp_demote_aligned(struct pvo_entry *sp)
3897 {
3898 struct pvo_entry *pvo;
3899 vm_offset_t va, va_end;
3900 vm_paddr_t pa;
3901 vm_page_t m;
3902 pmap_t pmap __diagused;
3903 int64_t refchg;
3904
3905 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
3906
3907 pmap = sp->pvo_pmap;
3908 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
3909
3910 pvo = sp;
3911
3912 /* Demote pages */
3913
3914 va = PVO_VADDR(pvo);
3915 pa = PVO_PADDR(pvo);
3916 m = PHYS_TO_VM_PAGE(pa);
3917
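	/*
	 * Turn each constituent PVO back into a plain 4KB mapping:
	 * drop PVO_LARGE and replace the superpage frame encoding with
	 * the individual page's physical address.
	 */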
3918 for (pvo = sp, va_end = va + HPT_SP_SIZE;
3919 pvo != NULL && PVO_VADDR(pvo) < va_end;
3920 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo),
3921 va += PAGE_SIZE, pa += PAGE_SIZE) {
3922 KASSERT(pvo && PVO_VADDR(pvo) == va,
3923 ("%s: missing PVO for va %#jx", __func__, (uintmax_t)va));
3924
3925 pvo->pvo_vaddr &= ~PVO_LARGE;
3926 pvo->pvo_pte.pa &= ~LPTE_RPGN;
3927 pvo->pvo_pte.pa |= pa;
3928
3929 }
3930 refchg = moea64_pte_replace_sp(sp);
3931
3932 /*
3933 * Clear SP flag
3934 *
3935 * XXX It is possible that another pmap has this page mapped as
3936 	 * part of a superpage, but since the SP flag is used only for
3937 	 * caching SP REF/CHG bits, which are queried from the page table
3938 	 * when not found in the cache, it should be OK to clear it here.
3939 */
3940 atomic_clear_32(&m->md.mdpg_attrs, MDPG_ATTR_SP);
3941
3942 /*
3943 * Handle superpage REF/CHG bits. A bit set in the superpage
3944 * means all pages should consider it set.
3945 */
3946 moea64_sp_refchg_process(sp, m, refchg, sp->pvo_pte.prot);
3947
3948 atomic_add_long(&sp_demotions, 1);
3949 CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
3950 __func__, (uintmax_t)PVO_VADDR(sp), pmap);
3951 }
3952
3953 static void
3954 moea64_sp_demote(struct pvo_entry *pvo)
3955 {
3956 PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
3957
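	/*
	 * If this PVO is not the first one of its superpage, look up
	 * the PVO at the superpage-aligned address and demote from
	 * there.
	 */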
3958 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
3959 pvo = moea64_pvo_find_va(pvo->pvo_pmap,
3960 PVO_VADDR(pvo) & ~HPT_SP_MASK);
3961 KASSERT(pvo != NULL, ("%s: missing PVO for va %#jx",
3962 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
3963 }
3964 moea64_sp_demote_aligned(pvo);
3965 }
3966
3967 static struct pvo_entry *
3968 moea64_sp_unwire(struct pvo_entry *sp)
3969 {
3970 struct pvo_entry *pvo, *prev;
3971 vm_offset_t eva;
3972 pmap_t pm;
3973 int64_t ret, refchg;
3974
3975 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
3976
3977 pm = sp->pvo_pmap;
3978 PMAP_LOCK_ASSERT(pm, MA_OWNED);
3979
3980 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
3981 refchg = 0;
3982 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
3983 prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
3984 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
3985 panic("%s: pvo %p is missing PVO_WIRED",
3986 __func__, pvo);
3987 pvo->pvo_vaddr &= ~PVO_WIRED;
3988
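		/*
		 * Update the PTE in place; if the old REF/CHG state
		 * cannot be obtained (negative return), conservatively
		 * assume the page was modified.
		 */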
3989 ret = moea64_pte_replace(pvo, 0 /* No invalidation */);
3990 if (ret < 0)
3991 refchg |= LPTE_CHG;
3992 else
3993 refchg |= ret;
3994
3995 pm->pm_stats.wired_count--;
3996 }
3997
3998 /* Send REF/CHG bits to VM */
3999 moea64_sp_refchg_process(sp, PHYS_TO_VM_PAGE(PVO_PADDR(sp)),
4000 refchg, sp->pvo_pte.prot);
4001
4002 return (prev);
4003 }
4004
4005 static struct pvo_entry *
4006 moea64_sp_protect(struct pvo_entry *sp, vm_prot_t prot)
4007 {
4008 struct pvo_entry *pvo, *prev;
4009 vm_offset_t eva;
4010 pmap_t pm;
4011 vm_page_t m, m_end;
4012 int64_t ret, refchg;
4013 vm_prot_t oldprot;
4014
4015 CTR3(KTR_PMAP, "%s: va=%#jx, prot=%x",
4016 __func__, (uintmax_t)PVO_VADDR(sp), prot);
4017
4018 pm = sp->pvo_pmap;
4019 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4020
4021 oldprot = sp->pvo_pte.prot;
4022 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4023 KASSERT(m != NULL, ("%s: missing vm page for pa %#jx",
4024 __func__, (uintmax_t)PVO_PADDR(sp)));
4025 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4026 refchg = 0;
4027
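	/*
	 * Apply the new protection to every constituent PVO, updating
	 * the PTEs in place and accumulating their old REF/CHG bits.
	 */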
4028 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4029 prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
4030 pvo->pvo_pte.prot = prot;
4031 /*
4032 * If the PVO is in the page table, update mapping
4033 */
4034 ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
4035 if (ret < 0)
4036 refchg |= LPTE_CHG;
4037 else
4038 refchg |= ret;
4039 }
4040
4041 /* Send REF/CHG bits to VM */
4042 moea64_sp_refchg_process(sp, m, refchg, oldprot);
4043
4044 /* Handle pages that became executable */
4045 if ((m->a.flags & PGA_EXECUTABLE) == 0 &&
4046 (sp->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
4047 if ((m->oflags & VPO_UNMANAGED) == 0)
4048 for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++)
4049 vm_page_aflag_set(m, PGA_EXECUTABLE);
4050 moea64_syncicache(pm, PVO_VADDR(sp), PVO_PADDR(sp),
4051 HPT_SP_SIZE);
4052 }
4053
4054 return (prev);
4055 }
4056
4057 static struct pvo_entry *
4058 moea64_sp_remove(struct pvo_entry *sp, struct pvo_dlist *tofree)
4059 {
4060 struct pvo_entry *pvo, *tpvo;
4061 vm_offset_t eva;
4062 pmap_t pm __diagused;
4063
4064 CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
4065
4066 pm = sp->pvo_pmap;
4067 PMAP_LOCK_ASSERT(pm, MA_OWNED);
4068
4069 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4070 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
4071 tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
4072
4073 /*
4074 * For locking reasons, remove this from the page table and
4075 * pmap, but save delinking from the vm_page for a second
4076 * pass
4077 */
4078 moea64_pvo_remove_from_pmap(pvo);
4079 SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
4080 }
4081
4082 /*
4083 * Clear SP bit
4084 *
4085 * XXX See comment in moea64_sp_demote_aligned() for why it's
4086 * ok to always clear the SP bit on remove/demote.
4087 */
4088 atomic_clear_32(&PHYS_TO_VM_PAGE(PVO_PADDR(sp))->md.mdpg_attrs,
4089 MDPG_ATTR_SP);
4090
4091 return (tpvo);
4092 }
4093
4094 static int64_t
4095 moea64_sp_query_locked(struct pvo_entry *pvo, uint64_t ptebit)
4096 {
4097 int64_t refchg, ret;
4098 vm_offset_t eva;
4099 vm_page_t m;
4100 pmap_t pmap;
4101 struct pvo_entry *sp;
4102
4103 PV_LOCKASSERT(PVO_PADDR(pvo));
4104
4105 pmap = pvo->pvo_pmap;
4106 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4107
4108 /* Get first SP PVO */
4109 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
4110 sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
4111 KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
4112 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
4113 } else
4114 sp = pvo;
4115 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4116
4117 refchg = 0;
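	/*
	 * Accumulate REF/CHG from each constituent mapping, stopping
	 * early once the requested bit has been found.
	 */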
4118 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4119 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
4120 ret = moea64_pte_synch(pvo);
4121 if (ret > 0) {
4122 refchg |= ret & (LPTE_CHG | LPTE_REF);
4123 if ((refchg & ptebit) != 0)
4124 break;
4125 }
4126 }
4127
4128 /* Save results */
4129 if (refchg != 0) {
4130 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4131 atomic_set_32(&m->md.mdpg_attrs, refchg | MDPG_ATTR_SP);
4132 }
4133
4134 return (refchg);
4135 }
4136
4137 /*
4138 * Note: this assumes the vm_page represented by the given pvo
4139 * is at least read locked.
4140 */
4141 static int64_t
4142 moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit)
4143 {
4144 int64_t refchg;
4145 pmap_t pmap;
4146
4147 PV_LOCKASSERT(PVO_PADDR(pvo));
4148
4149 pmap = pvo->pvo_pmap;
4150 PMAP_LOCK(pmap);
4151
4152 /*
4153 * Check if SP was demoted/removed before pmap lock was acquired.
4154 */
4155 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4156 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4157 __func__, (uintmax_t)PVO_PADDR(pvo));
4158 PMAP_UNLOCK(pmap);
4159 return (-1);
4160 }
4161
4162 refchg = moea64_sp_query_locked(pvo, ptebit);
4163 PMAP_UNLOCK(pmap);
4164
4165 CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
4166 __func__, (uintmax_t)PVO_VADDR(pvo),
4167 (uintmax_t)PVO_PADDR(pvo), (uintmax_t)refchg);
4168
4169 return (refchg);
4170 }
4171
4172 static int64_t
4173 moea64_sp_pvo_clear(struct pvo_entry *pvo, uint64_t ptebit)
4174 {
4175 int64_t refchg, ret;
4176 pmap_t pmap;
4177 struct pvo_entry *sp;
4178 vm_offset_t eva;
4179 vm_page_t m;
4180
4181 pmap = pvo->pvo_pmap;
4182 PMAP_LOCK(pmap);
4183
4184 /*
4185 * Check if SP was demoted/removed before pmap lock was acquired.
4186 */
4187 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4188 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4189 __func__, (uintmax_t)PVO_PADDR(pvo));
4190 PMAP_UNLOCK(pmap);
4191 return (-1);
4192 }
4193
4194 /* Get first SP PVO */
4195 if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
4196 sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
4197 KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
4198 __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
4199 } else
4200 sp = pvo;
4201 eva = PVO_VADDR(sp) + HPT_SP_SIZE;
4202
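	/*
	 * Clear the requested bit in every constituent mapping while
	 * collecting the old REF/CHG state, then drop the cached copy
	 * of the bit on the superpage's first page.
	 */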
4203 refchg = 0;
4204 for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
4205 pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
4206 ret = moea64_pte_clear(pvo, ptebit);
4207 if (ret > 0)
4208 refchg |= ret & (LPTE_CHG | LPTE_REF);
4209 }
4210
4211 m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
4212 atomic_clear_32(&m->md.mdpg_attrs, ptebit);
4213 PMAP_UNLOCK(pmap);
4214
4215 CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
4216 __func__, (uintmax_t)PVO_VADDR(sp),
4217 (uintmax_t)PVO_PADDR(sp), (uintmax_t)refchg);
4218
4219 return (refchg);
4220 }
4221
4222 static int64_t
4223 moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m, uint64_t ptebit)
4224 {
4225 int64_t count, ret;
4226 pmap_t pmap;
4227
4228 count = 0;
4229 pmap = pvo->pvo_pmap;
4230
4231 /*
4232 * Since this reference bit is shared by 4096 4KB pages, it
4233 * should not be cleared every time it is tested. Apply a
4234 * simple "hash" function on the physical page number, the
4235 * virtual superpage number, and the pmap address to select
4236 * one 4KB page out of the 4096 on which testing the
4237 * reference bit will result in clearing that reference bit.
4238 * This function is designed to avoid the selection of the
4239 * same 4KB page for every 16MB page mapping.
4240 *
4241 * Always leave the reference bit of a wired mapping set, as
4242 * the current state of its reference bit won't affect page
4243 * replacement.
4244 */
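	/*
	 * For example, with a 16MB superpage of 4096 4KB pages the hash
	 * below masks (pa >> PAGE_SHIFT) ^ (va >> HPT_SP_SHIFT) ^ pmap
	 * with 0xfff; only the one mapping for which this value is zero
	 * has its reference bit cleared when it is tested.
	 */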
4245 if (ptebit == LPTE_REF && (((VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) ^
4246 (PVO_VADDR(pvo) >> HPT_SP_SHIFT) ^ (uintptr_t)pmap) &
4247 (HPT_SP_PAGES - 1)) == 0 && (pvo->pvo_vaddr & PVO_WIRED) == 0) {
4248 if ((ret = moea64_sp_pvo_clear(pvo, ptebit)) == -1)
4249 return (-1);
4250
4251 if ((ret & ptebit) != 0)
4252 count++;
4253
4254 /*
4255 * If this page was not selected by the hash function, then assume
4256 * its REF bit was set.
4257 */
4258 } else if (ptebit == LPTE_REF) {
4259 count++;
4260
4261 /*
4262 	 * To clear the CHG bit of a single SP page, the superpage must first
4263 	 * be demoted.  But if no CHG bit is set, nothing needs to be cleared
4264 	 * and thus no demotion is needed.
4265 */
4266 } else {
4267 CTR4(KTR_PMAP, "%s: ptebit=%#jx, va=%#jx, pa=%#jx",
4268 __func__, (uintmax_t)ptebit, (uintmax_t)PVO_VADDR(pvo),
4269 (uintmax_t)PVO_PADDR(pvo));
4270
4271 PMAP_LOCK(pmap);
4272
4273 /*
4274 * Make sure SP wasn't demoted/removed before pmap lock
4275 * was acquired.
4276 */
4277 if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
4278 CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
4279 __func__, (uintmax_t)PVO_PADDR(pvo));
4280 PMAP_UNLOCK(pmap);
4281 return (-1);
4282 }
4283
4284 ret = moea64_sp_query_locked(pvo, ptebit);
4285 if ((ret & ptebit) != 0)
4286 count++;
4287 else {
4288 PMAP_UNLOCK(pmap);
4289 return (0);
4290 }
4291
4292 moea64_sp_demote(pvo);
4293 moea64_pte_clear(pvo, ptebit);
4294
4295 /*
4296 * Write protect the mapping to a single page so that a
4297 * subsequent write access may repromote.
4298 */
4299 if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
4300 moea64_pvo_protect(pmap, pvo,
4301 pvo->pvo_pte.prot & ~VM_PROT_WRITE);
4302
4303 PMAP_UNLOCK(pmap);
4304 }
4305
4306 return (count);
4307 }
4308