/*-
 * SPDX-License-Identifier: BSD-2-Clause AND BSD-4-Clause
 *
 * Copyright (c) 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (C) 1995, 1996 Wolfgang Solfrank.
 * Copyright (C) 1995, 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $
 */
/*-
 * Copyright (C) 2001 Benno Rice.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
/*
 * Native 64-bit page table operations for running without a hypervisor.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/rwlock.h>
#include <sys/endian.h>

#include <sys/kdb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>

#include <machine/cpu.h>
#include <machine/hid.h>
#include <machine/md_var.h>
#include <machine/mmuvar.h>

#include "mmu_oea64.h"

#define	PTESYNC()	__asm __volatile("ptesync");
#define	TLBSYNC()	__asm __volatile("tlbsync; ptesync");
#define	SYNC()		__asm __volatile("sync");
#define	EIEIO()		__asm __volatile("eieio");

#define	VSID_HASH_MASK	0x0000007fffffffffULL

/* POWER9 only permits a 64k partition table size. */
#define	PART_SIZE	0x10000

/* Actual page sizes (to be used with tlbie, when L=0) */
#define	AP_4K		0x00
#define	AP_16M		0x80

#define	LPTE_KERNEL_VSID_BIT	(KERNEL_VSID_BIT << \
				(16 - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)))

/* Abbreviated Virtual Address Page - high bits */
#define	LPTE_AVA_PGNHI_MASK	0x0000000000000F80ULL
#define	LPTE_AVA_PGNHI_SHIFT	7

/* Effective Address Page - low bits */
#define	EA_PAGELO_MASK		0x7ffULL
#define	EA_PAGELO_SHIFT		11

static bool moea64_crop_tlbie;
static bool moea64_need_lock;

/*
 * The tlbie instruction has two forms: an old one used by PowerISA
 * 2.03 and prior, and a newer one used by PowerISA 2.06 and later.
 * We need to support both.
 */
static __inline void
TLBIE(uint64_t vpn, uint64_t oldptehi)
{
#ifndef __powerpc64__
	register_t vpn_hi, vpn_lo;
	register_t msr;
	register_t scratch, intr;
#endif

	static volatile u_int tlbie_lock = 0;
	bool need_lock = moea64_need_lock;

	vpn <<= ADDR_PIDX_SHFT;

	/* Hobo spinlock: we need stronger guarantees than mutexes provide */
	if (need_lock) {
		while (!atomic_cmpset_int(&tlbie_lock, 0, 1));
		isync(); /* Flush instruction queue once lock acquired */

		if (moea64_crop_tlbie) {
			vpn &= ~(0xffffULL << 48);
#ifdef __powerpc64__
			if ((oldptehi & LPTE_BIG) != 0)
				__asm __volatile("tlbie %0, 1" :: "r"(vpn) :
				    "memory");
			else
				__asm __volatile("tlbie %0, 0" :: "r"(vpn) :
				    "memory");
			__asm __volatile("eieio; tlbsync; ptesync" :::
			    "memory");
			goto done;
#endif
		}
	}

#ifdef __powerpc64__
	/*
	 * If this page has LPTE_BIG set and is from userspace, then
	 * it must be a superpage with 4KB base/16MB actual page size.
	 */
	if ((oldptehi & LPTE_BIG) != 0 &&
	    (oldptehi & LPTE_KERNEL_VSID_BIT) == 0)
		vpn |= AP_16M;

	/*
	 * Explicitly clobber r0. The tlbie instruction has two forms: an old
	 * one used by PowerISA 2.03 and prior, and a newer one used by PowerISA
	 * 2.06 (maybe 2.05?) and later. We need to support both, and it just
	 * so happens that since we use 4k pages we can simply zero out r0, and
	 * clobber it, and the assembler will interpret the single-operand form
	 * of tlbie as having RB set, and everything else as 0. The RS operand
	 * in the newer form is in the same position as the L (page size) bit
	 * of the old form, so as long as RS is 0, we're good on both sides.
	 */
	__asm __volatile("li 0, 0 \n tlbie %0, 0" :: "r"(vpn) : "r0", "memory");
	__asm __volatile("eieio; tlbsync; ptesync" ::: "memory");
done:

#else
	vpn_hi = (uint32_t)(vpn >> 32);
	vpn_lo = (uint32_t)vpn;

	intr = intr_disable();
	__asm __volatile("\
	    mfmsr %0; \
	    mr %1, %0; \
	    insrdi %1,%5,1,0; \
	    mtmsrd %1; isync; \
	    \
	    sld %1,%2,%4; \
	    or %1,%1,%3; \
	    tlbie %1; \
	    \
	    mtmsrd %0; isync; \
	    eieio; \
	    tlbsync; \
	    ptesync;"
	: "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1)
	: "memory");
	intr_restore(intr);
#endif

	/* No barriers or special ops -- taken care of by ptesync above */
	if (need_lock)
		tlbie_lock = 0;
}

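/*
 * A note on the ordering used above (summarizing the sequence the code
 * emits, per the architecture's page table update rules): the PTE itself
 * is modified first and ordered with "ptesync" before the "tlbie" is
 * issued; "eieio" then orders the "tlbie" before "tlbsync", and the final
 * "ptesync" waits for the invalidation to complete on all processors
 * before any subsequent storage access.
 */
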
#define	DISABLE_TRANS(msr)	msr = mfmsr(); mtmsr(msr & ~PSL_DR)
#define	ENABLE_TRANS(msr)	mtmsr(msr)

/*
 * PTEG data.
 */
static volatile struct lpte *moea64_pteg_table;
static struct rwlock moea64_eviction_lock;

static volatile struct pate *moea64_part_table;

/*
 * Dump function.
 */
static void	*moea64_dump_pmap_native(void *ctx, void *buf,
		    u_long *nbytes);

/*
 * PTE calls.
 */
static int64_t	moea64_pte_insert_native(struct pvo_entry *);
static int64_t	moea64_pte_synch_native(struct pvo_entry *);
static int64_t	moea64_pte_clear_native(struct pvo_entry *, uint64_t);
static int64_t	moea64_pte_replace_native(struct pvo_entry *, int);
static int64_t	moea64_pte_unset_native(struct pvo_entry *);
static int64_t	moea64_pte_insert_sp_native(struct pvo_entry *);
static int64_t	moea64_pte_unset_sp_native(struct pvo_entry *);
static int64_t	moea64_pte_replace_sp_native(struct pvo_entry *);

/*
 * Utility routines.
 */
static void	moea64_bootstrap_native(
		    vm_offset_t kernelstart, vm_offset_t kernelend);
static void	moea64_cpu_bootstrap_native(int ap);
static void	tlbia(void);
static void	moea64_install_native(void);

static struct pmap_funcs moea64_native_methods = {
	.install = moea64_install_native,

	/* Internal interfaces */
	.bootstrap = moea64_bootstrap_native,
	.cpu_bootstrap = moea64_cpu_bootstrap_native,
	.dumpsys_dump_pmap = moea64_dump_pmap_native,
};

static struct moea64_funcs moea64_native_funcs = {
	.pte_synch = moea64_pte_synch_native,
	.pte_clear = moea64_pte_clear_native,
	.pte_unset = moea64_pte_unset_native,
	.pte_replace = moea64_pte_replace_native,
	.pte_insert = moea64_pte_insert_native,
	.pte_insert_sp = moea64_pte_insert_sp_native,
	.pte_unset_sp = moea64_pte_unset_sp_native,
	.pte_replace_sp = moea64_pte_replace_sp_native,
};

MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, oea64_mmu);

static void
moea64_install_native(void)
{

	/* Install the MOEA64 ops. */
	moea64_ops = &moea64_native_funcs;

	moea64_install();
}

static int64_t
moea64_pte_synch_native(struct pvo_entry *pvo)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	uint64_t ptelo, pvo_ptevpn;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);

	rw_rlock(&moea64_eviction_lock);
	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
		/* Evicted */
		rw_runlock(&moea64_eviction_lock);
		return (-1);
	}

	PTESYNC();
	ptelo = be64toh(pt->pte_lo);

	rw_runlock(&moea64_eviction_lock);

	return (ptelo & (LPTE_REF | LPTE_CHG));
}

static int64_t
moea64_pte_clear_native(struct pvo_entry *pvo, uint64_t ptebit)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	struct lpte properpt;
	uint64_t ptelo;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	moea64_pte_from_pvo(pvo, &properpt);

	rw_rlock(&moea64_eviction_lock);
	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) !=
	    (properpt.pte_hi & LPTE_AVPN_MASK)) {
		/* Evicted */
		rw_runlock(&moea64_eviction_lock);
		return (-1);
	}

	if (ptebit == LPTE_REF) {
		/* See "Resetting the Reference Bit" in arch manual */
		PTESYNC();
		/* 2-step here safe: precision is not guaranteed */
		ptelo = be64toh(pt->pte_lo);

		/* One-byte store to avoid touching the C bit */
		((volatile uint8_t *)(&pt->pte_lo))[6] =
#if BYTE_ORDER == BIG_ENDIAN
		    ((uint8_t *)(&properpt.pte_lo))[6];
#else
		    ((uint8_t *)(&properpt.pte_lo))[1];
#endif
		rw_runlock(&moea64_eviction_lock);

		critical_enter();
		TLBIE(pvo->pvo_vpn, properpt.pte_hi);
		critical_exit();
	} else {
		rw_runlock(&moea64_eviction_lock);
		ptelo = moea64_pte_unset_native(pvo);
		moea64_pte_insert_native(pvo);
	}

	return (ptelo & (LPTE_REF | LPTE_CHG));
}

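/*
 * Illustrative note on the one-byte store above (assuming the usual
 * LPTE_REF/LPTE_CHG values of 0x100 and 0x80): in the big-endian PTE
 * image, byte 6 of pte_lo holds bits 15:8, which include R (0x100) but
 * not C (0x80); C lives in byte 7.  Rewriting byte 6 alone therefore
 * clears the reference bit without racing a concurrent hardware update
 * of the change bit.
 */
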
static __always_inline int64_t
moea64_pte_unset_locked(volatile struct lpte *pt, uint64_t vpn)
{
	uint64_t ptelo, ptehi;

	/*
	 * Invalidate the pte, briefly locking it to collect RC bits. No
	 * atomics needed since this is protected against eviction by the lock.
	 */
	isync();
	critical_enter();
	ptehi = (be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED;
	pt->pte_hi = htobe64(ptehi);
	PTESYNC();
	TLBIE(vpn, ptehi);
	ptelo = be64toh(pt->pte_lo);
	*((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */
	critical_exit();

	/* Keep statistics */
	STAT_MOEA64(moea64_pte_valid--);

	return (ptelo & (LPTE_CHG | LPTE_REF));
}

static int64_t
moea64_pte_unset_native(struct pvo_entry *pvo)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	int64_t ret;
	uint64_t pvo_ptevpn;

	pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);

	rw_rlock(&moea64_eviction_lock);

	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
		/* Evicted */
		STAT_MOEA64(moea64_pte_overflow--);
		ret = -1;
	} else
		ret = moea64_pte_unset_locked(pt, pvo->pvo_vpn);

	rw_runlock(&moea64_eviction_lock);

	return (ret);
}

static int64_t
moea64_pte_replace_inval_native(struct pvo_entry *pvo,
    volatile struct lpte *pt)
{
	struct lpte properpt;
	uint64_t ptelo, ptehi;

	moea64_pte_from_pvo(pvo, &properpt);

	rw_rlock(&moea64_eviction_lock);
	if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) !=
	    (properpt.pte_hi & LPTE_AVPN_MASK)) {
		/* Evicted */
		STAT_MOEA64(moea64_pte_overflow--);
		rw_runlock(&moea64_eviction_lock);
		return (-1);
	}

	/*
	 * Replace the pte, briefly locking it to collect RC bits. No
	 * atomics needed since this is protected against eviction by the lock.
	 */
	isync();
	critical_enter();
	ptehi = (be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED;
	pt->pte_hi = htobe64(ptehi);
	PTESYNC();
	TLBIE(pvo->pvo_vpn, ptehi);
	ptelo = be64toh(pt->pte_lo);
	EIEIO();
	pt->pte_lo = htobe64(properpt.pte_lo);
	EIEIO();
	pt->pte_hi = htobe64(properpt.pte_hi); /* Release lock */
	PTESYNC();
	critical_exit();
	rw_runlock(&moea64_eviction_lock);

	return (ptelo & (LPTE_CHG | LPTE_REF));
}

static int64_t
moea64_pte_replace_native(struct pvo_entry *pvo, int flags)
{
	volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
	struct lpte properpt;
	int64_t ptelo;

	if (flags == 0) {
		/* Just some software bits changing. */
		moea64_pte_from_pvo(pvo, &properpt);

		rw_rlock(&moea64_eviction_lock);
		if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) !=
		    (properpt.pte_hi & LPTE_AVPN_MASK)) {
			rw_runlock(&moea64_eviction_lock);
			return (-1);
		}
		pt->pte_hi = htobe64(properpt.pte_hi);
		ptelo = be64toh(pt->pte_lo);
		rw_runlock(&moea64_eviction_lock);
	} else {
		/* Otherwise, need invalidation and replacement */
		ptelo = moea64_pte_replace_inval_native(pvo, pt);
	}

	return (ptelo);
}

static void
moea64_cpu_bootstrap_native(int ap)
{
	int i = 0;
#ifdef __powerpc64__
	struct slb *slb = PCPU_GET(aim.slb);
	register_t seg0;
#endif

	/*
	 * Initialize segment registers and MMU
	 */

	mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR);

	switch (mfpvr() >> 16) {
	case IBMPOWER9:
		/* Run the HPT in legacy (non-radix) mode. */
		mtspr(SPR_HID0, mfspr(SPR_HID0) & ~HID0_RADIX);
		break;
	}

	/*
	 * Install kernel SLB entries
	 */

#ifdef __powerpc64__
	__asm __volatile ("slbia");
	__asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) :
	    "r"(0));

	for (i = 0; i < n_slbs; i++) {
		if (!(slb[i].slbe & SLBE_VALID))
			continue;

		__asm __volatile ("slbmte %0, %1" ::
		    "r"(slb[i].slbv), "r"(slb[i].slbe));
	}
#else
	for (i = 0; i < 16; i++)
		mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]);
#endif

	/*
	 * Install page table
	 */

	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
		mtspr(SPR_PTCR,
		    ((uintptr_t)moea64_part_table & ~DMAP_BASE_ADDRESS) |
		     flsl((PART_SIZE >> 12) - 1));
	else
		__asm __volatile ("ptesync; mtsdr1 %0; isync"
		    :: "r"(((uintptr_t)moea64_pteg_table & ~DMAP_BASE_ADDRESS)
			     | (uintptr_t)(flsl(moea64_pteg_mask >> 11))));
	tlbia();
}

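/*
 * Illustrative note on the SDR1/PTCR encodings above (numbers are
 * hypothetical): the low bits of SDR1 hold HTABSIZE = log2(PTEG count) -
 * 11, which is exactly what flsl(moea64_pteg_mask >> 11) computes, since
 * moea64_pteg_mask is (PTEG count - 1).  For instance, a 2^18-PTEG
 * (32 MB) table gives flsl((2^18 - 1) >> 11) = flsl(0x7f) = 7.  The PTCR
 * size field is derived the same way from the 64 KB partition table:
 * flsl((PART_SIZE >> 12) - 1) = flsl(0xf) = 4.
 */
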
static void
moea64_bootstrap_native(vm_offset_t kernelstart, vm_offset_t kernelend)
{
	vm_size_t	size;
	vm_offset_t	off;
	vm_paddr_t	pa;
	register_t	msr;

	moea64_early_bootstrap(kernelstart, kernelend);

	switch (mfpvr() >> 16) {
	case IBMPOWER8:
	case IBMPOWER8E:
	case IBMPOWER8NVL:
	case IBMPOWER9:
		moea64_need_lock = false;
		break;
	case IBMPOWER4:
	case IBMPOWER4PLUS:
	case IBM970:
	case IBM970FX:
	case IBM970GX:
	case IBM970MP:
		moea64_crop_tlbie = true;
		/* FALLTHROUGH */
	default:
		moea64_need_lock = true;
	}

	/*
	 * Allocate PTEG table.
	 */

	size = moea64_pteg_count * sizeof(struct lpteg);
	CTR2(KTR_PMAP, "moea64_bootstrap: %lu PTEGs, %lu bytes",
	    moea64_pteg_count, size);
	rw_init(&moea64_eviction_lock, "pte eviction");

	/*
	 * We now need to allocate memory.  This memory, to be allocated,
	 * has to reside in a page table -- the very page table we are about
	 * to allocate.  We don't have a BAT to cover it, so drop to data
	 * real mode for a minute as a measure of last resort.  We do this a
	 * couple of times.
	 */
	/*
	 * The PTEG table must be aligned on a 256k boundary, but can be
	 * placed anywhere with that alignment on POWER ISA 3+ systems. On
	 * earlier systems, offset addition is done by the CPU with bitwise
	 * OR rather than addition, so the table must also be aligned on a
	 * boundary of its own size. Pick the larger of the two, which works
	 * on all systems.
	 */
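	/*
	 * Illustrative (hypothetical numbers): with a 1 MB table aligned on
	 * its own size, e.g. base 0x00400000, any hash offset below 1 MB
	 * occupies only bits the base has clear, so base | offset ==
	 * base + offset.  With a merely 256 KB-aligned base such as
	 * 0x00340000 and offset 0x000f0000, OR would yield 0x003f0000 while
	 * addition yields 0x00430000, i.e. the wrong PTEG would be
	 * referenced.
	 */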
	moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size,
	    MAX(256*1024, size));
	if (hw_direct_map)
		moea64_pteg_table =
		    (struct lpte *)PHYS_TO_DMAP((vm_offset_t)moea64_pteg_table);
	/* Allocate partition table (ISA 3.0). */
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) {
		moea64_part_table =
		    (struct pate *)moea64_bootstrap_alloc(PART_SIZE, PART_SIZE);
		moea64_part_table =
		    (struct pate *)PHYS_TO_DMAP((vm_offset_t)moea64_part_table);
	}
	DISABLE_TRANS(msr);
	bzero(__DEVOLATILE(void *, moea64_pteg_table), moea64_pteg_count *
	    sizeof(struct lpteg));
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) {
		bzero(__DEVOLATILE(void *, moea64_part_table), PART_SIZE);
		moea64_part_table[0].pagetab = htobe64(
			(DMAP_TO_PHYS((vm_offset_t)moea64_pteg_table)) |
			(uintptr_t)(flsl((moea64_pteg_count - 1) >> 11)));
	}
	ENABLE_TRANS(msr);

	CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table);

	moea64_mid_bootstrap(kernelstart, kernelend);

	/*
	 * Add a mapping for the page table itself if there is no direct map.
	 */
	if (!hw_direct_map) {
		size = moea64_pteg_count * sizeof(struct lpteg);
		off = (vm_offset_t)(moea64_pteg_table);
		DISABLE_TRANS(msr);
		for (pa = off; pa < off + size; pa += PAGE_SIZE)
			pmap_kenter(pa, pa);
		ENABLE_TRANS(msr);
	}

	/* Bring up virtual memory */
	moea64_late_bootstrap(kernelstart, kernelend);
}

static void
tlbia(void)
{
	vm_offset_t i;
#ifndef __powerpc64__
	register_t msr, scratch;
#endif

	i = 0xc00; /* IS = 11 */
	switch (mfpvr() >> 16) {
	case IBM970:
	case IBM970FX:
	case IBM970MP:
	case IBM970GX:
	case IBMPOWER4:
	case IBMPOWER4PLUS:
	case IBMPOWER5:
	case IBMPOWER5PLUS:
		i = 0; /* IS not supported */
		break;
	}

	TLBSYNC();

	for (; i < 0x400000; i += 0x00001000) {
#ifdef __powerpc64__
		__asm __volatile("tlbiel %0" :: "r"(i));
#else
		__asm __volatile("\
		    mfmsr %0; \
		    mr %1, %0; \
		    insrdi %1,%3,1,0; \
		    mtmsrd %1; \
		    isync; \
		    \
		    tlbiel %2; \
		    \
		    mtmsrd %0; \
		    isync;"
		: "=r"(msr), "=r"(scratch) : "r"(i), "r"(1));
#endif
	}

	EIEIO();
	TLBSYNC();
}

static int
atomic_pte_lock(volatile struct lpte *pte, uint64_t bitmask, uint64_t *oldhi)
{
	int	ret;
#ifdef __powerpc64__
	uint64_t temp;
#else
	uint32_t oldhihalf;
#endif

	/*
	 * Note: in principle, if just the locked bit were set here, we
	 * could avoid needing the eviction lock. However, eviction occurs
	 * so rarely that it isn't worth bothering about in practice.
	 */
#ifdef __powerpc64__
	/*
	 * Note: Success of this sequence has the side effect of invalidating
	 * the PTE, as we are setting it to LPTE_LOCKED and discarding the
	 * other bits, including LPTE_V.
	 */
	__asm __volatile (
		"1:\tldarx %1, 0, %3\n\t"	/* load old value */
		"and. %0,%1,%4\n\t"		/* check if any bits set */
		"bne 2f\n\t"			/* exit if any set */
		"stdcx. %5, 0, %3\n\t"		/* attempt to store */
		"bne- 1b\n\t"			/* spin if failed */
		"li %0, 1\n\t"			/* success - retval = 1 */
		"b 3f\n\t"			/* we've succeeded */
		"2:\n\t"
		"stdcx. %1, 0, %3\n\t"		/* clear reservation (74xx) */
		"li %0, 0\n\t"			/* failure - retval = 0 */
		"3:\n\t"
		: "=&r" (ret), "=&r"(temp), "=m" (pte->pte_hi)
		: "r" ((volatile char *)&pte->pte_hi),
		  "r" (htobe64(bitmask)), "r" (htobe64(LPTE_LOCKED)),
		  "m" (pte->pte_hi)
		: "cr0", "cr1", "cr2", "memory");
	*oldhi = be64toh(temp);
#else
	/*
	 * This code is used in bridge mode only.
	 */
	__asm __volatile (
		"1:\tlwarx %1, 0, %3\n\t"	/* load old value */
		"and. %0,%1,%4\n\t"		/* check if any bits set */
		"bne 2f\n\t"			/* exit if any set */
		"stwcx. %5, 0, %3\n\t"		/* attempt to store */
		"bne- 1b\n\t"			/* spin if failed */
		"li %0, 1\n\t"			/* success - retval = 1 */
		"b 3f\n\t"			/* we've succeeded */
		"2:\n\t"
		"stwcx. %1, 0, %3\n\t"		/* clear reservation (74xx) */
		"li %0, 0\n\t"			/* failure - retval = 0 */
		"3:\n\t"
		: "=&r" (ret), "=&r"(oldhihalf), "=m" (pte->pte_hi)
		: "r" ((volatile char *)&pte->pte_hi + 4),
		  "r" ((uint32_t)bitmask), "r" ((uint32_t)LPTE_LOCKED),
		  "m" (pte->pte_hi)
		: "cr0", "cr1", "cr2", "memory");

	*oldhi = (pte->pte_hi & 0xffffffff00000000ULL) | oldhihalf;
#endif

	return (ret);
}

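/*
 * A sketch of the lock-free pattern above: ldarx places a reservation on
 * pte_hi, the masked test rejects entries with any caller-specified bits
 * (typically LPTE_LOCKED/LPTE_WIRED/LPTE_VALID) already set, and stdcx.
 * succeeds only if no other CPU wrote the doubleword in the meantime;
 * otherwise the sequence retries.  The store on the failure path merely
 * clears the reservation, which some 74xx-class cores otherwise hold
 * onto.
 */
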
static uintptr_t
moea64_insert_to_pteg_native(struct lpte *pvo_pt, uintptr_t slotbase,
    uint64_t mask)
{
	volatile struct lpte *pt;
	uint64_t oldptehi, va;
	uintptr_t k;
	int i, j;

	/* Start at a random slot */
	i = mftb() % 8;
	for (j = 0; j < 8; j++) {
		k = slotbase + (i + j) % 8;
		pt = &moea64_pteg_table[k];
		/* Invalidate and seize lock only if no bits in mask set */
		if (atomic_pte_lock(pt, mask, &oldptehi)) /* Lock obtained */
			break;
	}

	if (j == 8)
		return (-1);

	if (oldptehi & LPTE_VALID) {
		KASSERT(!(oldptehi & LPTE_WIRED), ("Unmapped wired entry"));
		/*
		 * Need to invalidate old entry completely: see
		 * "Modifying a Page Table Entry". Need to reconstruct
		 * the virtual address for the outgoing entry to do that.
		 */
		va = oldptehi >> (ADDR_SR_SHFT - ADDR_API_SHFT64);
		if (oldptehi & LPTE_HID)
			va = (((k >> 3) ^ moea64_pteg_mask) ^ va) &
			    (ADDR_PIDX >> ADDR_PIDX_SHFT);
		else
			va = ((k >> 3) ^ va) & (ADDR_PIDX >> ADDR_PIDX_SHFT);
		va |= (oldptehi & LPTE_AVPN_MASK) <<
		    (ADDR_API_SHFT64 - ADDR_PIDX_SHFT);
		PTESYNC();
		TLBIE(va, oldptehi);
		STAT_MOEA64(moea64_pte_valid--);
		STAT_MOEA64(moea64_pte_overflow++);
	}

	/*
	 * Update the PTE as per "Adding a Page Table Entry". Lock is released
	 * by setting the high doubleword.
	 */
	pt->pte_lo = htobe64(pvo_pt->pte_lo);
	EIEIO();
	pt->pte_hi = htobe64(pvo_pt->pte_hi);
	PTESYNC();

	/* Keep statistics */
	STAT_MOEA64(moea64_pte_valid++);

	return (k);
}

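/*
 * Insertion tries both hash buckets for a mapping.  The secondary PTEG
 * index is the bitwise complement of the primary one, so XORing the slot
 * with (moea64_pteg_mask << 3) below (eight slots per PTEG, hence the
 * shift by 3) moves between the two, with LPTE_HID recording which hash
 * function a resident entry used.
 */
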
static __always_inline int64_t
moea64_pte_insert_locked(struct pvo_entry *pvo, struct lpte *insertpt,
    uint64_t mask)
{
	uintptr_t slot;

	/*
	 * First try primary hash.
	 */
	slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot,
	    mask | LPTE_WIRED | LPTE_LOCKED);
	if (slot != -1) {
		pvo->pvo_pte.slot = slot;
		return (0);
	}

	/*
	 * Now try secondary hash.
	 */
	pvo->pvo_vaddr ^= PVO_HID;
	insertpt->pte_hi ^= LPTE_HID;
	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
	slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot,
	    mask | LPTE_WIRED | LPTE_LOCKED);
	if (slot != -1) {
		pvo->pvo_pte.slot = slot;
		return (0);
	}

	return (-1);
}

static int64_t
moea64_pte_insert_native(struct pvo_entry *pvo)
{
	struct lpte insertpt;
	int64_t ret;

	/* Initialize PTE */
	moea64_pte_from_pvo(pvo, &insertpt);

	/* Make sure further insertion is locked out during evictions */
	rw_rlock(&moea64_eviction_lock);

	pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
	ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID);
	if (ret == -1) {
		/*
		 * Out of luck. Find a PTE to sacrifice.
		 */

		/* Lock out all insertions for a bit */
		if (!rw_try_upgrade(&moea64_eviction_lock)) {
			rw_runlock(&moea64_eviction_lock);
			rw_wlock(&moea64_eviction_lock);
		}
		/* Don't evict large pages */
		ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_BIG);
		rw_wunlock(&moea64_eviction_lock);
		/* No freeable slots in either PTEG? We're hosed. */
		if (ret == -1)
			panic("moea64_pte_insert: overflow");
	} else
		rw_runlock(&moea64_eviction_lock);

	return (0);
}

static void *
moea64_dump_pmap_native(void *ctx, void *buf, u_long *nbytes)
{
	struct dump_context *dctx;
	u_long ptex, ptex_end;

	dctx = (struct dump_context *)ctx;
	ptex = dctx->ptex;
	ptex_end = ptex + dctx->blksz / sizeof(struct lpte);
	ptex_end = MIN(ptex_end, dctx->ptex_end);
	*nbytes = (ptex_end - ptex) * sizeof(struct lpte);

	if (*nbytes == 0)
		return (NULL);

	dctx->ptex = ptex_end;
	return (__DEVOLATILE(struct lpte *, moea64_pteg_table) + ptex);
}

static __always_inline uint64_t
moea64_vpn_from_pte(uint64_t ptehi, uintptr_t slot)
{
	uint64_t pgn, pgnlo, vsid;

	vsid = (ptehi & LPTE_AVA_MASK) >> LPTE_VSID_SHIFT;
	if ((ptehi & LPTE_HID) != 0)
		slot ^= (moea64_pteg_mask << 3);
	pgnlo = ((vsid & VSID_HASH_MASK) ^ (slot >> 3)) & EA_PAGELO_MASK;
	pgn = ((ptehi & LPTE_AVA_PGNHI_MASK) << (EA_PAGELO_SHIFT -
	    LPTE_AVA_PGNHI_SHIFT)) | pgnlo;
	return ((vsid << 16) | pgn);
}

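/*
 * The reconstruction above inverts the hashed page table lookup: the PTE
 * only stores the abbreviated VA (the VSID plus the high bits of the page
 * number), but since pteg_index = (vsid_hash ^ page_number_low) & mask,
 * XORing the PTEG index (slot >> 3) with the VSID hash recovers the low
 * page-number bits, after first undoing the secondary-hash complement if
 * LPTE_HID is set.
 */
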
static __always_inline int64_t
moea64_pte_unset_sp_locked(struct pvo_entry *pvo)
{
	volatile struct lpte *pt;
	uint64_t ptehi, refchg, vpn;
	vm_offset_t eva;

	refchg = 0;
	eva = PVO_VADDR(pvo) + HPT_SP_SIZE;

	for (; pvo != NULL && PVO_VADDR(pvo) < eva;
	    pvo = RB_NEXT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo)) {
		pt = moea64_pteg_table + pvo->pvo_pte.slot;
		ptehi = be64toh(pt->pte_hi);
		if ((ptehi & LPTE_AVPN_MASK) !=
		    moea64_pte_vpn_from_pvo_vpn(pvo)) {
			/* Evicted: invalidate new entry */
			STAT_MOEA64(moea64_pte_overflow--);
			vpn = moea64_vpn_from_pte(ptehi, pvo->pvo_pte.slot);
			CTR1(KTR_PMAP, "Evicted page in pte_unset_sp: vpn=%jx",
			    (uintmax_t)vpn);
			/* Assume evicted page was modified */
			refchg |= LPTE_CHG;
		} else
			vpn = pvo->pvo_vpn;

		refchg |= moea64_pte_unset_locked(pt, vpn);
	}

	return (refchg);
}

static int64_t
moea64_pte_unset_sp_native(struct pvo_entry *pvo)
{
	uint64_t refchg;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));

	rw_rlock(&moea64_eviction_lock);
	refchg = moea64_pte_unset_sp_locked(pvo);
	rw_runlock(&moea64_eviction_lock);

	return (refchg);
}

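/*
 * Unlike moea64_pte_insert_native() above, the superpage insert loop
 * below drops back to a read lock after an eviction (rw_downgrade rather
 * than rw_wunlock), since the eviction lock must remain held across the
 * remaining PVOs of the superpage being walked.
 */
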
static __always_inline int64_t
moea64_pte_insert_sp_locked(struct pvo_entry *pvo)
{
	struct lpte insertpt;
	int64_t ret;
	vm_offset_t eva;

	eva = PVO_VADDR(pvo) + HPT_SP_SIZE;

	for (; pvo != NULL && PVO_VADDR(pvo) < eva;
	    pvo = RB_NEXT(pvo_tree, &pvo->pvo_pmap->pmap_pvo, pvo)) {
		moea64_pte_from_pvo(pvo, &insertpt);
		pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */

		ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID);
		if (ret == -1) {
			/* Lock out all insertions for a bit */
			if (!rw_try_upgrade(&moea64_eviction_lock)) {
				rw_runlock(&moea64_eviction_lock);
				rw_wlock(&moea64_eviction_lock);
			}
			/* Don't evict large pages */
			ret = moea64_pte_insert_locked(pvo, &insertpt,
			    LPTE_BIG);
			rw_downgrade(&moea64_eviction_lock);
			/* No freeable slots in either PTEG? We're hosed. */
			if (ret == -1)
				panic("moea64_pte_insert_sp: overflow");
		}
	}

	return (0);
}

static int64_t
moea64_pte_insert_sp_native(struct pvo_entry *pvo)
{
	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));

	rw_rlock(&moea64_eviction_lock);
	moea64_pte_insert_sp_locked(pvo);
	rw_runlock(&moea64_eviction_lock);

	return (0);
}

static int64_t
moea64_pte_replace_sp_native(struct pvo_entry *pvo)
{
	uint64_t refchg;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
	    ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));

	rw_rlock(&moea64_eviction_lock);
	refchg = moea64_pte_unset_sp_locked(pvo);
	moea64_pte_insert_sp_locked(pvo);
	rw_runlock(&moea64_eviction_lock);

	return (refchg);
}