/*
 * Information about, and flushing of, the host CPU caches.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/bitops.h"
#include "qemu/host-utils.h"
#include "qemu/atomic.h"


int qemu_icache_linesize = 0;
int qemu_icache_linesize_log;
int qemu_dcache_linesize = 0;
int qemu_dcache_linesize_log;

/*
 * Operating system specific cache detection mechanisms.
 */

#if defined(_WIN32)

static void sys_cache_info(int *isize, int *dsize)
{
    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
    DWORD size = 0;
    BOOL success;
    size_t i, n;

    /*
     * Check for the required buffer size first.  Note that if the zero
     * size we use for the probe results in success, then there is no
     * data available; fail in that case.
     */
    success = GetLogicalProcessorInformation(0, &size);
    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
        return;
    }

    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
    if (!GetLogicalProcessorInformation(buf, &size)) {
        goto fail;
    }

    for (i = 0; i < n; i++) {
        if (buf[i].Relationship == RelationCache
            && buf[i].Cache.Level == 1) {
            switch (buf[i].Cache.Type) {
            case CacheUnified:
                *isize = *dsize = buf[i].Cache.LineSize;
                break;
            case CacheInstruction:
                *isize = buf[i].Cache.LineSize;
                break;
            case CacheData:
                *dsize = buf[i].Cache.LineSize;
                break;
            default:
                break;
            }
        }
    }
 fail:
    g_free(buf);
}

#elif defined(CONFIG_DARWIN)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    long size;
    size_t len = sizeof(size);
    if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
# include <sys/sysctl.h>
static void sys_cache_info(int *isize, int *dsize)
{
    /* There's only a single sysctl for both I/D cache line sizes. */
    int size;
    size_t len = sizeof(size);
    if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) {
        *isize = *dsize = size;
    }
}
#else
/* POSIX */

static void sys_cache_info(int *isize, int *dsize)
{
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
    if (tmp_isize > 0) {
        *isize = tmp_isize;
    }
# endif
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
    if (tmp_dsize > 0) {
        *dsize = tmp_dsize;
    }
# endif
}
#endif /* sys_cache_info */


/*
 * Architecture (+ OS) specific cache detection mechanisms.
 */

#if defined(__powerpc__)
static bool have_coherent_icache;
#endif

#if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32)
/*
 * Apple does not expose CTR_EL0, so we must use system interfaces.
 * Windows does not either, but in that case we use a generic
 * implementation of flush_idcache_range.
 */
static uint64_t save_ctr_el0;
static void arch_cache_info(int *isize, int *dsize)
{
    uint64_t ctr;

    /*
     * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
     * but (at least under Linux) these are marked protected by the
     * kernel.  However, CTR_EL0 contains the minimum linesize in the
     * entire hierarchy, and is used by userspace cache flushing.
     *
     * We will also use this value in flush_idcache_range.
     */
    asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
    save_ctr_el0 = ctr;

    if (*isize == 0) {
        *isize = 4 << (ctr & 0xf);
    }
    if (*dsize == 0) {
        *dsize = 4 << ((ctr >> 16) & 0xf);
    }
}
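
/*
 * Worked example for the decode above (illustrative values, not taken
 * from any particular core): CTR_EL0.IminLine occupies bits [3:0] and
 * CTR_EL0.DminLine bits [19:16], each holding log2 of the smallest
 * line size in the hierarchy, counted in 4-byte words.  A field value
 * of 4 therefore decodes as 4 << 4 = 64 bytes, the common line size
 * on current aarch64 cores.
 */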

#elif defined(_ARCH_PPC) && defined(__linux__)
# include "elf.h"

static void arch_cache_info(int *isize, int *dsize)
{
    if (*isize == 0) {
        *isize = qemu_getauxval(AT_ICACHEBSIZE);
    }
    if (*dsize == 0) {
        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
    }
    have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP;
}

#else
static void arch_cache_info(int *isize, int *dsize) { }
#endif /* arch_cache_info */

/*
 * ... and if all else fails ...
 */

static void fallback_cache_info(int *isize, int *dsize)
{
    /* If we can only find one of the two, assume they're the same. */
    if (*isize) {
        if (*dsize) {
            /* Success! */
        } else {
            *dsize = *isize;
        }
    } else if (*dsize) {
        *isize = *dsize;
    } else {
#if defined(_ARCH_PPC)
        /*
         * For PPC, we're going to use the cache sizes computed for
         * flush_idcache_range, which means that we must use the
         * architecture minimum.
         */
        *isize = *dsize = 16;
#else
        /* Otherwise, 64 bytes is not uncommon. */
        *isize = *dsize = 64;
#endif
    }
}

static void __attribute__((constructor)) init_cache_info(void)
{
    int isize = 0, dsize = 0;

    sys_cache_info(&isize, &dsize);
    arch_cache_info(&isize, &dsize);
    fallback_cache_info(&isize, &dsize);

    /* Both line sizes must be powers of two for the _log forms below. */
    assert((isize & (isize - 1)) == 0);
    assert((dsize & (dsize - 1)) == 0);

    qemu_icache_linesize = isize;
    qemu_icache_linesize_log = ctz32(isize);
    qemu_dcache_linesize = dsize;
    qemu_dcache_linesize_log = ctz32(dsize);

    qatomic64_init();
}
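
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * consumers that size or align buffers to the detected geometry can use
 * the line size directly, e.g. via QEMU's ROUND_UP macro:
 *
 *     size_t aligned_len = ROUND_UP(buf_len, qemu_dcache_linesize);
 *
 * or do the same with the _log form, since the asserts above guarantee
 * a power of two:
 *
 *     size_t mask = ((size_t)1 << qemu_dcache_linesize_log) - 1;
 *     size_t aligned_len = (buf_len + mask) & ~mask;
 */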

/*
 * Architecture (+ OS) specific cache flushing mechanisms.
 */

#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)

/*
 * Caches are coherent and do not require flushing; the no-op
 * flush_idcache_range is inlined in the header.
 */

#elif defined(EMSCRIPTEN)

/* Wasm does not have an executable region of memory. */

#elif defined(__aarch64__) && !defined(CONFIG_WIN32)
/*
 * For Windows, we use the generic implementation of flush_idcache_range,
 * which calls FlushInstructionCache through __builtin___clear_cache.
 */

#ifdef CONFIG_DARWIN
/* Apple does not expose CTR_EL0, so we must use system interfaces. */
#include <libkern/OSCacheControl.h>

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx == rw) {
        /*
         * sys_icache_invalidate() syncs the dcache and icache,
         * so no need to call sys_dcache_flush().
         */
    } else {
        sys_dcache_flush((void *)rw, len);
    }
    sys_icache_invalidate((void *)rx, len);
}
#else

/*
 * This is a copy of gcc's __aarch64_sync_cache_range, modified
 * to fit this three-operand interface.
 */
void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    const unsigned CTR_IDC = 1u << 28;
    const unsigned CTR_DIC = 1u << 29;
    const uint64_t ctr_el0 = save_ctr_el0;
    const uintptr_t icache_lsize = qemu_icache_linesize;
    const uintptr_t dcache_lsize = qemu_dcache_linesize;
    uintptr_t p;

    /*
     * If CTR_EL0.IDC is set, data cache cleaning to the Point of
     * Unification is not required for instruction-to-data coherence.
     */
    if (!(ctr_el0 & CTR_IDC)) {
        /*
         * Loop over the address range, cleaning one cache line at a time.
         * The data cache must be cleaned to the Point of Unification first
         * so that the instruction cache fetches the updated data.
         */
        for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) {
            asm volatile("dc\tcvau, %0" : : "r" (p) : "memory");
        }
    }

    /* DSB unconditionally to ensure any outstanding writes are committed. */
    asm volatile("dsb\tish" : : : "memory");

    /*
     * If CTR_EL0.DIC is set, instruction cache invalidation to the Point
     * of Unification is not required for instruction-to-data coherence.
     */
    if (!(ctr_el0 & CTR_DIC)) {
        for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) {
            asm volatile("ic\tivau, %0" : : "r"(p) : "memory");
        }
        asm volatile ("dsb\tish" : : : "memory");
    }

    asm volatile("isb" : : : "memory");
}
#endif /* CONFIG_DARWIN */

#elif defined(__mips__)

#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
#include <sys/cachectl.h>
#endif

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rx != rw) {
        cacheflush((void *)rw, len, DCACHE);
    }
    cacheflush((void *)rx, len, ICACHE);
}

#elif defined(__powerpc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    uintptr_t p, b, e;
    size_t dsize, isize;

    /*
     * Some processors have coherent caches and support a simplified
     * flushing procedure.  See
     *   POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi)
     *   https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k
     */
    if (have_coherent_icache) {
        asm volatile ("sync\n\t"
                      "icbi 0,%0\n\t"
                      "isync"
                      : : "r"(rx) : "memory");
        return;
    }

    dsize = qemu_dcache_linesize;
    isize = qemu_icache_linesize;

    /* Store the data cache blocks covering the RW alias to memory ... */
    b = rw & ~(dsize - 1);
    e = (rw + len + dsize - 1) & ~(dsize - 1);
    for (p = b; p < e; p += dsize) {
        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");

    /* ... then invalidate the corresponding instruction cache blocks. */
    b = rx & ~(isize - 1);
    e = (rx + len + isize - 1) & ~(isize - 1);
    for (p = b; p < e; p += isize) {
        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
    }
    asm volatile ("sync" : : : "memory");
    asm volatile ("isync" : : : "memory");
}

#elif defined(__sparc__)

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    /* No additional data flush to the RW virtual address required. */
    uintptr_t p, end = (rx + len + 7) & -8;
    for (p = rx & -8; p < end; p += 8) {
        __asm__ __volatile__("flush\t%0" : : "r" (p));
    }
}

#else

void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
    if (rw != rx) {
        __builtin___clear_cache((char *)rw, (char *)rw + len);
    }
    __builtin___clear_cache((char *)rx, (char *)rx + len);
}

#endif
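
/*
 * Usage sketch (hypothetical caller, not part of this file): a JIT with
 * a split code buffer, written through a writable alias "rw" and executed
 * through an executable alias "rx", flushes both views after emitting:
 *
 *     memcpy((void *)rw, code, len);
 *     flush_idcache_range(rx, rw, len);
 *     ((void (*)(void))rx)();
 *
 * With a single RWX mapping, rx == rw, and several of the implementations
 * above can then skip the separate data flush of the writable alias.
 */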