1084cfca1SRichard Henderson /* 279713752SRichard Henderson * Info about, and flushing the host cpu caches. 3084cfca1SRichard Henderson * 4084cfca1SRichard Henderson * This work is licensed under the terms of the GNU GPL, version 2 or later. 5084cfca1SRichard Henderson * See the COPYING file in the top-level directory. 6084cfca1SRichard Henderson */ 7084cfca1SRichard Henderson 8084cfca1SRichard Henderson #include "qemu/osdep.h" 9084cfca1SRichard Henderson #include "qemu/cacheflush.h" 10ad768e6fSPeter Maydell #include "qemu/cacheinfo.h" 11664a7973SRichard Henderson #include "qemu/bitops.h" 1279713752SRichard Henderson #include "qemu/host-utils.h" 1379713752SRichard Henderson #include "qemu/atomic.h" 14084cfca1SRichard Henderson 15084cfca1SRichard Henderson 1679713752SRichard Henderson int qemu_icache_linesize = 0; 1779713752SRichard Henderson int qemu_icache_linesize_log; 1879713752SRichard Henderson int qemu_dcache_linesize = 0; 1979713752SRichard Henderson int qemu_dcache_linesize_log; 2079713752SRichard Henderson 2179713752SRichard Henderson /* 2279713752SRichard Henderson * Operating system specific cache detection mechanisms. 2379713752SRichard Henderson */ 2479713752SRichard Henderson 2579713752SRichard Henderson #if defined(_WIN32) 2679713752SRichard Henderson 2779713752SRichard Henderson static void sys_cache_info(int *isize, int *dsize) 2879713752SRichard Henderson { 2979713752SRichard Henderson SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf; 3079713752SRichard Henderson DWORD size = 0; 3179713752SRichard Henderson BOOL success; 3279713752SRichard Henderson size_t i, n; 3379713752SRichard Henderson 3479713752SRichard Henderson /* 3579713752SRichard Henderson * Check for the required buffer size first. Note that if the zero 3679713752SRichard Henderson * size we use for the probe results in success, then there is no 3779713752SRichard Henderson * data available; fail in that case. 3879713752SRichard Henderson */ 3979713752SRichard Henderson success = GetLogicalProcessorInformation(0, &size); 4079713752SRichard Henderson if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { 4179713752SRichard Henderson return; 4279713752SRichard Henderson } 4379713752SRichard Henderson 4479713752SRichard Henderson n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); 4579713752SRichard Henderson size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); 4679713752SRichard Henderson buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n); 4779713752SRichard Henderson if (!GetLogicalProcessorInformation(buf, &size)) { 4879713752SRichard Henderson goto fail; 4979713752SRichard Henderson } 5079713752SRichard Henderson 5179713752SRichard Henderson for (i = 0; i < n; i++) { 5279713752SRichard Henderson if (buf[i].Relationship == RelationCache 5379713752SRichard Henderson && buf[i].Cache.Level == 1) { 5479713752SRichard Henderson switch (buf[i].Cache.Type) { 5579713752SRichard Henderson case CacheUnified: 5679713752SRichard Henderson *isize = *dsize = buf[i].Cache.LineSize; 5779713752SRichard Henderson break; 5879713752SRichard Henderson case CacheInstruction: 5979713752SRichard Henderson *isize = buf[i].Cache.LineSize; 6079713752SRichard Henderson break; 6179713752SRichard Henderson case CacheData: 6279713752SRichard Henderson *dsize = buf[i].Cache.LineSize; 6379713752SRichard Henderson break; 6479713752SRichard Henderson default: 6579713752SRichard Henderson break; 6679713752SRichard Henderson } 6779713752SRichard Henderson } 6879713752SRichard Henderson } 6979713752SRichard Henderson fail: 7079713752SRichard Henderson g_free(buf); 7179713752SRichard Henderson } 7279713752SRichard Henderson 73bdd50dc7SRichard Henderson #elif defined(CONFIG_DARWIN) 7479713752SRichard Henderson # include <sys/sysctl.h> 7579713752SRichard Henderson static void sys_cache_info(int *isize, int *dsize) 7679713752SRichard Henderson { 7779713752SRichard Henderson /* There's only a single sysctl for both I/D cache line sizes. */ 7879713752SRichard Henderson long size; 7979713752SRichard Henderson size_t len = sizeof(size); 8079713752SRichard Henderson if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) { 8179713752SRichard Henderson *isize = *dsize = size; 8279713752SRichard Henderson } 8379713752SRichard Henderson } 8479713752SRichard Henderson #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 8579713752SRichard Henderson # include <sys/sysctl.h> 8679713752SRichard Henderson static void sys_cache_info(int *isize, int *dsize) 8779713752SRichard Henderson { 8879713752SRichard Henderson /* There's only a single sysctl for both I/D cache line sizes. */ 8979713752SRichard Henderson int size; 9079713752SRichard Henderson size_t len = sizeof(size); 9179713752SRichard Henderson if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) { 9279713752SRichard Henderson *isize = *dsize = size; 9379713752SRichard Henderson } 9479713752SRichard Henderson } 9579713752SRichard Henderson #else 9679713752SRichard Henderson /* POSIX */ 9779713752SRichard Henderson 9879713752SRichard Henderson static void sys_cache_info(int *isize, int *dsize) 9979713752SRichard Henderson { 10079713752SRichard Henderson # ifdef _SC_LEVEL1_ICACHE_LINESIZE 10179713752SRichard Henderson int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE); 10279713752SRichard Henderson if (tmp_isize > 0) { 10379713752SRichard Henderson *isize = tmp_isize; 10479713752SRichard Henderson } 10579713752SRichard Henderson # endif 10679713752SRichard Henderson # ifdef _SC_LEVEL1_DCACHE_LINESIZE 10779713752SRichard Henderson int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE); 10879713752SRichard Henderson if (tmp_dsize > 0) { 10979713752SRichard Henderson *dsize = tmp_dsize; 11079713752SRichard Henderson } 11179713752SRichard Henderson # endif 11279713752SRichard Henderson } 11379713752SRichard Henderson #endif /* sys_cache_info */ 11479713752SRichard Henderson 11579713752SRichard Henderson 11679713752SRichard Henderson /* 11779713752SRichard Henderson * Architecture (+ OS) specific cache detection mechanisms. 11879713752SRichard Henderson */ 11979713752SRichard Henderson 120c79a8e84SNicholas Piggin #if defined(__powerpc__) 121c79a8e84SNicholas Piggin static bool have_coherent_icache; 122c79a8e84SNicholas Piggin #endif 123c79a8e84SNicholas Piggin 124b3c32602SPierrick Bouvier #if defined(__aarch64__) && !defined(CONFIG_DARWIN) && !defined(CONFIG_WIN32) 125b3c32602SPierrick Bouvier /* 126b3c32602SPierrick Bouvier * Apple does not expose CTR_EL0, so we must use system interfaces. 127b3c32602SPierrick Bouvier * Windows neither, but we use a generic implementation of flush_idcache_range 128b3c32602SPierrick Bouvier * in this case. 129b3c32602SPierrick Bouvier */ 130bdd50dc7SRichard Henderson static uint64_t save_ctr_el0; 13179713752SRichard Henderson static void arch_cache_info(int *isize, int *dsize) 13279713752SRichard Henderson { 13379713752SRichard Henderson uint64_t ctr; 13479713752SRichard Henderson 13579713752SRichard Henderson /* 13679713752SRichard Henderson * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1, 13779713752SRichard Henderson * but (at least under Linux) these are marked protected by the 13879713752SRichard Henderson * kernel. However, CTR_EL0 contains the minimum linesize in the 13979713752SRichard Henderson * entire hierarchy, and is used by userspace cache flushing. 140bdd50dc7SRichard Henderson * 141bdd50dc7SRichard Henderson * We will also use this value in flush_idcache_range. 14279713752SRichard Henderson */ 14379713752SRichard Henderson asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr)); 144bdd50dc7SRichard Henderson save_ctr_el0 = ctr; 145bdd50dc7SRichard Henderson 146bdd50dc7SRichard Henderson if (*isize == 0 || *dsize == 0) { 14779713752SRichard Henderson if (*isize == 0) { 14879713752SRichard Henderson *isize = 4 << (ctr & 0xf); 14979713752SRichard Henderson } 15079713752SRichard Henderson if (*dsize == 0) { 15179713752SRichard Henderson *dsize = 4 << ((ctr >> 16) & 0xf); 15279713752SRichard Henderson } 15379713752SRichard Henderson } 15479713752SRichard Henderson } 15579713752SRichard Henderson 15679713752SRichard Henderson #elif defined(_ARCH_PPC) && defined(__linux__) 15779713752SRichard Henderson # include "elf.h" 15879713752SRichard Henderson 15979713752SRichard Henderson static void arch_cache_info(int *isize, int *dsize) 16079713752SRichard Henderson { 16179713752SRichard Henderson if (*isize == 0) { 16279713752SRichard Henderson *isize = qemu_getauxval(AT_ICACHEBSIZE); 16379713752SRichard Henderson } 16479713752SRichard Henderson if (*dsize == 0) { 16579713752SRichard Henderson *dsize = qemu_getauxval(AT_DCACHEBSIZE); 16679713752SRichard Henderson } 167c79a8e84SNicholas Piggin have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP; 16879713752SRichard Henderson } 16979713752SRichard Henderson 17079713752SRichard Henderson #else 17179713752SRichard Henderson static void arch_cache_info(int *isize, int *dsize) { } 17279713752SRichard Henderson #endif /* arch_cache_info */ 17379713752SRichard Henderson 17479713752SRichard Henderson /* 17579713752SRichard Henderson * ... and if all else fails ... 17679713752SRichard Henderson */ 17779713752SRichard Henderson 17879713752SRichard Henderson static void fallback_cache_info(int *isize, int *dsize) 17979713752SRichard Henderson { 18079713752SRichard Henderson /* If we can only find one of the two, assume they're the same. */ 18179713752SRichard Henderson if (*isize) { 18279713752SRichard Henderson if (*dsize) { 18379713752SRichard Henderson /* Success! */ 18479713752SRichard Henderson } else { 18579713752SRichard Henderson *dsize = *isize; 18679713752SRichard Henderson } 18779713752SRichard Henderson } else if (*dsize) { 18879713752SRichard Henderson *isize = *dsize; 18979713752SRichard Henderson } else { 19079713752SRichard Henderson #if defined(_ARCH_PPC) 19179713752SRichard Henderson /* 19279713752SRichard Henderson * For PPC, we're going to use the cache sizes computed for 19379713752SRichard Henderson * flush_idcache_range. Which means that we must use the 19479713752SRichard Henderson * architecture minimum. 19579713752SRichard Henderson */ 19679713752SRichard Henderson *isize = *dsize = 16; 19779713752SRichard Henderson #else 19879713752SRichard Henderson /* Otherwise, 64 bytes is not uncommon. */ 19979713752SRichard Henderson *isize = *dsize = 64; 20079713752SRichard Henderson #endif 20179713752SRichard Henderson } 20279713752SRichard Henderson } 20379713752SRichard Henderson 20479713752SRichard Henderson static void __attribute__((constructor)) init_cache_info(void) 20579713752SRichard Henderson { 20679713752SRichard Henderson int isize = 0, dsize = 0; 20779713752SRichard Henderson 20879713752SRichard Henderson sys_cache_info(&isize, &dsize); 20979713752SRichard Henderson arch_cache_info(&isize, &dsize); 21079713752SRichard Henderson fallback_cache_info(&isize, &dsize); 21179713752SRichard Henderson 21279713752SRichard Henderson assert((isize & (isize - 1)) == 0); 21379713752SRichard Henderson assert((dsize & (dsize - 1)) == 0); 21479713752SRichard Henderson 21579713752SRichard Henderson qemu_icache_linesize = isize; 21679713752SRichard Henderson qemu_icache_linesize_log = ctz32(isize); 21779713752SRichard Henderson qemu_dcache_linesize = dsize; 21879713752SRichard Henderson qemu_dcache_linesize_log = ctz32(dsize); 21979713752SRichard Henderson 22079713752SRichard Henderson qatomic64_init(); 22179713752SRichard Henderson } 22279713752SRichard Henderson 22379713752SRichard Henderson 22479713752SRichard Henderson /* 22579713752SRichard Henderson * Architecture (+ OS) specific cache flushing mechanisms. 22679713752SRichard Henderson */ 22779713752SRichard Henderson 228084cfca1SRichard Henderson #if defined(__i386__) || defined(__x86_64__) || defined(__s390__) 229084cfca1SRichard Henderson 230084cfca1SRichard Henderson /* Caches are coherent and do not require flushing; symbol inline. */ 231084cfca1SRichard Henderson 232b3c32602SPierrick Bouvier #elif defined(__aarch64__) && !defined(CONFIG_WIN32) 233b3c32602SPierrick Bouvier /* 234b3c32602SPierrick Bouvier * For Windows, we use generic implementation of flush_idcache_range, that 235b3c32602SPierrick Bouvier * performs a call to FlushInstructionCache, through __builtin___clear_cache. 236b3c32602SPierrick Bouvier */ 237664a7973SRichard Henderson 238664a7973SRichard Henderson #ifdef CONFIG_DARWIN 239664a7973SRichard Henderson /* Apple does not expose CTR_EL0, so we must use system interfaces. */ 2400baf54d0SPhilippe Mathieu-Daudé #include <libkern/OSCacheControl.h> 2410baf54d0SPhilippe Mathieu-Daudé 242664a7973SRichard Henderson void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) 243664a7973SRichard Henderson { 244664a7973SRichard Henderson sys_dcache_flush((void *)rw, len); 245664a7973SRichard Henderson sys_icache_invalidate((void *)rx, len); 246664a7973SRichard Henderson } 247664a7973SRichard Henderson #else 248664a7973SRichard Henderson 249664a7973SRichard Henderson /* 250664a7973SRichard Henderson * This is a copy of gcc's __aarch64_sync_cache_range, modified 251664a7973SRichard Henderson * to fit this three-operand interface. 252664a7973SRichard Henderson */ 253664a7973SRichard Henderson void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) 254664a7973SRichard Henderson { 255664a7973SRichard Henderson const unsigned CTR_IDC = 1u << 28; 256664a7973SRichard Henderson const unsigned CTR_DIC = 1u << 29; 257acd15fc2SGan Qixin const uint64_t ctr_el0 = save_ctr_el0; 258bdd50dc7SRichard Henderson const uintptr_t icache_lsize = qemu_icache_linesize; 259bdd50dc7SRichard Henderson const uintptr_t dcache_lsize = qemu_dcache_linesize; 260664a7973SRichard Henderson uintptr_t p; 261664a7973SRichard Henderson 262664a7973SRichard Henderson /* 263664a7973SRichard Henderson * If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification 264664a7973SRichard Henderson * is not required for instruction to data coherence. 265664a7973SRichard Henderson */ 266664a7973SRichard Henderson if (!(ctr_el0 & CTR_IDC)) { 267664a7973SRichard Henderson /* 268664a7973SRichard Henderson * Loop over the address range, clearing one cache line at once. 269664a7973SRichard Henderson * Data cache must be flushed to unification first to make sure 270664a7973SRichard Henderson * the instruction cache fetches the updated data. 271664a7973SRichard Henderson */ 272664a7973SRichard Henderson for (p = rw & -dcache_lsize; p < rw + len; p += dcache_lsize) { 273664a7973SRichard Henderson asm volatile("dc\tcvau, %0" : : "r" (p) : "memory"); 274664a7973SRichard Henderson } 275664a7973SRichard Henderson asm volatile("dsb\tish" : : : "memory"); 276664a7973SRichard Henderson } 277664a7973SRichard Henderson 278664a7973SRichard Henderson /* 279664a7973SRichard Henderson * If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point 280664a7973SRichard Henderson * of Unification is not required for instruction to data coherence. 281664a7973SRichard Henderson */ 282664a7973SRichard Henderson if (!(ctr_el0 & CTR_DIC)) { 283664a7973SRichard Henderson for (p = rx & -icache_lsize; p < rx + len; p += icache_lsize) { 284664a7973SRichard Henderson asm volatile("ic\tivau, %0" : : "r"(p) : "memory"); 285664a7973SRichard Henderson } 286664a7973SRichard Henderson asm volatile ("dsb\tish" : : : "memory"); 287664a7973SRichard Henderson } 288664a7973SRichard Henderson 289664a7973SRichard Henderson asm volatile("isb" : : : "memory"); 290664a7973SRichard Henderson } 291664a7973SRichard Henderson #endif /* CONFIG_DARWIN */ 292664a7973SRichard Henderson 293084cfca1SRichard Henderson #elif defined(__mips__) 294084cfca1SRichard Henderson 295084cfca1SRichard Henderson #ifdef __OpenBSD__ 296084cfca1SRichard Henderson #include <machine/sysarch.h> 297084cfca1SRichard Henderson #else 298084cfca1SRichard Henderson #include <sys/cachectl.h> 299084cfca1SRichard Henderson #endif 300084cfca1SRichard Henderson 3011da8de39SRichard Henderson void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) 302084cfca1SRichard Henderson { 3031da8de39SRichard Henderson if (rx != rw) { 3041da8de39SRichard Henderson cacheflush((void *)rw, len, DCACHE); 3051da8de39SRichard Henderson } 3061da8de39SRichard Henderson cacheflush((void *)rx, len, ICACHE); 307084cfca1SRichard Henderson } 308084cfca1SRichard Henderson 309084cfca1SRichard Henderson #elif defined(__powerpc__) 310084cfca1SRichard Henderson 3111da8de39SRichard Henderson void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) 312084cfca1SRichard Henderson { 3131da8de39SRichard Henderson uintptr_t p, b, e; 314c79a8e84SNicholas Piggin size_t dsize, isize; 315c79a8e84SNicholas Piggin 316c79a8e84SNicholas Piggin /* 317c79a8e84SNicholas Piggin * Some processors have coherent caches and support a simplified 318c79a8e84SNicholas Piggin * flushing procedure. See 319c79a8e84SNicholas Piggin * POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi) 320c79a8e84SNicholas Piggin * https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k 321c79a8e84SNicholas Piggin */ 322c79a8e84SNicholas Piggin if (have_coherent_icache) { 323c79a8e84SNicholas Piggin asm volatile ("sync\n\t" 324c79a8e84SNicholas Piggin "icbi 0,%0\n\t" 325c79a8e84SNicholas Piggin "isync" 326c79a8e84SNicholas Piggin : : "r"(rx) : "memory"); 327c79a8e84SNicholas Piggin return; 328c79a8e84SNicholas Piggin } 329c79a8e84SNicholas Piggin 330c79a8e84SNicholas Piggin dsize = qemu_dcache_linesize; 331c79a8e84SNicholas Piggin isize = qemu_icache_linesize; 332084cfca1SRichard Henderson 3331da8de39SRichard Henderson b = rw & ~(dsize - 1); 3341da8de39SRichard Henderson e = (rw + len + dsize - 1) & ~(dsize - 1); 3351da8de39SRichard Henderson for (p = b; p < e; p += dsize) { 336084cfca1SRichard Henderson asm volatile ("dcbst 0,%0" : : "r"(p) : "memory"); 337084cfca1SRichard Henderson } 338084cfca1SRichard Henderson asm volatile ("sync" : : : "memory"); 339084cfca1SRichard Henderson 3401da8de39SRichard Henderson b = rx & ~(isize - 1); 3411da8de39SRichard Henderson e = (rx + len + isize - 1) & ~(isize - 1); 3421da8de39SRichard Henderson for (p = b; p < e; p += isize) { 343084cfca1SRichard Henderson asm volatile ("icbi 0,%0" : : "r"(p) : "memory"); 344084cfca1SRichard Henderson } 345084cfca1SRichard Henderson asm volatile ("sync" : : : "memory"); 346084cfca1SRichard Henderson asm volatile ("isync" : : : "memory"); 347084cfca1SRichard Henderson } 348084cfca1SRichard Henderson 349084cfca1SRichard Henderson #elif defined(__sparc__) 350084cfca1SRichard Henderson 3511da8de39SRichard Henderson void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) 352084cfca1SRichard Henderson { 3531da8de39SRichard Henderson /* No additional data flush to the RW virtual address required. */ 3541da8de39SRichard Henderson uintptr_t p, end = (rx + len + 7) & -8; 3551da8de39SRichard Henderson for (p = rx & -8; p < end; p += 8) { 356084cfca1SRichard Henderson __asm__ __volatile__("flush\t%0" : : "r" (p)); 357084cfca1SRichard Henderson } 358084cfca1SRichard Henderson } 359084cfca1SRichard Henderson 360084cfca1SRichard Henderson #else 361084cfca1SRichard Henderson 3621da8de39SRichard Henderson void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) 363084cfca1SRichard Henderson { 3641da8de39SRichard Henderson if (rw != rx) { 3651da8de39SRichard Henderson __builtin___clear_cache((char *)rw, (char *)rw + len); 3661da8de39SRichard Henderson } 3671da8de39SRichard Henderson __builtin___clear_cache((char *)rx, (char *)rx + len); 368084cfca1SRichard Henderson } 369084cfca1SRichard Henderson 370084cfca1SRichard Henderson #endif 371