/* $OpenBSD: cache_loongson2.c,v 1.8 2021/03/11 11:16:59 jsg Exp $ */

/*
 * Copyright (c) 2009, 2012 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Cache handling code for Loongson 2E and 2F processors.
 * This code could be made to work on 2C by not hardcoding the number of
 * cache ways.
 *
 * 2E and 2F caches are:
 * - L1 I$ is 4-way, VIPT, 32 bytes/line, 64KB total
 * - L1 D$ is 4-way, VIPT, write-back, 32 bytes/line, 64KB total
 * - L2 is 4-way, PIPT, write-back, 32 bytes/line, 512KB total
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <mips64/cache.h>
#include <machine/cpu.h>

#include <uvm/uvm_extern.h>

/* L1 cache operations */
#define IndexInvalidate_I       0x00
#define IndexWBInvalidate_D     0x01
#define IndexLoadTag_D          0x05
#define IndexStoreTag_D         0x09
#define HitInvalidate_D         0x11
#define HitWBInvalidate_D       0x15
#define IndexLoadData_D         0x19
#define IndexStoreData_D        0x1d

/* L2 cache operations */
#define IndexWBInvalidate_S     0x03
#define IndexLoadTag_S          0x07
#define IndexStoreTag_S         0x0b
#define HitInvalidate_S         0x13
#define HitWBInvalidate_S       0x17
#define IndexLoadData_S         0x1b
#define IndexStoreData_S        0x1f

#define cache(op,set,addr) \
    __asm__ volatile \
      ("cache %0, %1(%2)" :: "i"(op), "i"(set), "r"(addr) : "memory")

static __inline__ void ls2f_hitinv_primary(vaddr_t, vsize_t);
static __inline__ void ls2f_hitinv_secondary(vaddr_t, vsize_t);
static __inline__ void ls2f_hitwbinv_primary(vaddr_t, vsize_t);
static __inline__ void ls2f_hitwbinv_secondary(vaddr_t, vsize_t);

#define LS2F_CACHE_LINE 32UL
#define LS2F_CACHE_WAYS 4UL
#define LS2F_L1_SIZE    (64UL * 1024UL)
#define LS2F_L2_SIZE    (512UL * 1024UL)
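
/*
 * Derived geometry, for reference: each L1 way covers
 * LS2F_L1_SIZE / LS2F_CACHE_WAYS = 16KB (one page, this port using 16KB
 * pages), and each L2 way covers LS2F_L2_SIZE / LS2F_CACHE_WAYS = 128KB.
 */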

void
Loongson2_ConfigCache(struct cpu_info *ci)
{
        ci->ci_l1inst.size = LS2F_L1_SIZE;
        ci->ci_l1inst.linesize = LS2F_CACHE_LINE;
        ci->ci_l1inst.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
        ci->ci_l1inst.sets = LS2F_CACHE_WAYS;

        ci->ci_l1data.size = LS2F_L1_SIZE;
        ci->ci_l1data.linesize = LS2F_CACHE_LINE;
        ci->ci_l1data.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
        ci->ci_l1data.sets = LS2F_CACHE_WAYS;

        ci->ci_l2.size = LS2F_L2_SIZE;
        ci->ci_l2.linesize = LS2F_CACHE_LINE;
        ci->ci_l2.setsize = LS2F_L2_SIZE / LS2F_CACHE_WAYS;
        ci->ci_l2.sets = LS2F_CACHE_WAYS;

        memset(&ci->ci_l3, 0, sizeof(struct cache_info));

        cache_valias_mask = ci->ci_l1inst.setsize & ~PAGE_MASK;

        /* should not happen as we use 16KB pages */
        if (cache_valias_mask != 0) {
                cache_valias_mask |= PAGE_MASK;
                pmap_prefer_mask |= cache_valias_mask;
        }

        ci->ci_SyncCache = Loongson2_SyncCache;
        ci->ci_InvalidateICache = Loongson2_InvalidateICache;
        ci->ci_InvalidateICachePage = Loongson2_InvalidateICachePage;
        ci->ci_SyncICache = Loongson2_SyncICache;
        ci->ci_SyncDCachePage = Loongson2_SyncDCachePage;
        ci->ci_HitSyncDCachePage = Loongson2_SyncDCachePage;
        ci->ci_HitSyncDCache = Loongson2_HitSyncDCache;
        ci->ci_HitInvalidateDCache = Loongson2_HitInvalidateDCache;
        ci->ci_IOSyncDCache = Loongson2_IOSyncDCache;
}

/*
 * Writeback and invalidate all caches.
 */
void
Loongson2_SyncCache(struct cpu_info *ci)
{
        vaddr_t sva, eva;

        mips_sync();

        sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
        eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
        while (sva != eva) {
                cache(IndexInvalidate_I, 0, sva);
                sva += LS2F_CACHE_LINE;
        }

        sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
        eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
        while (sva != eva) {
                cache(IndexWBInvalidate_D, 0, sva);
                cache(IndexWBInvalidate_D, 1, sva);
                cache(IndexWBInvalidate_D, 2, sva);
                cache(IndexWBInvalidate_D, 3, sva);
                sva += LS2F_CACHE_LINE;
        }

        sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
        eva = sva + LS2F_L2_SIZE / LS2F_CACHE_WAYS;
        while (sva != eva) {
                cache(IndexWBInvalidate_S, 0, sva);
                cache(IndexWBInvalidate_S, 1, sva);
                cache(IndexWBInvalidate_S, 2, sva);
                cache(IndexWBInvalidate_S, 3, sva);
                sva += LS2F_CACHE_LINE;
        }
}

/*
 * Invalidate I$ for the given range.
 */
void
Loongson2_InvalidateICache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
        vaddr_t va, sva, eva;
        vsize_t sz;

        /* extend the range to integral cache lines */
        va = _va & ~(LS2F_CACHE_LINE - 1);
        sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;
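        /*
         * For instance (illustrative values), _va = 0x1021 and _sz = 0x30
         * yield va = 0x1020 and sz = 0x40: two 32-byte lines covering the
         * whole [_va, _va + _sz) range.
         */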

        sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
        /* keep only the index bits */
        sva |= va & ((1UL << 14) - 1);
        eva = sva + sz;
        while (sva != eva) {
                cache(IndexInvalidate_I, 0, sva);
                sva += LS2F_CACHE_LINE;
        }
}

/*
 * Register a given page for I$ invalidation.
 */
void
Loongson2_InvalidateICachePage(struct cpu_info *ci, vaddr_t va)
{
        /*
         * Since the page size matches the I$ set size, and I$ maintenance
         * operations always operate on all the sets, all we need to do here
         * is remember there are postponed flushes.
         */
        ci->ci_cachepending_l1i = 1;
}

/*
 * Perform postponed I$ invalidation.
 */
void
Loongson2_SyncICache(struct cpu_info *ci)
{
        vaddr_t sva, eva;

        if (ci->ci_cachepending_l1i != 0) {
                /* inline Loongson2_InvalidateICache(ci, 0, PAGE_SIZE); */
                sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
                eva = sva + PAGE_SIZE;
                while (sva != eva) {
                        cache(IndexInvalidate_I, 0, sva);
                        sva += LS2F_CACHE_LINE;
                }

                ci->ci_cachepending_l1i = 0;
        }
}

/*
 * Writeback D$ (and L2) for the given page.
 *
 * The L1 index lies within the low 14 bits of the virtual address. Since
 * the page size is 2**14 bytes, these bits are the same for every mapping
 * of the page, so the page can be flushed through any valid address
 * mapping it (here, its XKPHYS direct mapping).
 */
void
Loongson2_SyncDCachePage(struct cpu_info *ci, vaddr_t va, paddr_t pa)
{
        vaddr_t sva, eva;

        mips_sync();

        sva = PHYS_TO_XKPHYS(pa, CCA_CACHED);
        eva = sva + PAGE_SIZE;
        for (va = sva; va != eva; va += LS2F_CACHE_LINE)
                cache(HitWBInvalidate_D, 0, va);
        for (va = sva; va != eva; va += LS2F_CACHE_LINE)
                cache(HitWBInvalidate_S, 0, va);
}

/*
 * Writeback D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitwbinv_primary(vaddr_t va, vsize_t sz)
{
        vaddr_t eva;

        eva = va + sz;
        while (va != eva) {
                cache(HitWBInvalidate_D, 0, va);
                va += LS2F_CACHE_LINE;
        }
}

static __inline__ void
ls2f_hitwbinv_secondary(vaddr_t va, vsize_t sz)
{
        vaddr_t eva;

        eva = va + sz;
        while (va != eva) {
                cache(HitWBInvalidate_S, 0, va);
                va += LS2F_CACHE_LINE;
        }
}

void
Loongson2_HitSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
        vaddr_t va;
        vsize_t sz;

        mips_sync();

        /* extend the range to integral cache lines */
        va = _va & ~(LS2F_CACHE_LINE - 1);
        sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

        ls2f_hitwbinv_primary(va, sz);
        ls2f_hitwbinv_secondary(va, sz);
}

/*
 * Invalidate D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitinv_primary(vaddr_t va, vsize_t sz)
{
        vaddr_t eva;

        eva = va + sz;
        while (va != eva) {
                cache(HitInvalidate_D, 0, va);
                va += LS2F_CACHE_LINE;
        }
}

static __inline__ void
ls2f_hitinv_secondary(vaddr_t va, vsize_t sz)
{
        vaddr_t eva;

        eva = va + sz;
        while (va != eva) {
                cache(HitInvalidate_S, 0, va);
                va += LS2F_CACHE_LINE;
        }
}

void
Loongson2_HitInvalidateDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
        vaddr_t va;
        vsize_t sz;

        /* extend the range to integral cache lines */
        va = _va & ~(LS2F_CACHE_LINE - 1);
        sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

        ls2f_hitinv_primary(va, sz);
        ls2f_hitinv_secondary(va, sz);

        mips_sync();
}

/*
 * Backend for bus_dmamap_sync(). Enforce coherency of the given range
 * by performing the necessary cache writeback and/or invalidate
 * operations.
 */
void
Loongson2_IOSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz, int how)
{
        vaddr_t va;
        vsize_t sz;
        int partial_start, partial_end;

        /* extend the range to integral cache lines */
        va = _va & ~(LS2F_CACHE_LINE - 1);
        sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

        switch (how) {
        case CACHE_SYNC_R:
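                /*
                 * Discard the cached copies of the range so that data
                 * provided by the device will be seen by the CPU.  Partial
                 * lines at either end may also hold unrelated CPU data
                 * outside the range; those are written back (and
                 * invalidated) rather than discarded outright.
                 */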
                /* writeback partial cachelines */
                if (((_va | _sz) & (LS2F_CACHE_LINE - 1)) != 0) {
                        partial_start = va != _va;
                        partial_end = va + sz != _va + _sz;
                } else {
                        partial_start = partial_end = 0;
                }
                if (partial_start) {
                        cache(HitWBInvalidate_D, 0, va);
                        cache(HitWBInvalidate_S, 0, va);
                        va += LS2F_CACHE_LINE;
                        sz -= LS2F_CACHE_LINE;
                }
                if (sz != 0 && partial_end) {
                        cache(HitWBInvalidate_D, 0, va + sz - LS2F_CACHE_LINE);
                        cache(HitWBInvalidate_S, 0, va + sz - LS2F_CACHE_LINE);
                        sz -= LS2F_CACHE_LINE;
                }
                ls2f_hitinv_primary(va, sz);
                ls2f_hitinv_secondary(va, sz);
                break;
        case CACHE_SYNC_X:
        case CACHE_SYNC_W:
                ls2f_hitwbinv_primary(va, sz);
                ls2f_hitwbinv_secondary(va, sz);
                break;
        }
}