/*	$OpenBSD: cache_loongson2.c,v 1.8 2021/03/11 11:16:59 jsg Exp $	*/

/*
 * Copyright (c) 2009, 2012 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Cache handling code for Loongson 2E and 2F processors.
 * This code could be made to work on 2C by not hardcoding the number of
 * cache ways.
 *
 * 2E and 2F caches are:
 * - L1 I$ is 4-way, VIPT, 32 bytes/line, 64KB total
 * - L1 D$ is 4-way, VIPT, write-back, 32 bytes/line, 64KB total
 * - L2 is 4-way, PIPT, write-back, 32 bytes/line, 512KB total
 */

#include <sys/param.h>
#include <sys/systm.h>

#include <mips64/cache.h>
#include <machine/cpu.h>

#include <uvm/uvm_extern.h>

/* L1 cache operations */
#define	IndexInvalidate_I	0x00
#define	IndexWBInvalidate_D	0x01
#define	IndexLoadTag_D		0x05
#define	IndexStoreTag_D		0x09
#define	HitInvalidate_D		0x11
#define	HitWBInvalidate_D	0x15
#define	IndexLoadData_D		0x19
#define	IndexStoreData_D	0x1d

/* L2 cache operations */
#define	IndexWBInvalidate_S	0x03
#define	IndexLoadTag_S		0x07
#define	IndexStoreTag_S		0x0b
#define	HitInvalidate_S		0x13
#define	HitWBInvalidate_S	0x17
#define	IndexLoadData_S		0x1b
#define	IndexStoreData_S	0x1f
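
/*
 * In the CACHE instruction encoding, the low two bits of the operation
 * code select the cache (0 = I$, 1 = D$, 3 = secondary) and the upper
 * three bits select the operation, hence the values above.
 */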

#define	cache(op,set,addr) \
    __asm__ volatile \
      ("cache %0, %1(%2)" :: "i"(op), "i"(set), "r"(addr) : "memory")
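
/*
 * Example expansion: cache(IndexWBInvalidate_D, 1, sva) emits
 *	cache 0x01, 1(sva)
 * Index operations do not need the line offset bits of the effective
 * address, so the loops below pass 0-3 as the offset, relying on the
 * low bits of the effective address to select the set (way) being
 * operated on.
 */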

static __inline__ void	ls2f_hitinv_primary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitinv_secondary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitwbinv_primary(vaddr_t, vsize_t);
static __inline__ void	ls2f_hitwbinv_secondary(vaddr_t, vsize_t);

#define	LS2F_CACHE_LINE	32UL
#define	LS2F_CACHE_WAYS	4UL
#define	LS2F_L1_SIZE		(64UL * 1024UL)
#define	LS2F_L2_SIZE		(512UL * 1024UL)
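
/*
 * Derived geometry: each L1 set (way) covers 64KB / 4 = 16KB, matching
 * the 16KB page size; each L2 set covers 512KB / 4 = 128KB.
 */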

void
Loongson2_ConfigCache(struct cpu_info *ci)
{
	ci->ci_l1inst.size = LS2F_L1_SIZE;
	ci->ci_l1inst.linesize = LS2F_CACHE_LINE;
	ci->ci_l1inst.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l1inst.sets = LS2F_CACHE_WAYS;

	ci->ci_l1data.size = LS2F_L1_SIZE;
	ci->ci_l1data.linesize = LS2F_CACHE_LINE;
	ci->ci_l1data.setsize = LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l1data.sets = LS2F_CACHE_WAYS;

	ci->ci_l2.size = LS2F_L2_SIZE;
	ci->ci_l2.linesize = LS2F_CACHE_LINE;
	ci->ci_l2.setsize = LS2F_L2_SIZE / LS2F_CACHE_WAYS;
	ci->ci_l2.sets = LS2F_CACHE_WAYS;

	memset(&ci->ci_l3, 0, sizeof(struct cache_info));

	cache_valias_mask = (ci->ci_l1inst.setsize - 1) & ~PAGE_MASK;

	/* should not happen, as the 16KB page size covers all L1 index bits */
	if (cache_valias_mask != 0) {
		cache_valias_mask |= PAGE_MASK;
		pmap_prefer_mask |= cache_valias_mask;
	}

	ci->ci_SyncCache = Loongson2_SyncCache;
	ci->ci_InvalidateICache = Loongson2_InvalidateICache;
	ci->ci_InvalidateICachePage = Loongson2_InvalidateICachePage;
	ci->ci_SyncICache = Loongson2_SyncICache;
	ci->ci_SyncDCachePage = Loongson2_SyncDCachePage;
	ci->ci_HitSyncDCachePage = Loongson2_SyncDCachePage;
	ci->ci_HitSyncDCache = Loongson2_HitSyncDCache;
	ci->ci_HitInvalidateDCache = Loongson2_HitInvalidateDCache;
	ci->ci_IOSyncDCache = Loongson2_IOSyncDCache;
}

/*
 * Writeback and invalidate all caches.
 */
void
Loongson2_SyncCache(struct cpu_info *ci)
{
	vaddr_t sva, eva;

	mips_sync();

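	/*
	 * I$ index operations affect all the sets (ways) at once, so a
	 * single operation per line index covers the whole cache.
	 */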
	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexInvalidate_I, 0, sva);
		sva += LS2F_CACHE_LINE;
	}
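
	/*
	 * D$ and L2 index operations affect a single set, so each of the
	 * four sets is touched explicitly through the offset immediate.
	 */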
	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L1_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexWBInvalidate_D, 0, sva);
		cache(IndexWBInvalidate_D, 1, sva);
		cache(IndexWBInvalidate_D, 2, sva);
		cache(IndexWBInvalidate_D, 3, sva);
		sva += LS2F_CACHE_LINE;
	}

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	eva = sva + LS2F_L2_SIZE / LS2F_CACHE_WAYS;
	while (sva != eva) {
		cache(IndexWBInvalidate_S, 0, sva);
		cache(IndexWBInvalidate_S, 1, sva);
		cache(IndexWBInvalidate_S, 2, sva);
		cache(IndexWBInvalidate_S, 3, sva);
		sva += LS2F_CACHE_LINE;
	}
}

/*
 * Invalidate I$ for the given range.
 */
void
Loongson2_InvalidateICache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va, sva, eva;
	vsize_t sz;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
	/* keep only the index bits (one 16KB set, i.e. the low 14 bits) */
	sva |= va & ((1UL << 14) - 1);
	eva = sva + sz;
	while (sva != eva) {
		cache(IndexInvalidate_I, 0, sva);
		sva += LS2F_CACHE_LINE;
	}
}

/*
 * Register a given page for I$ invalidation.
 */
void
Loongson2_InvalidateICachePage(struct cpu_info *ci, vaddr_t va)
{
	/*
	 * Since the page size matches the I$ set size, and I$ maintenance
	 * operations always operate on all the sets, all we need to do here
	 * is remember there are postponed flushes.
	 */
	ci->ci_cachepending_l1i = 1;
}

/*
 * Perform postponed I$ invalidation.
 */
void
Loongson2_SyncICache(struct cpu_info *ci)
{
	vaddr_t sva, eva;

	if (ci->ci_cachepending_l1i != 0) {
		/* inline Loongson2_InvalidateICache(ci, 0, PAGE_SIZE); */
		sva = PHYS_TO_XKPHYS(0, CCA_CACHED);
		eva = sva + PAGE_SIZE;
		while (sva != eva) {
			cache(IndexInvalidate_I, 0, sva);
			sva += LS2F_CACHE_LINE;
		}

		ci->ci_cachepending_l1i = 0;
	}
}

/*
 * Writeback D$ for the given page.
 *
 * The index for L1 is the low 14 bits of the virtual address. Since the
 * page size is 2**14 bytes, it is possible to access the page through
 * any valid address.
 */
void
Loongson2_SyncDCachePage(struct cpu_info *ci, vaddr_t va, paddr_t pa)
{
	vaddr_t sva, eva;

	mips_sync();

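	/*
	 * Operate on the cached XKPHYS mapping of the physical page;
	 * per the comment above, this reaches the same L1 lines as any
	 * other mapping of the page would.
	 */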
	sva = PHYS_TO_XKPHYS(pa, CCA_CACHED);
	eva = sva + PAGE_SIZE;
	for (va = sva; va != eva; va += LS2F_CACHE_LINE)
		cache(HitWBInvalidate_D, 0, va);
	for (va = sva; va != eva; va += LS2F_CACHE_LINE)
		cache(HitWBInvalidate_S, 0, va);
}

/*
 * Writeback D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitwbinv_primary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitWBInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

static __inline__ void
ls2f_hitwbinv_secondary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitWBInvalidate_S, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

void
Loongson2_HitSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va;
	vsize_t sz;

	mips_sync();

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	ls2f_hitwbinv_primary(va, sz);
	ls2f_hitwbinv_secondary(va, sz);
}

/*
 * Invalidate D$ for the given range. Range is expected to be currently
 * mapped, allowing the use of `Hit' operations. This is less aggressive
 * than using `Index' operations.
 */

static __inline__ void
ls2f_hitinv_primary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitInvalidate_D, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

static __inline__ void
ls2f_hitinv_secondary(vaddr_t va, vsize_t sz)
{
	vaddr_t eva;

	eva = va + sz;
	while (va != eva) {
		cache(HitInvalidate_S, 0, va);
		va += LS2F_CACHE_LINE;
	}
}

void
Loongson2_HitInvalidateDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz)
{
	vaddr_t va;
	vsize_t sz;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	ls2f_hitinv_primary(va, sz);
	ls2f_hitinv_secondary(va, sz);

	mips_sync();
}

/*
 * Backend for bus_dmamap_sync(). Enforce coherency of the given range
 * by performing the necessary cache writeback and/or invalidate
 * operations.
 */
void
Loongson2_IOSyncDCache(struct cpu_info *ci, vaddr_t _va, size_t _sz, int how)
{
	vaddr_t va;
	vsize_t sz;
	int partial_start, partial_end;

	/* extend the range to integral cache lines */
	va = _va & ~(LS2F_CACHE_LINE - 1);
	sz = ((_va + _sz + LS2F_CACHE_LINE - 1) & ~(LS2F_CACHE_LINE - 1)) - va;

	switch (how) {
	case CACHE_SYNC_R:
		/*
		 * The device is about to update memory. Lines only
		 * partially covered by the range may hold unrelated
		 * dirty data, so write them back before invalidating.
		 */
		if (((_va | _sz) & (LS2F_CACHE_LINE - 1)) != 0) {
			partial_start = va != _va;
			partial_end = va + sz != _va + _sz;
		} else {
			partial_start = partial_end = 0;
		}
		if (partial_start) {
			cache(HitWBInvalidate_D, 0, va);
			cache(HitWBInvalidate_S, 0, va);
			va += LS2F_CACHE_LINE;
			sz -= LS2F_CACHE_LINE;
		}
		if (sz != 0 && partial_end) {
			cache(HitWBInvalidate_D, 0, va + sz - LS2F_CACHE_LINE);
			cache(HitWBInvalidate_S, 0, va + sz - LS2F_CACHE_LINE);
			sz -= LS2F_CACHE_LINE;
		}
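		/*
		 * Lines wholly covered by the range carry no unrelated
		 * data and can be invalidated without writeback.
		 */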
		ls2f_hitinv_primary(va, sz);
		ls2f_hitinv_secondary(va, sz);
		break;
	case CACHE_SYNC_X:
	case CACHE_SYNC_W:
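		/*
		 * Memory is about to be read by the device (or code is
		 * about to be fetched): push dirty lines out so memory
		 * is up to date.
		 */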
		ls2f_hitwbinv_primary(va, sz);
		ls2f_hitwbinv_secondary(va, sz);
		break;
	}
}