/*	$NetBSD: cpufunc_asm_xscale.S,v 1.16 2002/08/17 16:36:32 thorpej Exp $	*/

/*
 * Copyright (c) 2001, 2002 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Allen Briggs and Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2001 Matt Thomas.
 * Copyright (c) 1997,1998 Mark Brinicombe.
 * Copyright (c) 1997 Causality Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Causality Limited.
 * 4. The name of Causality Limited may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY CAUSALITY LIMITED ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL CAUSALITY LIMITED BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * XScale assembly functions for CPU / MMU / TLB specific operations
 */

#include <machine/cpu.h>
#include <machine/asm.h>

/*
 * Size of the XScale core D-cache (32 KB).
 */
#define	DCACHE_SIZE		0x00008000

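/*
 * Literal-pool pointer to the C variable that marks userspace access
 * as blocked while the cache clean area is in use.
 */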
.Lblock_userspace_access:
	.word	_C_LABEL(block_userspace_access)

/*
 * CPWAIT -- Canonical method to wait for CP15 update.
 * From: Intel 80200 manual, section 2.3.3.
 *
 * NOTE: Clobbers the specified temp reg.
 */
#define	CPWAIT_BRANCH							 \
	sub	pc, pc, #4

#define	CPWAIT(tmp)							 \
	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
	mov	tmp, tmp		/* wait for it to complete */	;\
	CPWAIT_BRANCH			/* branch to next insn */

#define	CPWAIT_AND_RETURN_SHIFTER	lsr #32

#define	CPWAIT_AND_RETURN(tmp)						 \
	mrc	p15, 0, tmp, c2, c0, 0	/* arbitrary read of CP15 */	;\
	/* Wait for it to complete and branch to the return address */	 \
	sub	pc, lr, tmp, CPWAIT_AND_RETURN_SHIFTER
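/*
 * An LSR by 32 always yields zero, so the "sub pc, lr, tmp, lsr #32"
 * above is simply "mov pc, lr" -- but since it uses the result of the
 * mrc, it stalls until the CP15 operation has completed before the
 * return is taken.
 */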

ENTRY(xscale_cpwait)
	CPWAIT_AND_RETURN(r0)

/*
 * We need a separate cpu_control() entry point, since we have to
 * invalidate the Branch Target Buffer in the event the BPRD bit
 * changes in the control register.
 */
ENTRY(xscale_control)
	mrc	p15, 0, r3, c1, c0, 0	/* Read the control register */
	bic	r2, r3, r0		/* Clear bits */
	eor	r2, r2, r1		/* XOR bits */

	teq	r2, r3			/* Only write if there was a change */
	mcrne	p15, 0, r0, c7, c5, 6	/* Invalidate the BTB */
	mcrne	p15, 0, r2, c1, c0, 0	/* Write new control register */
	mov	r0, r3			/* Return old value */

	CPWAIT_AND_RETURN(r1)

/*
 * Functions to set the MMU Translation Table Base register
 *
 * We need to clean and flush the cache as it uses virtual
 * addresses that are about to change.
 */
ENTRY(xscale_setttb)
#ifdef CACHE_CLEAN_BLOCK_INTR
	mrs	r3, cpsr_all
	orr	r1, r3, #(I32_bit | F32_bit)
	msr	cpsr_all, r1
#else
	ldr	r3, .Lblock_userspace_access
	ldr	r2, [r3]
	orr	r1, r2, #1
	str	r1, [r3]
#endif
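	/*
	 * Keep the cache from being dirtied between the clean below and
	 * the TTB write: either interrupts are disabled, or userspace
	 * access is blocked, depending on CACHE_CLEAN_BLOCK_INTR.
	 */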
	stmfd	sp!, {r0-r3, lr}
	bl	_C_LABEL(xscale_cache_cleanID)
	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */
	mcr	p15, 0, r0, c7, c10, 4	/* drain write and fill buffer */

	CPWAIT(r0)

	ldmfd	sp!, {r0-r3, lr}

	/* Write the TTB */
	mcr	p15, 0, r0, c2, c0, 0

	/* If we have updated the TTB we must flush the TLB */
	mcr	p15, 0, r0, c8, c7, 0	/* invalidate I+D TLB */

	/* The cleanID above means we only need to flush the I cache here */
	mcr	p15, 0, r0, c7, c5, 0	/* invalidate I$ and BTB */

	CPWAIT(r0)

#ifdef CACHE_CLEAN_BLOCK_INTR
	msr	cpsr_all, r3
#else
	str	r2, [r3]
#endif
	mov	pc, lr

/*
 * TLB functions
 *
 * Note: We don't need to worry about issuing a CPWAIT after
 * TLB operations, because we expect a pmap_update() to follow.
 */
ENTRY(xscale_tlb_flushID_SE)
	mcr	p15, 0, r0, c8, c6, 1	/* flush D tlb single entry */
	mcr	p15, 0, r0, c8, c5, 1	/* flush I tlb single entry */
	mov	pc, lr

/*
 * Cache functions
 */
ENTRY(xscale_cache_flushID)
	mcr	p15, 0, r0, c7, c7, 0	/* flush I+D cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushI)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD)
	mcr	p15, 0, r0, c7, c6, 0	/* flush D cache */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushI_SE)
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD_SE)
	/*
	 * Errata (rev < 2): Must clean-dcache-line to an address
	 * before invalidate-dcache-line to an address, or dirty
	 * bits will not be cleared in the dcache array.
	 */
	mcr	p15, 0, r0, c7, c10, 1
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_cleanD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT_AND_RETURN(r0)

/*
 * Information for the XScale cache clean/purge functions:
 *
 *	* Virtual address of the memory region to use
 *	* Size of memory region
 *
 * Note the virtual address for the Data cache clean operation
 * does not need to be backed by physical memory, since no loads
 * will actually be performed by the allocate-line operation.
 *
 * Note that the Mini-Data cache MUST be cleaned by executing
 * loads from memory mapped into a region reserved exclusively
 * for cleaning of the Mini-Data cache.
 */
	.data

	.global	_C_LABEL(xscale_cache_clean_addr)
_C_LABEL(xscale_cache_clean_addr):
	.word	0x00000000

	.global	_C_LABEL(xscale_cache_clean_size)
_C_LABEL(xscale_cache_clean_size):
	.word	DCACHE_SIZE

	.global	_C_LABEL(xscale_minidata_clean_addr)
_C_LABEL(xscale_minidata_clean_addr):
	.word	0x00000000

	.global	_C_LABEL(xscale_minidata_clean_size)
_C_LABEL(xscale_minidata_clean_size):
	.word	0x00000800

	.text

.Lxscale_cache_clean_addr:
	.word	_C_LABEL(xscale_cache_clean_addr)
.Lxscale_cache_clean_size:
	.word	_C_LABEL(xscale_cache_clean_size)

.Lxscale_minidata_clean_addr:
	.word	_C_LABEL(xscale_minidata_clean_addr)
.Lxscale_minidata_clean_size:
	.word	_C_LABEL(xscale_minidata_clean_size)

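/*
 * Keep other cache traffic out of the way while the clean area is in
 * use: with CACHE_CLEAN_BLOCK_INTR we disable IRQs and FIQs around the
 * clean, otherwise we just set the block_userspace_access flag.
 */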
#ifdef CACHE_CLEAN_BLOCK_INTR
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	mrs	r3, cpsr_all					;	\
	orr	r0, r3, #(I32_bit | F32_bit)			;	\
	msr	cpsr_all, r0

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	msr	cpsr_all, r3
#else
#define	XSCALE_CACHE_CLEAN_BLOCK					\
	ldr	r3, .Lblock_userspace_access			;	\
	ldr	ip, [r3]					;	\
	orr	r0, ip, #1					;	\
	str	r0, [r3]

#define	XSCALE_CACHE_CLEAN_UNBLOCK					\
	str	ip, [r3]
#endif /* CACHE_CLEAN_BLOCK_INTR */

#define	XSCALE_CACHE_CLEAN_PROLOGUE					\
	XSCALE_CACHE_CLEAN_BLOCK				;	\
	ldr	r2, .Lxscale_cache_clean_addr			;	\
	ldmia	r2, {r0, r1}					;	\
	/*								\
	 * BUG ALERT!							\
	 *								\
	 * The XScale core has a strange cache eviction bug, which	\
	 * requires us to use 2x the cache size for the cache clean	\
	 * and for that area to be aligned to 2 * cache size.		\
	 *								\
	 * The work-around is to use 2 areas for cache clean, and to	\
	 * alternate between them whenever this is done.  No one knows	\
	 * why the work-around works (mmm!).				\
	 */								\
	eor	r0, r0, #(DCACHE_SIZE)				;	\
	str	r0, [r2]					;	\
	add	r0, r0, r1
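/*
 * The eor above flips between the two DCACHE_SIZE halves of the clean
 * area and stores the chosen base back for next time; the final add
 * leaves r0 at the end of that half, with r1 holding the number of
 * bytes for the clean loop to walk back down through.
 */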

#define	XSCALE_CACHE_CLEAN_EPILOGUE					\
	XSCALE_CACHE_CLEAN_UNBLOCK

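/*
 * The five entry points below share one body: syncI and purgeID first
 * invalidate the I-cache and BTB, then all of them fall into the
 * D-cache clean loop, which forces out any dirty data by allocating
 * fresh lines over the entire cache.
 */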
ENTRY_NP(xscale_cache_syncI)
ENTRY_NP(xscale_cache_purgeID)
	mcr	p15, 0, r0, c7, c5, 0	/* flush I cache (D cleaned below) */
ENTRY_NP(xscale_cache_cleanID)
ENTRY_NP(xscale_cache_purgeD)
ENTRY(xscale_cache_cleanD)
	XSCALE_CACHE_CLEAN_PROLOGUE

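	/*
	 * Walk down through the clean area one 32-byte line at a time.
	 * The line-allocate operation claims each line without fetching
	 * from memory, writing back whatever dirty data it displaces.
	 */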
1:	subs	r0, r0, #32
	mcr	p15, 0, r0, c7, c2, 5	/* allocate cache line */
	subs	r1, r1, #32
	bne	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT(r0)

	XSCALE_CACHE_CLEAN_EPILOGUE
	mov	pc, lr

/*
 * Clean the mini-data cache.
 *
 * It's expected that we only use the mini-data cache for
 * kernel addresses, so there is no need to purge it on
 * context switch, and no need to prevent userspace access
 * while we clean it.
 */
ENTRY(xscale_cache_clean_minidata)
	ldr	r2, .Lxscale_minidata_clean_addr
	ldmia	r2, {r0, r1}
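	/*
	 * Load one word from each 32-byte line of the reserved region;
	 * the loads displace, and thereby write back, any dirty lines
	 * in the mini-data cache.
	 */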
1:	ldr	r3, [r0], #32
	subs	r1, r1, #32
	bne	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r1)

ENTRY(xscale_cache_purgeID_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)

ENTRY(xscale_cache_purgeD_E)
	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	CPWAIT(r1)
	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	CPWAIT_AND_RETURN(r1)

/*
 * Soft functions
 */
/* xscale_cache_syncI is identical to xscale_cache_purgeID */

ENTRY(xscale_cache_cleanID_rng)
ENTRY(xscale_cache_cleanD_rng)
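	/* For ranges of 16 KB or more, just do a full cache clean instead. */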
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_cleanID)

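	/* Round the start down to a cache line and grow the length to match. */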
	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_purgeID_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeID)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_purgeD_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_purgeD)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_syncI_rng)
	cmp	r1, #0x4000
	bcs	_C_LABEL(xscale_cache_syncI)

	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c10, 1	/* clean D cache entry */
	mcr	p15, 0, r0, c7, c5, 1	/* flush I cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	CPWAIT(r0)

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

ENTRY(xscale_cache_flushD_rng)
	and	r2, r0, #0x1f
	add	r1, r1, r2
	bic	r0, r0, #0x1f

1:	mcr	p15, 0, r0, c7, c6, 1	/* flush D cache single entry */
	add	r0, r0, #32
	subs	r1, r1, #32
	bhi	1b

	mcr	p15, 0, r0, c7, c10, 4	/* drain write buffer */

	CPWAIT_AND_RETURN(r0)

/*
 * Context switch.
 *
 * This is the CPU-specific part of the context switcher cpu_switch().
 * It actually performs the TTB reload.
 *
 * NOTE: Special calling convention
 *	r1, r4-r13 must be preserved
 */
ENTRY(xscale_context_switch)
	/*
	 * CF_CACHE_PURGE_ID will *ALWAYS* be called prior to this.
	 * Thus the data cache will contain only kernel data and the
	 * instruction cache will contain only kernel code, and all
	 * kernel mappings are shared by all processes.
	 */

	/* Write the TTB */
	mcr	p15, 0, r0, c2, c0, 0

	/* If we have updated the TTB we must flush the TLB */
	mcr	p15, 0, r0, c8, c7, 0	/* flush the I+D tlb */

	CPWAIT_AND_RETURN(r0)

/*
 * xscale_cpu_sleep
 *
 * This is called when there is nothing on any of the run queues.
 * We go into IDLE mode so that any IRQ or FIQ will awaken us.
 *
 * If this is called with anything other than ARM_SLEEP_MODE_IDLE,
 * ignore it.
 */
ENTRY(xscale_cpu_sleep)
	cmp	r0, #0x00000000		/* only idle (0) is handled */
	bne	1f
	mov	r0, #0x1		/* 1 selects idle mode */
	mcr	p14, 0, r0, c7, c0, 0	/* write CP14 PWRMODE; wake on interrupt */

1:
	mov	pc, lr
