xref: /netbsd/sys/arch/arm/arm32/cpuswitch.S (revision fc7b39db)
1/*	$NetBSD: cpuswitch.S,v 1.46 2007/02/19 01:59:23 briggs Exp $	*/
2
3/*
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 *    must display the following acknowledgement:
19 *      This product includes software developed for the NetBSD Project by
20 *      Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 *    or promote products derived from this software without specific prior
23 *    written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37/*
38 * Copyright (c) 1994-1998 Mark Brinicombe.
39 * Copyright (c) 1994 Brini.
40 * All rights reserved.
41 *
42 * This code is derived from software written for Brini by Mark Brinicombe
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 *    notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 *    notice, this list of conditions and the following disclaimer in the
51 *    documentation and/or other materials provided with the distribution.
52 * 3. All advertising materials mentioning features or use of this software
53 *    must display the following acknowledgement:
54 *	This product includes software developed by Brini.
55 * 4. The name of the company nor the name of the author may be used to
56 *    endorse or promote products derived from this software without specific
57 *    prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
60 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
61 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
62 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
63 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
64 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
65 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 *
71 * RiscBSD kernel project
72 *
73 * cpuswitch.S
74 *
75 * cpu switching functions
76 *
77 * Created      : 15/10/94
78 */
79
80#include "opt_armfpe.h"
81#include "opt_arm32_pmap.h"
82#include "opt_multiprocessor.h"
83#include "opt_lockdebug.h"
84
85#include "assym.h"
86#include <arm/arm32/pte.h>
87#include <machine/param.h>
88#include <machine/cpu.h>
89#include <machine/frame.h>
90#include <machine/asm.h>
91
92/* LINTSTUB: include <sys/param.h> */
93
94#undef IRQdisable
95#undef IRQenable
96
97/*
98 * IRQdisable / IRQenable: set / clear only the IRQ mask bit (I32_bit) in
99 * the CPSR control field; FIQs stay as they were.  Both clobber r14 (lr).
100 */
101
102#define IRQdisable \
103	mrs	r14, cpsr ; \
104	orr	r14, r14, #(I32_bit) ; \
105	msr	cpsr_c, r14
106
107#define IRQenable \
108	mrs	r14, cpsr ; \
109	bic	r14, r14, #(I32_bit) ; \
110	msr	cpsr_c, r14
111
112/*
113 * These are used for switching the translation table/DACR.
114 * Since the vector page can be invalid for a short time, we must
115 * disable both regular IRQs *and* FIQs.
116 * Both macros use r14 (lr) as scratch, so lr is clobbered.
117 * XXX: This is not necessary if the vector table is relocated.
118 */
119#define IRQdisableALL \
120	mrs	r14, cpsr ; \
121	orr	r14, r14, #(I32_bit | F32_bit) ; \
122	msr	cpsr_c, r14
123
124#define IRQenableALL \
125	mrs	r14, cpsr ; \
126	bic	r14, r14, #(I32_bit | F32_bit) ; \
127	msr	cpsr_c, r14
128
129	.text
130
131.Lwhichqs:
132	.word	_C_LABEL(sched_whichqs)	/* &sched_whichqs: "queue not empty" flags word */
133
134.Lqs:
135	.word	_C_LABEL(sched_qs)	/* &sched_qs: base of the run-queue head array */
136
137/*
138 * cpuswitch()
139 *
140 * performs a process context switch.
141 * This function has several entry points
142 */
143
144#ifdef MULTIPROCESSOR
145.Lcpu_info_store:
146	.word	_C_LABEL(cpu_info_store)
147.Lcurlwp:
148	/* FIXME: This is bogus in the general case. */
149	.word	_C_LABEL(cpu_info_store) + CI_CURLWP	/* address of the curlwp slot */
150
151.Lcurpcb:
152	.word	_C_LABEL(cpu_info_store) + CI_CURPCB	/* address of the curpcb slot */
153#else
154.Lcurlwp:
155	.word	_C_LABEL(curlwp)	/* &curlwp */
156
157.Lcurpcb:
158	.word	_C_LABEL(curpcb)	/* &curpcb (storage defined below) */
159#endif
160
161.Lwant_resched:
162	.word	_C_LABEL(want_resched)	/* &want_resched (cleared once a new lwp is picked) */
163
164.Lcpufuncs:
165	.word	_C_LABEL(cpufuncs)	/* &cpufuncs: table of CPU-specific function pointers */
166
167#ifndef MULTIPROCESSOR
168	.data
169	.global	_C_LABEL(curpcb)
170_C_LABEL(curpcb):
171	.word	0x00000000		/* curpcb starts out NULL */
172	.text
173#endif
174
175.Lblock_userspace_access:
176	.word	_C_LABEL(block_userspace_access)	/* flag set to 1 around cache purges */
177
178.Lcpu_do_powersave:
179	.word	_C_LABEL(cpu_do_powersave)	/* non-zero: idle calls the CF_SLEEP hook */
180
181.Lpmap_kernel_cstate:
182	.word	(kernel_pmap_store + PMAP_CSTATE)	/* kernel pmap's cache state */
183
184.Llast_cache_state_ptr:
185	.word	_C_LABEL(pmap_cache_state)	/* holds pointer to the last VM space's cache state */
186
187/*
188 * Idle loop, exercised while waiting for a process to wake up.
189 * Spins (or calls the cpufuncs CF_SLEEP hook when cpu_do_powersave is
190 * non-zero) until sched_whichqs is non-zero, then re-enters cpu_switch.
191 * NOTE: When we jump back to .Lswitch_search, we must have a pointer to
192 * whichqs in r7, which is what it is when we arrive here.
193 */
194/* LINTSTUB: Ignore */
195ASENTRY_NP(idle)
196	ldr	r6, .Lcpu_do_powersave
197	IRQenable			/* Enable interrupts */
198	ldr	r6, [r6]		/* r6 = cpu_do_powersave */
199
200	bl	_C_LABEL(sched_unlock_idle)
201
202	/* Drop to spl0 (returns the current spl level in r0). */
203#ifdef __NEWINTR
204	mov	r0, #(IPL_NONE)
205	bl	_C_LABEL(_spllower)
206#else /* ! __NEWINTR */
207	mov	r0, #(_SPL_0)
208	bl	_C_LABEL(splx)
209#endif /* __NEWINTR */
210
211	teq	r6, #0			/* cpu_do_powersave non zero? */
212	ldrne	r6, .Lcpufuncs		/* If so, r6 = &cpufuncs ... */
213	mov	r4, r0			/* Old interrupt level to r4 */
214	ldrne	r6, [r6, #(CF_SLEEP)]	/* ... then r6 = cpufuncs' CF_SLEEP entry */
215
216	/*
217	 * Main idle loop.
218	 * r6 points to power-save idle function if required, else NULL.
219	 */
2201:	ldr	r3, [r7]		/* r3 = sched_whichqs */
221	teq	r3, #0
222	bne	2f			/* We have work to do */
223	teq	r6, #0			/* Powersave idle? */
224	beq	1b			/* Nope. Just sit-n-spin. */
225
226	/*
227	 * Before going into powersave idle mode, disable interrupts
228	 * and check sched_whichqs one more time.
229	 */
230	IRQdisableALL
231	ldr	r3, [r7]
232	mov	r0, #0			/* r0 = 0 (presumably the sleep hook's argument) */
233	teq	r3, #0			/* sched_whichqs still zero? */
234	moveq	lr, pc			/* lr = return address (the IRQenableALL below) */
235	moveq	pc, r6			/* If so, do powersave idle */
236	IRQenableALL
237	b	1b			/* Back around */
238
239	/*
240	 * sched_whichqs indicates that at least one lwp is ready to run.
241	 * Restore the original interrupt priority level, grab the
242	 * scheduler lock if necessary, and jump back into cpu_switch.
243	 */
2442:	mov	r0, r4
245	bl	_C_LABEL(splx)
246	adr	lr, .Lswitch_search	/* make sched_lock_idle return into cpu_switch */
247	b	_C_LABEL(sched_lock_idle)
248
249
250/*
251 * Find a new lwp to run, save the current context and load the new one.
252 * Returns 1 in r0 if a different lwp was switched to, 0 if not.
253 *
254 * Arguments:
255 *	r0	'struct lwp *' of the current LWP
256 */
257
258ENTRY(cpu_switch)
259/*
260 * Local register usage. Some of these registers are out of date.
261 * r1 = oldlwp
262 * r2 = spl level
263 * r3 = whichqs
264 * r4 = queue
265 * r5 = &qs[queue]
266 * r6 = newlwp
267 * r7 = scratch
268 */
269	stmfd	sp!, {r4-r7, lr}	/* popped again at .Lswitch_return */
270
271	/*
272	 * Indicate that there is no longer a valid process (curlwp = 0).
273	 * Zero the current PCB pointer while we're at it.
274	 */
275	ldr	r7, .Lcurlwp
276	ldr	r6, .Lcurpcb
277	mov	r2, #0x00000000
278	str	r2, [r7]		/* curlwp = NULL */
279	str	r2, [r6]		/* curpcb = NULL */
280
281	/* stash the old lwp while we call functions */
282	mov	r5, r0
283
284	/* First phase : find a new lwp */
285	ldr	r7, .Lwhichqs
286
287	/* rem: r5 = old lwp */
288	/* rem: r7 = &whichqs */
289
290.Lswitch_search:
291	IRQdisable
292
293	/* Do we have any active queues  */
294	ldr	r3, [r7]
295
296	/* If not we must idle until we do. */
297	teq	r3, #0x00000000
298	beq	_ASM_LABEL(idle)
299
300	/* put old lwp back in r1 */
301	mov	r1, r5
302
303	/* rem: r1 = old lwp */
304	/* rem: r3 = whichqs */
305	/* rem: interrupts are disabled */
306
307	/* used further down, saves SA stall */
308	ldr	r6, .Lqs
309
310	/*
311	 * We have found an active queue. Currently we do not know which queue
312	 * is active just that one of them is.
313	 */
314	/* Non-Xscale version of the ffs algorithm devised by d.seal and
315	 * posted to comp.sys.arm on 16 Feb 1994.
316	 */
317 	rsb	r5, r3, #0		/* r5 = -whichqs */
318 	ands	r0, r3, r5		/* r0 = lowest set bit of whichqs */
319
320#ifndef __XSCALE__
321	adr	r5, .Lcpu_switch_ffs_table
322
323				    /* X = R0 */
324	orr	r4, r0, r0, lsl #4  /* r4 = X * 0x11 */
325	orr	r4, r4, r4, lsl #6  /* r4 = X * 0x451 */
326	rsb	r4, r4, r4, lsl #16 /* r4 = X * 0x0450fbaf */
327
328	/* now lookup in table indexed on top 6 bits of r4 */
329	ldrb	r4, [ r5, r4, lsr #26 ]
330
331#else	/* __XSCALE__ */
332	clz	r4, r0			/* r4 = number of leading zero bits */
333	rsb	r4, r4, #31		/* r4 = 31 - clz = index of the set bit */
334#endif	/* __XSCALE__ */
335
336	/* rem: r0 = bit mask of chosen queue (1 << r4) */
337	/* rem: r1 = old lwp */
338	/* rem: r3 = whichqs */
339	/* rem: r4 = queue number */
340	/* rem: interrupts are disabled */
341
342	/* Get the address of the queue (&qs[queue]); entries are 8 bytes apart */
343	add	r5, r6, r4, lsl #3
344
345	/*
346	 * Get the lwp from the queue and place the next process in
347	 * the queue at the head. This basically unlinks the lwp at
348	 * the head of the queue.
349	 */
350	ldr	r6, [r5, #(L_FORW)]
351
352#ifdef DIAGNOSTIC
353	cmp	r6, r5			/* head points at itself: queue is empty */
354	beq	.Lswitch_bogons
355#endif
356
357	/* rem: r6 = new lwp */
358	ldr	r7, [r6, #(L_FORW)]	/* r7 = lwp after the new one */
359	str	r7, [r5, #(L_FORW)]	/* ... which becomes the queue head */
360
361	/*
362	 * Test to see if the queue is now empty. If the head of the queue
363	 * points to the queue itself then there are no more lwps in
364	 * the queue. We can therefore clear the queue not empty flag held
365	 * in r3.
366	 */
367
368	teq	r5, r7			/* flags stay live until the streq below */
369	biceq	r3, r3, r0
370
371	/* rem: r0 = bit mask of chosen queue (1 << r4) - NOT NEEDED ANY MORE */
372
373	/* Fix the back pointer for the lwp now at the head of the queue. */
374	ldr	r0, [r6, #(L_BACK)]
375	str	r0, [r7, #(L_BACK)]
376
377	/* Update the RAM copy of the queue not empty flags word. */
378	ldreq	r7, .Lwhichqs
379	streq	r3, [r7]
380
381	/* rem: r1 = old lwp */
382	/* rem: r3 = whichqs - NOT NEEDED ANY MORE */
383	/* rem: r4 = queue number - NOT NEEDED ANY MORE */
384	/* rem: r6 = new lwp */
385	/* rem: interrupts are disabled */
386
387	/* Clear the want_resched flag */
388	ldr	r7, .Lwant_resched
389	mov	r0, #0x00000000
390	str	r0, [r7]
391
392	/*
393	 * Clear the back pointer of the lwp we have removed from
394	 * the head of the queue. The new lwp is isolated now.
395	 */
396	str	r0, [r6, #(L_BACK)]
397
398	/*
399	 * unlock the sched_lock, but leave interrupts off, for now.
400	 */
401	mov	r7, r1			/* keep old lwp in callee-saved r7 over the call */
402	bl	_C_LABEL(sched_unlock_idle)
403	mov	r1, r7
404
405
406.Lswitch_resume:
407	/* rem: r1 = old lwp */
408	/* rem: r4 = return value [not used if came from cpu_switchto()] */
409	/* rem: r6 = new lwp */
410	/* rem: interrupts are disabled */
411
412#ifdef MULTIPROCESSOR
413	/* XXX use curcpu() */
414	ldr	r0, .Lcpu_info_store
415	str	r0, [r6, #(L_CPU)]
416#else
417	/* l->l_cpu initialized in fork1() for single-processor */
418#endif
419
420	/* Process is now on a processor. */
421	mov	r0, #LSONPROC			/* l->l_stat = LSONPROC */
422	str	r0, [r6, #(L_STAT)]
423
424	/* We have a new curlwp now so make a note of it */
425	ldr	r7, .Lcurlwp
426	str	r6, [r7]
427
428	/* Hook in a new pcb */
429	ldr	r7, .Lcurpcb
430	ldr	r0, [r6, #(L_ADDR)]
431	str	r0, [r7]
432
433	/* At this point we can allow IRQ's again. */
434	IRQenable
435
436	/* rem: r1 = old lwp */
437	/* rem: r4 = return value */
438	/* rem: r6 = new lwp */
439	/* rem: interrupts are enabled */
440
441	/*
442	 * If the new lwp is the same as the lwp that called
443	 * cpu_switch() then we do not need to save and restore any
444	 * contexts. This means we can make a quick exit.
445	 * The test is simple if curlwp on entry (now in r1) is the
446	 * same as the lwp removed from the queue we can jump to the exit.
447	 */
448	teq	r1, r6
449	moveq	r4, #0x00000000		/* default to "didn't switch" */
450	beq	.Lswitch_return
451
452	/*
453	 * At this point, we are guaranteed to be switching to
454	 * a new lwp.
455	 */
456	mov	r4, #0x00000001
457
458	/* Remember the old lwp in r0 */
459	mov	r0, r1
460
461	/*
462	 * If the old lwp on entry to cpu_switch was zero then the
463	 * process that called it was exiting. This means that we do
464	 * not need to save the current context. Instead we can jump
465	 * straight to restoring the context for the new process.
466	 */
467	teq	r0, #0x00000000
468	beq	.Lswitch_exited
469
470	/* rem: r0 = old lwp */
471	/* rem: r4 = return value */
472	/* rem: r6 = new lwp */
473	/* rem: interrupts are enabled */
474
475	/* Stage two : Save old context */
476
477	/* Get the user structure for the old lwp. */
478	ldr	r1, [r0, #(L_ADDR)]
479
480	/* Save all the registers in the old lwp's pcb */
481#ifndef __XSCALE__
482	add	r7, r1, #(PCB_R8)
483	stmia	r7, {r8-r13}
484#else
485	strd	r8, [r1, #(PCB_R8)]
486	strd	r10, [r1, #(PCB_R10)]
487	strd	r12, [r1, #(PCB_R12)]
488#endif
489
490	/*
491	 * NOTE: We can now use r8-r13 until it is time to restore
492	 * them for the new process.
493	 */
494
495	/* Remember the old PCB. */
496	mov	r8, r1
497
498	/* r1 now free! */
499
500	/* Get the user structure for the new process in r9 */
501	ldr	r9, [r6, #(L_ADDR)]
502
503	/*
504	 * This can be optimised... We know we want to go from SVC32
505	 * mode to UND32 mode (IRQs are masked while we are in UND32 mode)
506	 */
507        mrs	r3, cpsr
508	bic	r2, r3, #(PSR_MODE)
509	orr	r2, r2, #(PSR_UND32_MODE | I32_bit)
510        msr	cpsr_c, r2
511
512	str	sp, [r8, #(PCB_UND_SP)]	/* save the UND32-mode banked sp in the old PCB */
513
514        msr	cpsr_c, r3		/* Restore the old mode */
515
516	/* rem: r0 = old lwp */
517	/* rem: r4 = return value */
518	/* rem: r6 = new lwp */
519	/* rem: r8 = old PCB */
520	/* rem: r9 = new PCB */
521	/* rem: interrupts are enabled */
522
523	/* What else needs to be saved  Only FPA stuff when that is supported */
524
525	/* Third phase : restore saved context */
526
527	/* rem: r0 = old lwp */
528	/* rem: r4 = return value */
529	/* rem: r6 = new lwp */
530	/* rem: r8 = old PCB */
531	/* rem: r9 = new PCB */
532	/* rem: interrupts are enabled */
533
534	/*
535	 * Get the new L1 table pointer into r11.  If we're switching to
536	 * an LWP with the same address space as the outgoing one, we can
537	 * skip the cache purge and the TTB load.
538	 *
539	 * To avoid data dep stalls that would happen anyway, we try
540	 * and get some useful work done in the mean time.
541	 */
542	ldr	r10, [r8, #(PCB_PAGEDIR)]	/* r10 = old L1 */
543	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
544
545	ldr	r0, [r8, #(PCB_DACR)]		/* r0 = old DACR */
546	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
547	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = &new_pmap->pm_cstate */
548	ldr	r5, .Llast_cache_state_ptr	/* Previous thread's cstate */
549
550	teq	r10, r11			/* Same L1? */
551	ldr	r5, [r5]
552	cmpeq	r0, r1				/* Same DACR? */
553	beq	.Lcs_context_switched		/* yes! */
554
555	ldr	r3, .Lblock_userspace_access
556	mov	r12, #0
557	cmp	r5, #0				/* No last vm? (switch_exit) */
558	beq	.Lcs_cache_purge_skipped	/* None: we can skip the cache flush */
559
560	mov	r2, #DOMAIN_CLIENT
561	cmp	r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */
562	beq	.Lcs_cache_purge_skipped	/* Yup. Don't flush cache */
563
564	cmp	r5, r8				/* Same userland VM space? */
565	ldrneb	r12, [r5, #(CS_CACHE_ID)]	/* Last VM space cache state */
566
567	/*
568	 * We're definitely switching to a new userland VM space,
569	 * and the previous userland VM space has yet to be flushed
570	 * from the cache/tlb.
571	 *
572	 * r12 holds the previous VM space's cs_cache_id state
573	 */
574	tst	r12, #0xff			/* Test cs_cache_id */
575	beq	.Lcs_cache_purge_skipped	/* VM space is not in cache */
576
577	/*
578	 * Definitely need to flush the cache.
579	 * Mark the old VM space as NOT being resident in the cache.
580	 */
581	mov	r2, #0x00000000
582	strb	r2, [r5, #(CS_CACHE_ID)]
583	strb	r2, [r5, #(CS_CACHE_D)]
584
585	/*
586	 * Don't allow user space access between the purge and the switch.
587	 */
588	mov	r2, #0x00000001
589	str	r2, [r3]
590
591	stmfd	sp!, {r0-r3}		/* r0-r3 are still needed after the call */
592	ldr	r1, .Lcpufuncs
593	mov	lr, pc
594	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
595	ldmfd	sp!, {r0-r3}
596
597.Lcs_cache_purge_skipped:
598	/* rem: r1 = new DACR */
599	/* rem: r3 = &block_userspace_access */
600	/* rem: r4 = return value */
601	/* rem: r5 = &old_pmap->pm_cstate (or NULL) */
602	/* rem: r6 = new lwp */
603	/* rem: r8 = &new_pmap->pm_cstate */
604	/* rem: r9 = new PCB */
605	/* rem: r10 = old L1 */
606	/* rem: r11 = new L1 */
607
608	mov	r2, #0x00000000
609	ldr	r7, [r9, #(PCB_PL1VEC)]	/* r7 = pointer to L1 slot for vector_page, or NULL */
610
611	/*
612	 * At this point we need to kill IRQ's again.
613	 *
614	 * XXXSCW: Don't need to block FIQs if vectors have been relocated
615	 */
616	IRQdisableALL
617
618	/*
619	 * Interrupts are disabled so we can allow user space accesses again
620	 * as none will occur until interrupts are re-enabled after the
621	 * switch.
622	 */
623	str	r2, [r3]		/* block_userspace_access = 0 */
624
625	/*
626	 * Ensure the vector table is accessible by fixing up the L1
627	 */
628	cmp	r7, #0			/* No need to fixup vector table? */
629	ldrne	r2, [r7]		/* But if yes, fetch current value */
630	ldrne	r0, [r9, #(PCB_L1VEC)]	/* Fetch new vector_page value */
631	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for new context */
632	cmpne	r2, r0			/* Stuffing the same value? */
633#ifndef PMAP_INCLUDE_PTE_SYNC
634	strne	r0, [r7]		/* Nope, update it */
635#else
636	beq	.Lcs_same_vector
637	str	r0, [r7]		/* Otherwise, update it */
638
639	/*
640	 * Need to sync the cache to make sure that last store is
641	 * visible to the MMU.
642	 */
643	ldr	r2, .Lcpufuncs
644	mov	r0, r7			/* r0 = address of the updated L1 slot */
645	mov	r1, #4			/* r1 = 4 (one word written back) */
646	mov	lr, pc
647	ldr	pc, [r2, #CF_DCACHE_WB_RANGE]
648
649.Lcs_same_vector:
650#endif /* PMAP_INCLUDE_PTE_SYNC */
651
652	cmp	r10, r11		/* Switching to the same L1? */
653	ldr	r10, .Lcpufuncs		/* r10 = &cpufuncs from here on (flags untouched) */
654	beq	.Lcs_same_l1		/* Yup. */
655
656	/*
657	 * Do a full context switch, including full TLB flush.
658	 */
659	mov	r0, r11			/* r0 = new L1 */
660	mov	lr, pc
661	ldr	pc, [r10, #CF_CONTEXT_SWITCH]
662
663	/*
664	 * Mark the old VM space as NOT being resident in the TLB
665	 */
666	mov	r2, #0x00000000
667	cmp	r5, #0			/* skip if there is no old cache state */
668	strneh	r2, [r5, #(CS_TLB_ID)]
669	b	.Lcs_context_switched
670
671	/*
672	 * We're switching to a different process in the same L1.
673	 * In this situation, we only need to flush the TLB for the
674	 * vector_page mapping, and even then only if r7 is non-NULL.
675	 */
676.Lcs_same_l1:
677	cmp	r7, #0
678	movne	r0, #0			/* We *know* vector_page's VA is 0x0 */
679	movne	lr, pc
680	ldrne	pc, [r10, #CF_TLB_FLUSHID_SE]
681
682.Lcs_context_switched:
683	/* rem: r8 = &new_pmap->pm_cstate */
684	/* NOTE(review): the same-L1/same-DACR shortcut arrives here with IRQs still enabled. */
685	/* XXXSCW: Safe to re-enable FIQs here */
686
687	/*
688	 * The new VM space is live in the cache and TLB.
689	 * Update its cache/tlb state, and if it's not the kernel
690	 * pmap, update the 'last cache state' pointer.
691	 */
692	mov	r2, #-1
693	ldr	r5, .Lpmap_kernel_cstate
694	ldr	r0, .Llast_cache_state_ptr
695	str	r2, [r8, #(CS_ALL)]	/* mark every cache/TLB state field resident */
696	cmp	r5, r8			/* Is the new VM space the kernel pmap? */
697	strne	r8, [r0]		/* No: remember it as the last cache state */
698
699	/* rem: r4 = return value */
700	/* rem: r6 = new lwp */
701	/* rem: r9 = new PCB */
702
703	/*
704	 * This can be optimised... We know we want to go from SVC32
705	 * mode to UND32 mode
706	 */
707        mrs	r3, cpsr
708	bic	r2, r3, #(PSR_MODE)
709	orr	r2, r2, #(PSR_UND32_MODE)
710        msr	cpsr_c, r2
711
712	ldr	sp, [r9, #(PCB_UND_SP)]	/* load the UND32-mode banked sp from the new PCB */
713
714        msr	cpsr_c, r3		/* Restore the old mode */
715
716	/* Restore all the save registers */
717#ifndef __XSCALE__
718	add	r7, r9, #PCB_R8
719	ldmia	r7, {r8-r13}
720
721	sub	r7, r7, #PCB_R8		/* restore PCB pointer */
722#else
723	mov	r7, r9
724	ldr	r8, [r7, #(PCB_R8)]
725	ldr	r9, [r7, #(PCB_R9)]
726	ldr	r10, [r7, #(PCB_R10)]
727	ldr	r11, [r7, #(PCB_R11)]
728	ldr	r12, [r7, #(PCB_R12)]
729	ldr	r13, [r7, #(PCB_SP)]
730#endif
731
732	ldr	r5, [r6, #(L_PROC)]	/* fetch the proc for below */
733
734	/* rem: r4 = return value */
735	/* rem: r5 = new lwp's proc */
736	/* rem: r6 = new lwp */
737	/* rem: r7 = new pcb */
738
739#ifdef ARMFPE
740	add	r0, r7, #(USER_SIZE) & 0x00ff
741	add	r0, r0, #(USER_SIZE) & 0xff00
742	bl	_C_LABEL(arm_fpe_core_changecontext)
743#endif
744
745	/* We can enable interrupts again */
746	IRQenableALL
747
748	/* rem: r4 = return value */
749	/* rem: r5 = new lwp's proc */
750	/* rem: r6 = new lwp */
751	/* rem: r7 = new PCB */
752
753	/*
754	 * Check for restartable atomic sequences (RAS).
755	 */
756
757	ldr	r2, [r5, #(P_RASLIST)]
758	ldr	r1, [r7, #(PCB_TF)]	/* r1 = trapframe (used below) */
759	teq	r2, #0			/* proc's P_RASLIST field zero? */
760	bne	.Lswitch_do_ras		/* no, check for one */
761
762.Lswitch_return:
763	/* cpu_switch returns 1 == switched, 0 == didn't switch */
764	mov	r0, r4
765
766	/*
767	 * Pull the registers that got pushed when either savectx() or
768	 * cpu_switch() was called and return.
769	 */
770	ldmfd	sp!, {r4-r7, pc}
771
772.Lswitch_do_ras:
773	ldr	r1, [r1, #(TF_PC)]	/* second ras_lookup() arg */
774	mov	r0, r5			/* first ras_lookup() arg */
775	bl	_C_LABEL(ras_lookup)
776	cmn	r0, #1			/* -1 means "not in a RAS" */
777	ldrne	r1, [r7, #(PCB_TF)]	/* in a RAS: re-fetch the trapframe ... */
778	strne	r0, [r1, #(TF_PC)]	/* ... and store ras_lookup()'s result as its pc */
779	b	.Lswitch_return
780
781.Lswitch_exited:
782	/*
783	 * We skip the cache purge because switch_exit() already did it.
784	 * Load up registers the way .Lcs_cache_purge_skipped expects.
785	 * Userspace access already blocked by switch_exit().
786	 */
787	ldr	r9, [r6, #(L_ADDR)]		/* r9 = new PCB */
788	ldr	r3, .Lblock_userspace_access	/* r3 = &block_userspace_access */
789	mrc	p15, 0, r10, c2, c0, 0		/* r10 = old L1 */
790	mov	r5, #0				/* No previous cache state */
791	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
792	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = new cache state */
793	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
794	b	.Lcs_cache_purge_skipped
795
796
797#ifdef DIAGNOSTIC
798.Lswitch_bogons:
799	adr	r0, .Lswitch_panic_str	/* r0 = panic message */
800	bl	_C_LABEL(panic)
8011:	nop				/* spin forever should panic() return */
802	b	1b
803
804.Lswitch_panic_str:
805	.asciz	"cpu_switch: sched_qs empty with non-zero sched_whichqs!\n"
806#endif
807
808/*
809 * cpu_switchto(struct lwp *current, struct lwp *next)
810 * Switch to the specified next LWP
811 * Arguments:
812 *
813 *	r0	'struct lwp *' of the current LWP
814 *	r1	'struct lwp *' of the LWP to switch to
815 */
816ENTRY(cpu_switchto)
817	stmfd	sp!, {r4-r7, lr}	/* same frame as cpu_switch; popped at .Lswitch_return */
818
819	mov	r6, r1		/* save new lwp */
820
821	mov	r5, r0		/* save old lwp */
822	bl	_C_LABEL(sched_unlock_idle)
823	mov	r1, r5		/* r1 = old lwp, as .Lswitch_resume expects */
824
825	IRQdisable
826
827	/*
828	 * Okay, set up registers the way cpu_switch() wants them,
829	 * and jump into the middle of it (where we bring up the
830	 * new process).
831	 *
832	 * r1 = old lwp (r6 = new lwp)
833	 */
834	b	.Lswitch_resume
835
836/*
837 * void switch_exit(struct lwp *l, struct lwp *l0, void (*exit)(struct lwp *));
838 * Switch to lwp0's saved context and deallocate the address space and kernel
839 * stack for l.  Then jump into cpu_switch(), as if we were in lwp0 all along.
840 */
841
842/* LINTSTUB: Func: void switch_exit(struct lwp *l, struct lwp *l0, void (*func)(struct lwp *)) */
843ENTRY(switch_exit)
844	/*
845	 * The process is going away, so we can use callee-saved
846	 * registers here without having to save them.
847	 */
848
849	mov	r4, r0
850	ldr	r0, .Lcurlwp
851
852	mov	r5, r1
853	ldr	r1, .Lblock_userspace_access
854
855	mov	r6, r2
856
857	/*
858	 * r4 = lwp
859	 * r5 = lwp0
860	 * r6 = exit func
861	 */
862
863	mov	r2, #0x00000000		/* curlwp = NULL */
864	str	r2, [r0]
865
866	/*
867	 * We're about to clear both the cache and the TLB.
868	 * Make sure to zap the 'last cache state' pointer since the
869	 * pmap might be about to go away. Also ensure the outgoing
870	 * VM space's cache state is marked as NOT resident in the
871	 * cache, and that lwp0's cache state IS resident.
872	 */
873	ldr	r3, [r4, #(L_ADDR)]		/* r3 = old lwp's PCB (scratch) */
874	ldr	r0, .Llast_cache_state_ptr	/* Last userland cache state */
875	ldr	r9, [r3, #(PCB_CSTATE)]		/* r9 = old lwp's cache state */
876	ldr	r7, [r5, #(L_ADDR)]		/* r7 = lwp0's PCB, used for the switch */
877	str	r2, [r0]			/* No previous cache state */
878	str	r2, [r9, #(CS_ALL)]		/* Zap old lwp's cache state */
879	ldr	r3, [r7, #(PCB_CSTATE)]		/* r3 = lwp0's cache state */
880	mov	r2, #-1
881	str	r2, [r3, #(CS_ALL)]		/* lwp0 is in da cache! */
882
883	/*
884	 * Don't allow user space access between the purge and the switch.
885	 */
886	mov	r2, #0x00000001
887	str	r2, [r1]
888
889	/* Switch to lwp0 context */
890
891	ldr	r9, .Lcpufuncs
892	mov	lr, pc
893	ldr	pc, [r9, #CF_IDCACHE_WBINV_ALL]
894
895	ldr	r0, [r7, #(PCB_PL1VEC)]
896	ldr	r1, [r7, #(PCB_DACR)]
897
898	/*
899	 * r0 = Pointer to L1 slot for vector_page (or NULL)
900	 * r1 = lwp0's DACR
901	 * r4 = lwp we're switching from
902	 * r5 = lwp0
903	 * r6 = exit func
904	 * r7 = lwp0's PCB
905	 * r9 = cpufuncs
906	 */
907
908	IRQdisableALL
909
910	/*
911	 * Ensure the vector table is accessible by fixing up lwp0's L1
912	 */
913	cmp	r0, #0			/* No need to fixup vector table? */
914	ldrne	r3, [r0]		/* But if yes, fetch current value */
915	ldrne	r2, [r7, #(PCB_L1VEC)]	/* Fetch new vector_page value */
916	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for lwp0's context */
917	cmpne	r3, r2			/* Stuffing the same value? */
918	strne	r2, [r0]		/* Store if not. */
919
920#ifdef PMAP_INCLUDE_PTE_SYNC
921	/*
922	 * Need to sync the cache to make sure that last store is
923	 * visible to the MMU.
924	 */
925	movne	r1, #4			/* r0 still = L1 slot address, r1 = 4 */
926	movne	lr, pc
927	ldrne	pc, [r9, #CF_DCACHE_WB_RANGE]
928#endif /* PMAP_INCLUDE_PTE_SYNC */
929
930	/*
931	 * Note: We don't do the same optimisation as cpu_switch() with
932	 * respect to avoiding flushing the TLB if we're switching to
933	 * the same L1 since this process' VM space may be about to go
934	 * away, so we don't want *any* turds left in the TLB.
935	 */
936
937	/* Switch the memory to the new process */
938	ldr	r0, [r7, #(PCB_PAGEDIR)]	/* r0 = lwp0's L1 */
939	mov	lr, pc
940	ldr	pc, [r9, #CF_CONTEXT_SWITCH]
941
942	ldr	r0, .Lcurpcb
943
944	/* Restore all the save registers */
945#ifndef __XSCALE__
946	add	r1, r7, #PCB_R8
947	ldmia	r1, {r8-r13}
948#else
949	ldr	r8, [r7, #(PCB_R8)]
950	ldr	r9, [r7, #(PCB_R9)]
951	ldr	r10, [r7, #(PCB_R10)]
952	ldr	r11, [r7, #(PCB_R11)]
953	ldr	r12, [r7, #(PCB_R12)]
954	ldr	r13, [r7, #(PCB_SP)]
955#endif
956	str	r7, [r0]	/* curpcb = lwp0's PCB */
957
958	IRQenableALL
959
960	/*
961	 * Schedule the vmspace and stack to be freed.
962	 */
963	mov	r0, r4			/* {lwp_}exit2(l) */
964	mov	lr, pc
965	mov	pc, r6
966
967	bl	_C_LABEL(sched_lock_idle)
968
969	ldr	r7, .Lwhichqs		/* r7 = &whichqs */
970	mov	r5, #0x00000000		/* r5 = old lwp = NULL */
971	b	.Lswitch_search
972
973/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
974ENTRY(savectx)
975	/*
976	 * r0 = pcb; r8-r13 are stored into it starting at PCB_R8
977	 */
978
979	/* Push registers.*/
980	stmfd	sp!, {r4-r7, lr}
981
982	/* Store all the registers in the process's pcb */
983#ifndef __XSCALE__
984	add	r2, r0, #(PCB_R8)
985	stmia	r2, {r8-r13}
986#else
987	strd	r8, [r0, #(PCB_R8)]
988	strd	r10, [r0, #(PCB_R10)]
989	strd	r12, [r0, #(PCB_R12)]
990#endif
991
992	/* Pull the regs off the stack and return */
993	ldmfd	sp!, {r4-r7, pc}
994
995ENTRY(proc_trampoline)
996#ifdef __NEWINTR
997	mov	r0, #(IPL_NONE)
998	bl	_C_LABEL(_spllower)
999#else /* ! __NEWINTR */
1000	mov	r0, #(_SPL_0)
1001	bl	_C_LABEL(splx)
1002#endif /* __NEWINTR */
1003
1004#ifdef MULTIPROCESSOR
1005	bl	_C_LABEL(proc_trampoline_mp)
1006#endif
1007	mov	r0, r5			/* first argument = value left in r5 */
1008	mov	r1, sp			/* second argument = current stack pointer */
1009	mov	lr, pc			/* lr = return address for the indirect call */
1010	mov	pc, r4			/* call the function whose address is in r4 */
1011
1012	/* Kill irq's */
1013        mrs     r0, cpsr
1014        orr     r0, r0, #(I32_bit)
1015        msr     cpsr_c, r0
1016
1017	PULLFRAME
1018
1019	movs	pc, lr			/* Exit */
1020
1021#ifndef __XSCALE__
1022	.type .Lcpu_switch_ffs_table, _ASM_TYPE_OBJECT;
1023.Lcpu_switch_ffs_table:
1024/* same as ffs table but all nums are -1 from that; index = top 6 bits of (bit * 0x0450fbaf) */
1025/*               0   1   2   3   4   5   6   7           */
1026	.byte	 0,  0,  1, 12,  2,  6,  0, 13  /*  0- 7 */
1027	.byte	 3,  0,  7,  0,  0,  0,  0, 14  /*  8-15 */
1028	.byte	10,  4,  0,  0,  8,  0,  0, 25  /* 16-23 */
1029	.byte	 0,  0,  0,  0,  0, 21, 27, 15  /* 24-31 */
1030	.byte	31, 11,  5,  0,  0,  0,  0,  0	/* 32-39 */
1031	.byte	 9,  0,  0, 24,  0,  0, 20, 26  /* 40-47 */
1032	.byte	30,  0,  0,  0,  0, 23,  0, 19  /* 48-55 */
1033	.byte   29,  0, 22, 18, 28, 17, 16,  0  /* 56-63 */
1034#endif	/* !__XSCALE__ */
1035