/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

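/*
 * Zero a page.  pagezero_std uses 8-byte stores (rep stosq), while
 * pagezero_erms uses byte stores (rep stosb), which is the faster
 * option on CPUs advertising Enhanced REP MOVSB/STOSB (the CPUID
 * ERMS bit).  The kernel selects a variant at runtime based on the
 * CPU features.
 */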
/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movq	$PAGE_SIZE/8,%rcx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movq	$PAGE_SIZE,%rcx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movq	$PAGE_SIZE/8,%rcx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/* Address: %rdi */
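/*
 * Zero a page with non-temporal (movnti) stores, bypassing the cache.
 * %rdi is advanced to the end of the page and %rdx counts up from
 * -PAGE_SIZE to zero, so the loop needs no separate comparison; the
 * final sfence orders the weakly-ordered movnti stores.
 */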
ENTRY(sse2_pagezero)
	PUSH_FRAME_POINTER
	movq	$-PAGE_SIZE,%rdx
	subq	%rdx,%rdi
	xorl	%eax,%eax
	jmp	1f
	/*
	 * The loop takes 29 bytes.  Ensure that it doesn't cross a 32-byte
	 * cache line.
	 */
	.p2align 5,0x90
1:
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	addq	$32,%rdx
	jne	1b
	sfence
	POP_FRAME_POINTER
	ret
END(sse2_pagezero)

/*
 * memcmp(b1, b2, len)
 *	   rdi,rsi,rdx
 */
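/*
 * Strategy: lengths below 16 use the byte loop at 1; 16..31 are
 * compared as 8-byte words at 6; 32 and up as 32-byte blocks at 7.
 * On any word mismatch the code falls back to the byte loop, which
 * computes the return value as the difference of the first pair of
 * differing bytes.
 */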
ENTRY(memcmp)
	PUSH_FRAME_POINTER
	cmpq	$16,%rdx
	jae	5f
1:
	testq	%rdx,%rdx
	je	3f
	xorl	%ecx,%ecx
2:
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq    $1,%rcx
	cmpq    %rcx,%rdx
	jz	3f
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jz	3f
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jz	3f
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jne	2b
3:
	xorl	%eax,%eax
	POP_FRAME_POINTER
	ret
4:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
5:
	cmpq	$32,%rdx
	jae	7f
6:
	/*
	 * 8 bytes
	 */
	movq    (%rdi),%r8
	movq    (%rsi),%r9
	cmpq    %r8,%r9
	jne	1b
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	subq	$8,%rdx
	cmpq	$8,%rdx
	jae	6b
	jl	1b
	jmp	3b
7:
	/*
	 * 32 bytes
	 */
	movq    (%rsi),%r8
	movq    8(%rsi),%r9
	subq    (%rdi),%r8
	subq    8(%rdi),%r9
	or	%r8,%r9
	jnz	1b

	movq    16(%rsi),%r8
	movq    24(%rsi),%r9
	subq    16(%rdi),%r8
	subq    24(%rdi),%r9
	or	%r8,%r9
	jnz	1b

	leaq    32(%rdi),%rdi
	leaq    32(%rsi),%rsi
	subq    $32,%rdx
	cmpq    $32,%rdx
	jae	7b
	jnz	1b
	jmp	3b
END(memcmp)

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
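/*
 * Rough C-level sketch of the dispatch below (for illustration only;
 * the numeric labels refer to the code):
 *
 *	if (cnt <= 32)				// 101632
 *		copy with overlap-safe head/tail loads;
 *	else if (overlap && dst - src < cnt)	// 2 (backwards)
 *		copy 32-byte blocks, then the tail, top down;
 *	else if (cnt <= 256)			// 103200
 *		copy 32-byte blocks, then the tail;
 *	else					// 1256
 *		align dst, then rep movsb (erms) or rep movsq;
 */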
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
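	/*
	 * Note that a single unsigned compare suffices: if dst < src,
	 * dst - src wraps to a huge unsigned value that can never be
	 * below cnt, so the forward path is (correctly) taken.
	 */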
.endif

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
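	/*
	 * Unaligned destination: remember the first 16 source bytes,
	 * round %rdi up to a 16-byte boundary (adjusting %rsi and the
	 * count to match), do the aligned bulk copy, and store the
	 * saved head last.  The overlapping head bytes are rewritten
	 * with the same values, so this is harmless.
	 */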
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
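	/*
	 * Reached only when the buffers overlap with src < dst, where a
	 * forward copy would clobber not-yet-read source bytes.  The
	 * 2256 path sets the direction flag for a backwards rep movs
	 * and must restore it with cld before returning.
	 */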
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm

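/*
 * The begin/end hooks for the plain memmove/memcpy entry points.
 * %rax is loaded with dst up front because memmove() and memcpy()
 * return it, and MEMMOVE never clobbers %rax.
 */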
.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10
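	/*
	 * The multiply broadcasts the fill byte into all eight byte
	 * lanes of %r10, e.g. 0x2a * 0x0101010101010101 ==
	 * 0x2a2a2a2a2a2a2a2a.
	 */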

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
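	/*
	 * Unaligned destination: store 16 pattern bytes at dst, advance
	 * %rdi to the next 16-byte boundary, shrink the count by the
	 * bytes just covered and rejoin the aligned path at 1.
	 */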
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */
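/*
 * C-level sketch of the pcb_onfault protocol (illustration only; the
 * names match the code):
 *
 *	curpcb->pcb_onfault = &copy_fault;	// arm the handler
 *	... touch user memory; a fault resumes at copy_fault ...
 *	curpcb->pcb_onfault = NULL;		// disarm on success
 *
 * copy_fault itself clears pcb_onfault and returns EFAULT.
 */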

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm
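
/*
 * Naming note: with SMAP enabled the kernel must set EFLAGS.AC (stac)
 * before touching user pages and clear it again (clac) afterwards.
 * SMAP_DISABLE therefore executes stac (permitting the access) and
 * SMAP_ENABLE executes clac (re-arming the protection).
 */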

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx


	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyout_nosmap_std)
	COPYOUT smap=0 erms=0
END(copyout_nosmap_std)

ENTRY(copyout_smap_std)
	COPYOUT smap=1 erms=0
END(copyout_smap_std)

ENTRY(copyout_nosmap_erms)
	COPYOUT smap=0 erms=1
END(copyout_nosmap_erms)

ENTRY(copyout_smap_erms)
	COPYOUT smap=1 erms=1
END(copyout_smap_erms)

/*
 * copyin(from_user, to_kernel, len)
 *        %rdi,      %rsi,      %rdx
 */
.macro	COPYIN smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * make sure address is valid
	 */
	movq	%rdi,%rax
	addq	%rdx,%rax
	jc	copy_fault
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	xorl	%eax,%eax

	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyin_nosmap_std)
	COPYIN smap=0 erms=0
END(copyin_nosmap_std)

ENTRY(copyin_smap_std)
	COPYIN smap=1 erms=0
END(copyin_smap_std)

ENTRY(copyin_nosmap_erms)
	COPYIN smap=0 erms=1
END(copyin_nosmap_erms)

ENTRY(copyin_smap_erms)
	COPYIN smap=1 erms=1
END(copyin_smap_erms)

	ALIGN_TEXT
	/* Trap entry clears PSL.AC */
copy_fault:
	movq	$0,PCB_ONFAULT(%r11)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if access was successful.  Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
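/*
 * C-level sketch of the contract (illustration; prototype assumed
 * from the comment above):
 *
 *	int casueword32(volatile uint32_t *dst, uint32_t old,
 *	    uint32_t *oldp, uint32_t new);
 *
 * cmpxchg leaves the memory's previous value in %eax whether or not
 * the exchange happened, and that value is what lands in *oldp.
 */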
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	clac

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	ret
END(casueword32_smap)

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if access was successful.  Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	clac

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	ret
END(casueword_smap)

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */

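/*
 * fueword and fueword32 return 0 or -1 in %rax and pass the value out
 * via *valp; fuword16 and fubyte return the zero-extended value
 * itself, with -1 doubling as the fault indicator.
 */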
ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	PCPU(CURPCB),%rcx
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	PCPU(CURPCB),%rcx
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	PCPU(CURPCB),%rcx
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	PCPU(CURPCB),%rcx
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

	ALIGN_TEXT
	/* Fault entry clears PSL.AC */
fusufault:
	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	copy a string from 'from' to 'to', stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
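/*
 * The byte loop below runs with maxlen clamped to the distance from
 * 'from' to VM_MAXUSER_ADDRESS.  When the clamped count runs out, the
 * toolong handler re-checks the final address to distinguish a
 * genuinely overlong string (ENAMETOOLONG) from one that ran off the
 * end of the user address space (EFAULT).
 */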
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp 1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)

cpystrflt:
	/* Fault entry clears PSL.AC */
	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

/*
 * copystr(from, to, maxlen, int *lencopied)
 *         %rdi, %rsi, %rdx, %rcx
 */
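/*
 * Kernel-to-kernel variant of copyinstr: same semantics, but both
 * buffers are kernel addresses, so no bounds checking or pcb_onfault
 * setup is required.
 */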
ENTRY(copystr)
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */

	incq    %rdx
1:
	decq	%rdx
	jz	4f
	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax
2:
	testq	%rcx,%rcx
	jz      3f
	/* set *lencopied and return %rax */
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
4:
	/* rdx is zero -- return ENAMETOOLONG */
	movl    $ENAMETOOLONG,%eax
	jmp	2b
END(copystr)

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	MEXITCOUNT
	lretq
END(lgdt)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

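/*
 * Minimal in-kernel setjmp/longjmp.  The jmp_buf is eight quadwords
 * holding %rbx, %rsp, %rbp, %r12..%r15 and the return %rip, in the
 * order of the offsets below.
 */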
ENTRY(setjmp)
	movq	%rbx,0(%rdi)			/* save rbx */
	movq	%rsp,8(%rdi)			/* save rsp */
	movq	%rbp,16(%rdi)			/* save rbp */
	movq	%r12,24(%rdi)			/* save r12 */
	movq	%r13,32(%rdi)			/* save r13 */
	movq	%r14,40(%rdi)			/* save r14 */
	movq	%r15,48(%rdi)			/* save r15 */
	movq	0(%rsp),%rdx			/* get rta */
	movq	%rdx,56(%rdi)			/* save rip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get rta */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)

/*
 * Support for reading MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read MSR pointed by %ecx. Returns
				   high 32 bits in %edx, low in %eax */
	salq	$32,%rdx	/* sign-shift %rdx left */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write MSR pointed by %ecx. Accepts
				   high 32 bits in %edx, low in %eax. */
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	$0,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

	.altmacro
	.macro	ibrs_seq_label l
handle_ibrs_\l:
	.endm
	.macro	ibrs_call_label l
	call	handle_ibrs_\l
	.endm
	.macro	ibrs_seq count
	ll=1
	.rept	\count
	ibrs_call_label	%(ll)
	nop
	ibrs_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm
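
	/*
	 * ibrs_seq expands to \count call/label pairs.  Each call pushes
	 * a return address, filling one return stack buffer entry, and
	 * falls through to its own label where the addq discards the
	 * pushed word again.  This stuffs the RSB with benign entries
	 * when IBRS is enabled on CPUs without SMEP.
	 */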

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	jne	1f
	ibrs_seq 32
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush L1D cache.  Load enough of the data from the kernel text
 * to flush existing L1D content.
 *
 * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
 * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
 * registers are clobbered.  The NMI handler caller only needs %r13 preserved.
 */
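/*
 * L1D_FLUSH_SIZE is 64KB, comfortably above the (typically 32KB) L1D
 * of the CPUs this is intended for, so the reads in pass 2 displace
 * all previous cache contents.  The cpuid between the passes is a
 * serializing instruction, keeping the TLB preload from overlapping
 * the flush reads.
 */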
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)