xref: /freebsd/sys/amd64/amd64/support.S (revision 42249ef2)
/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE,%ecx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)
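/*
 * Illustrative note (added commentary, not part of the original file):
 * callers pick one of the two variants at boot based on CPU features.
 * A minimal C resolver, assuming FreeBSD's DEFINE_IFUNC dispatch and the
 * CPUID_STDEXT_ERMS ("Enhanced REP MOVSB/STOSB") feature bit, might look
 * like:
 *
 *	void pagezero_std(void *addr);
 *	void pagezero_erms(void *addr);
 *
 *	DEFINE_IFUNC(, void, pagezero, (void *))
 *	{
 *		return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 *		    pagezero_erms : pagezero_std);
 *	}
 */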

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/* Address: %rdi */
ENTRY(sse2_pagezero)
	PUSH_FRAME_POINTER
	movq	$-PAGE_SIZE,%rdx
	subq	%rdx,%rdi
	xorl	%eax,%eax
	jmp	1f
	/*
	 * The loop takes 29 bytes.  Ensure that it doesn't cross a 32-byte
	 * cache line.
	 */
	.p2align 5,0x90
1:
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	addq	$32,%rdx
	jne	1b
	sfence
	POP_FRAME_POINTER
	ret
END(sse2_pagezero)
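
/*
 * Added commentary: the movnti stores above are non-temporal, so zeroing
 * a page does not displace useful cache lines.  Because non-temporal
 * stores are weakly ordered, the trailing sfence is what orders them
 * ahead of any subsequent stores before the function returns.
 */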

/*
 * memcmp(b1, b2, len)
 *	  rdi,rsi,len
 */
ENTRY(memcmp)
	PUSH_FRAME_POINTER
	cmpq	$16,%rdx
	jae	5f
1:
	testq	%rdx,%rdx
	je	3f
	xorl	%ecx,%ecx
2:
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jz	3f
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jz	3f
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jz	3f
	movzbl	(%rdi,%rcx,1),%eax
	movzbl	(%rsi,%rcx,1),%r8d
	cmpb	%r8b,%al
	jne	4f
	addq	$1,%rcx
	cmpq	%rcx,%rdx
	jne	2b
3:
	xorl	%eax,%eax
	POP_FRAME_POINTER
	ret
4:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
5:
	cmpq	$32,%rdx
	jae	7f
6:
	/*
	 * 8 bytes
	 */
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	1b
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	subq	$8,%rdx
	cmpq	$8,%rdx
	jae	6b
	jl	1b
	jmp	3b
7:
	/*
	 * 32 bytes
	 */
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	subq	(%rdi),%r8
	subq	8(%rdi),%r9
	or	%r8,%r9
	jnz	1b

	movq	16(%rsi),%r8
	movq	24(%rsi),%r9
	subq	16(%rdi),%r8
	subq	24(%rdi),%r9
	or	%r8,%r9
	jnz	1b

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	7b
	jnz	1b
	jmp	3b
END(memcmp)
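
/*
 * For reference, a minimal C sketch of the semantics implemented above
 * (added commentary, not the kernel's actual fallback): the return value
 * is the difference of the first mismatching bytes, taken as unsigned
 * chars.
 *
 *	int
 *	memcmp(const void *b1, const void *b2, size_t len)
 *	{
 *		const u_char *p1 = b1, *p2 = b2;
 *		size_t i;
 *
 *		for (i = 0; i < len; i++)
 *			if (p1[i] != p2[i])
 *				return ((int)p1[i] - (int)p2[i]);
 *		return (0);
 *	}
 */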

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm
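
/*
 * A rough C model of the dispatch above (added commentary, illustrative
 * only; the helper names are hypothetical): lengths of at most 32 bytes
 * are handled with loads that all complete before any store, larger
 * buffers check for a backward-overlap case when \overlap is set, and
 * anything past 256 bytes goes to the string instructions.
 *
 *	if (cnt <= 32)
 *		copy_small(dst, src, cnt);	// loads precede stores
 *	else if (overlap && (uintptr_t)dst - (uintptr_t)src < cnt)
 *		copy_backwards(dst, src, cnt);
 *	else if (cnt <= 256)
 *		copy_forward_32_per_loop(dst, src, cnt);
 *	else
 *		rep_copy(dst, src, cnt);	// movsb (ERMS) or movsq
 */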

.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm
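
/*
 * Added commentary: the pattern register %r10 is built by multiplying the
 * zero-extended fill byte by 0x0101010101010101, which replicates it into
 * all eight byte lanes.  In C (illustrative):
 *
 *	uint64_t pat = (uint64_t)(uint8_t)c * 0x0101010101010101ULL;
 */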

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */
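
/*
 * Added commentary, a C model of that protocol.  The page fault handler
 * effectively does (a sketch, assuming the amd64 trap frame layout):
 *
 *	if (curpcb->pcb_onfault != NULL) {
 *		frame->tf_rip = (register_t)curpcb->pcb_onfault;
 *		return;		// resume at the recovery label
 *	}
 *
 * so each routine below installs its recovery label before touching user
 * memory and clears pcb_onfault again on every exit path.
 */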

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.  If 486 write protection
	 * is being used, this check is essential because we are in kernel
	 * mode so the h/w does not provide any protection against writing
	 * kernel addresses.
	 */

	/*
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx


	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyout_nosmap_std)
	COPYOUT smap=0 erms=0
END(copyout_nosmap_std)

ENTRY(copyout_smap_std)
	COPYOUT smap=1 erms=0
END(copyout_smap_std)

ENTRY(copyout_nosmap_erms)
	COPYOUT smap=0 erms=1
END(copyout_nosmap_erms)

ENTRY(copyout_smap_erms)
	COPYOUT smap=1 erms=1
END(copyout_smap_erms)

/*
 * copyin(from_user, to_kernel, len)
 *        %rdi,      %rsi,      %rdx
 */
.macro	COPYIN smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * make sure address is valid
	 */
	movq	%rdi,%rax
	addq	%rdx,%rax
	jc	copy_fault
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	xorl	%eax,%eax

	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyin_nosmap_std)
	COPYIN smap=0 erms=0
END(copyin_nosmap_std)

ENTRY(copyin_smap_std)
	COPYIN smap=1 erms=0
END(copyin_smap_std)

ENTRY(copyin_nosmap_erms)
	COPYIN smap=0 erms=1
END(copyin_nosmap_erms)

ENTRY(copyin_smap_erms)
	COPYIN smap=1 erms=1
END(copyin_smap_erms)

	ALIGN_TEXT
	/* Trap entry clears PSL.AC */
copy_fault:
	movq	$0,PCB_ONFAULT(%r11)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if the comparison succeeded and the store was performed, and
 *        1 if the comparison failed.  Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	clac
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_smap)
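
/*
 * Added commentary, an illustrative retry loop (a sketch, not kernel
 * code): on return value 1 the comparison failed and *oldp already holds
 * the current user-space value, so the caller can retry without a
 * separate re-read.
 *
 *	uint32_t old;
 *	int rv;
 *
 *	if (fueword32(p, &old) == -1)
 *		return (EFAULT);
 *	do {
 *		rv = casueword32(p, old, &old, new);
 *		if (rv == -1)
 *			return (EFAULT);
 *	} while (rv == 1);
 */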

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if the comparison succeeded and the store was performed, and
 *        1 if the comparison failed.  Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	clac
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_smap)

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */

ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)
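
/*
 * Added commentary: note the two return conventions above.  fueword and
 * fueword32 store the fetched value through valp and return 0 or -1,
 * while fuword16 and fubyte return the zero-extended value itself; -1 is
 * unambiguous there because zero-extending a 16-bit or 8-bit load can
 * never produce it.
 */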

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

	ALIGN_TEXT
	/* Fault entry clears PSL.AC */
fusufault:
	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	copy a string from 'from' to 'to', stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp	1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)

cpystrflt:
	/* Fault entry clears PSL.AC */
	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x
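
/*
 * Added commentary, a C model of the loop above (illustrative; it omits
 * the VM_MAXUSER_ADDRESS clamping handled at labels 8 and
 * copyinstr_toolong):
 *
 *	error = ENAMETOOLONG;
 *	for (n = 0; n < maxlen; ) {
 *		c = from[n];		// may fault -> EFAULT
 *		to[n++] = c;
 *		if (c == '\0') {
 *			error = 0;
 *			break;
 *		}
 *	}
 *	if (lencopied != NULL)
 *		*lencopied = n;		// bytes copied, including the NUL
 */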

/*
 * copystr(from, to, maxlen, int *lencopied)
 *         %rdi, %rsi, %rdx, %rcx
 */
ENTRY(copystr)
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */

	incq	%rdx
1:
	decq	%rdx
	jz	4f
	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	1b

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax
2:
	testq	%rcx,%rcx
	jz	3f
	/* set *lencopied and return %rax */
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
4:
	/* rdx is zero -- return ENAMETOOLONG */
	movl	$ENAMETOOLONG,%eax
	jmp	2b
END(copystr)

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	MEXITCOUNT
	lretq
END(lgdt)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movq	%rbx,0(%rdi)			/* save rbx */
	movq	%rsp,8(%rdi)			/* save rsp */
	movq	%rbp,16(%rdi)			/* save rbp */
	movq	%r12,24(%rdi)			/* save r12 */
	movq	%r13,32(%rdi)			/* save r13 */
	movq	%r14,40(%rdi)			/* save r14 */
	movq	%r15,48(%rdi)			/* save r15 */
	movq	0(%rsp),%rdx			/* get rta */
	movq	%rdx,56(%rdi)			/* save rip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get rta */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)
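
/*
 * Added commentary, an illustrative use of the kernel-private pair above
 * (a sketch; DDB uses this pattern for its command loop):
 *
 *	static jmp_buf jb;
 *
 *	if (setjmp(jb) == 0)
 *		run_command();		// may recover via longjmp(jb, 1)
 */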

/*
 * Support for reading MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read MSR pointed by %ecx. Returns
				   high 32 bits in %edx, low 32 bits
				   in %eax */
	salq	$32,%rdx	/* sign-shift %rdx left */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write MSR pointed by %ecx. Accepts
				   high 32 bits in %edx, low 32 bits
				   in %eax. */
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	$0,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret
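
/*
 * Added commentary, illustrative use (a sketch): probing an MSR that may
 * not exist on the current CPU without panicking on the resulting #GP.
 *
 *	uint64_t v;
 *
 *	if (rdmsr_safe(msr, &v) != 0)
 *		return (ENXIO);		// MSR not implemented
 */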

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

	.altmacro
	.macro	ibrs_seq_label l
handle_ibrs_\l:
	.endm
	.macro	ibrs_call_label l
	call	handle_ibrs_\l
	.endm
	.macro	ibrs_seq count
	ll=1
	.rept	\count
	ibrs_call_label	%(ll)
	nop
	ibrs_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm
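
/*
 * Added commentary: ibrs_seq stuffs the return stack buffer (RSB).  Each
 * expanded call pushes a return address and allocates an RSB entry whose
 * target is the benign local label; the addq then discards the stack slot
 * without a matching ret.  After ibrs_seq 32 every RSB entry points into
 * this sequence.  It is only emitted when SMEP is absent, as checked in
 * handle_ibrs_entry below.
 */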

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	jne	1f
	ibrs_seq 32
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush L1D cache.  Load enough of the data from the kernel text
 * to flush existing L1D content.
 *
 * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
 * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
 * registers are clobbered.  The NMI handler caller only needs %r13 preserved.
 */
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)

ENTRY(mds_handler_void)
	retq
END(mds_handler_void)

ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)
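
/*
 * Added commentary: on CPUs advertising MD_CLEAR, the VERW instruction
 * with a memory operand has the documented side effect of overwriting
 * the affected microarchitectural buffers; the handler above uses the
 * current %ds selector value merely as a convenient readable operand.
 */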

ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi
	movq	%rbx, %rsi
	movl	$40, %ecx
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0
	vorpd	(%rdx), %ymm0, %ymm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0
	vorpd	(%rdx), %zmm0, %zmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movl	$16, %ecx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)