xref: /freebsd/sys/amd64/amd64/support.S (revision 6419bb52)
/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE,%ecx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/* Address: %rdi */
ENTRY(sse2_pagezero)
	PUSH_FRAME_POINTER
	movq	$-PAGE_SIZE,%rdx
	subq	%rdx,%rdi
	xorl	%eax,%eax
	jmp	1f
	/*
	 * The loop takes 29 bytes.  Ensure that it doesn't cross a 32-byte
	 * cache line.
	 */
	.p2align 5,0x90
1:
	movnti	%rax,(%rdi,%rdx)
	movnti	%rax,8(%rdi,%rdx)
	movnti	%rax,16(%rdi,%rdx)
	movnti	%rax,24(%rdi,%rdx)
	addq	$32,%rdx
	jne	1b
	sfence
	POP_FRAME_POINTER
	ret
END(sse2_pagezero)

/*
 * memcmp(b1, b2, len)
 *	   rdi,rsi,rdx
 */
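/*
 * The numeric local labels below appear to encode the size range a
 * block handles: 100816 covers 8..16 bytes, 101632 covers 16..32,
 * 103200 is the 32-and-up loop, and so on.  Small sizes are compared
 * with a possibly overlapping head and tail load instead of a loop.
 * A hedged C sketch of the 8..16 byte case (names are illustrative):
 *
 *	uint64_t h1, h2, t1, t2;
 *
 *	memcpy(&h1, b1, 8);           memcpy(&h2, b2, 8);
 *	memcpy(&t1, b1 + len - 8, 8); memcpy(&t2, b2 + len - 8, 8);
 *	if (h1 == h2 && t1 == t2)
 *		return (0);
 *	(head and tail overlap for len < 16, together covering every byte)
 */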
ENTRY(memcmp)
	PUSH_FRAME_POINTER

	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

100816:
	cmpb	$8,%dl
	jl	100408f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	POP_FRAME_POINTER
	ret
100408:
	cmpb	$4,%dl
	jl	100204f
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	POP_FRAME_POINTER
	ret
100204:
	cmpb	$2,%dl
	jl	100001f
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	POP_FRAME_POINTER
	ret
100001:
	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	POP_FRAME_POINTER
	ret
ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	POP_FRAME_POINTER
	ret
ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	POP_FRAME_POINTER
	ret

/*
 * Mismatch was found.
 *
 * Before we compute the result we narrow down the range (16 -> 8 -> 4 bytes).
 */
ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
END(memcmp)

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
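/*
 * For orientation, the dispatch implemented below: sizes 0..32 are
 * copied with possibly overlapping head and tail moves, sizes 33..256
 * go through a 32-bytes-per-iteration loop, and larger sizes use
 * rep movsb (ERMS variant) or rep movsq, aligning the destination to
 * 16 bytes first when needed.
 */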
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
.if \erms == 1
	leaq	-1(%rdi,%rcx),%rdi
	leaq	-1(%rsi,%rcx),%rsi
	rep
	movsb
	cld
.else
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
.endif
	\end
	ret
.endif
.endm

.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
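/*
 * The fill byte is replicated into all 8 bytes of %r10 by multiplying
 * with 0x0101010101010101.  A hedged C equivalent of the setup
 * (variable names are illustrative):
 *
 *	uint64_t pat = (uint8_t)c * 0x0101010101010101ULL;
 */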
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */
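/*
 * A hedged C-level sketch of the recovery on the trap-handler side
 * (simplified; the real logic lives in the trap code and the names
 * here are illustrative):
 *
 *	if (trapped_in_kernel(frame) && curpcb->pcb_onfault != 0) {
 *		frame->tf_rip = curpcb->pcb_onfault;
 *		return;		(resume at the routine's fault label)
 *	}
 */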

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx


	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyout_nosmap_std)
	COPYOUT smap=0 erms=0
END(copyout_nosmap_std)

ENTRY(copyout_smap_std)
	COPYOUT smap=1 erms=0
END(copyout_smap_std)

ENTRY(copyout_nosmap_erms)
	COPYOUT smap=0 erms=1
END(copyout_nosmap_erms)

ENTRY(copyout_smap_erms)
	COPYOUT smap=1 erms=1
END(copyout_smap_erms)

/*
 * copyin(from_user, to_kernel, len)
 *        %rdi,      %rsi,      %rdx
 */
.macro	COPYIN smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * make sure address is valid
	 */
	movq	%rdi,%rax
	addq	%rdx,%rax
	jc	copy_fault
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	xorl	%eax,%eax

	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyin_nosmap_std)
	COPYIN smap=0 erms=0
END(copyin_nosmap_std)

ENTRY(copyin_smap_std)
	COPYIN smap=1 erms=0
END(copyin_smap_std)

ENTRY(copyin_nosmap_erms)
	COPYIN smap=0 erms=1
END(copyin_nosmap_erms)

ENTRY(copyin_smap_erms)
	COPYIN smap=1 erms=1
END(copyin_smap_erms)

	ALIGN_TEXT
	/* Trap entry clears PSL.AC */
copy_fault:
	movq	$0,PCB_ONFAULT(%r11)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if the compare-and-set succeeded, and 1 if the comparison
 *        failed (non-faulting access).  Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
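/*
 * A hedged sketch of the retry loop a C caller typically builds on
 * this return convention (illustrative, not a quote of any caller):
 *
 *	for (;;) {
 *		rv = casueword32(p, old, &cur, new);
 *		if (rv == -1)
 *			return (EFAULT);
 *		if (rv == 0)
 *			break;		(the store happened)
 *		old = cur;		(comparison failed, retry)
 *	}
 */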
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch a corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	clac
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch a corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_smap)

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if the compare-and-set succeeded, and 1 if the comparison
 *        failed (non-faulting access).  Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	clac
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_smap)

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */
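/*
 * Note on the conventions implemented below: fueword and fueword32
 * return 0 or -1 and deliver the fetched value through valp, while
 * fuword16 and fubyte return the zero-extended value itself, with -1
 * reserved for the fault case.
 */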

ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
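/*
 * All su* variants below return 0 on success and -1 (via fusufault)
 * when the range check or the store itself faults.
 */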
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

	ALIGN_TEXT
	/* Fault entry clears PSL.AC */
fusufault:
	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	copy a string from 'from' to 'to', stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is not NULL,
 *	return the number of bytes copied, including the terminating 0,
 *	in *lencopied.
 */
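/*
 * A hedged C usage sketch (buffer and variable names are illustrative):
 *
 *	char path[PATH_MAX];
 *	size_t done;
 *	int error;
 *
 *	error = copyinstr(uaddr, path, sizeof(path), &done);
 *	if (error != 0)
 *		return (error);		(EFAULT or ENAMETOOLONG)
 *	on success, path holds the 0-terminated copy and done counts
 *	the copied bytes including the terminating 0
 */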
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp 1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)

cpystrflt:
	/* Fault entry clears PSL.AC */
	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch queue */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	MEXITCOUNT
	lretq
END(lgdt)

/*****************************************************************************/
/* setjmp, longjmp                                                           */
/*****************************************************************************/

ENTRY(setjmp)
	movq	%rbx,0(%rdi)			/* save rbx */
	movq	%rsp,8(%rdi)			/* save rsp */
	movq	%rbp,16(%rdi)			/* save rbp */
	movq	%r12,24(%rdi)			/* save r12 */
	movq	%r13,32(%rdi)			/* save r13 */
	movq	%r14,40(%rdi)			/* save r14 */
	movq	%r15,48(%rdi)			/* save r15 */
	movq	0(%rsp),%rdx			/* get rta */
	movq	%rdx,56(%rdi)			/* save rip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get rta */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)

/*
 * Support for reading MSRs in a safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
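/*
 * A hedged C usage sketch (the MSR constant is chosen for illustration
 * only):
 *
 *	uint64_t val;
 *
 *	if (rdmsr_safe(MSR_IA32_SPEC_CTRL, &val) != 0) {
 *		the rdmsr raised #gp; msr_onfault turned it into EFAULT
 *	}
 */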
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read MSR pointed by %ecx. Returns
				   high 32 bits in %edx, low in %eax */
	salq	$32,%rdx	/* shift %rdx left by 32 bits */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in a safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write MSR pointed by %ecx. Accepts
				   high 32 bits in %edx, low in %eax. */
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	$0,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
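/*
 * Interrupts are disabled around the %cr3 switches (pushfq/cli ...
 * popfq) so that no interrupt handler can run, and thus touch kernel
 * memory, while the user page table is installed.
 */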
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

	.altmacro
	.macro	rsb_seq_label l
rsb_seq_\l:
	.endm
	.macro	rsb_call_label l
	call	rsb_seq_\l
	.endm
	.macro	rsb_seq count
	ll=1
	.rept	\count
	rsb_call_label	%(ll)
	nop
	rsb_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm

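/*
 * rsb_seq emits \count call/return-target pairs.  Each call pushes a
 * return address onto both the stack and the CPU's Return Stack
 * Buffer; the addq then discards the stack copy while the RSB entry
 * remains, so running the sequence refills the RSB with benign,
 * kernel-controlled targets (the usual RSB stuffing technique).
 */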
ENTRY(rsb_flush)
	rsb_seq	32
	ret

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_ibpb_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	je	rsb_flush
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush L1D cache.  Load enough of the data from the kernel text
 * to flush existing L1D content.
 *
 * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
 * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
 * registers are clobbered.  The NMI handler caller only needs %r13 preserved.
 */
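/*
 * L1D_FLUSH_SIZE is 64KB, twice the 32KB L1D typical of the affected
 * CPUs, which should be enough to displace all previous content.  The
 * first pass touches one byte per page so the per-cache-line second
 * pass does not take TLB misses.
 */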
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)

ENTRY(mds_handler_void)
	retq
END(mds_handler_void)

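/*
 * On MDS-affected CPUs with the MD_CLEAR microcode update, verw with
 * a memory operand overwrites the internal fill, load, and store
 * buffers as a documented side effect; the handler below relies on
 * that behavior.
 */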
ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)

ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi
	movq	%rbx, %rsi
	movl	$40, %ecx
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0
	vorpd	(%rdx), %ymm0, %ymm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0
	vorpd	(%rdx), %zmm0, %zmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movl	$16, %ecx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)
1923