/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE,%ecx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/*
 * memcmp(b1, b2, len)
 *	   rdi,rsi,rdx
 */
ENTRY(memcmp)
	PUSH_FRAME_POINTER

	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

	cmpb	$8,%dl
	jg	100816f

	cmpb	$4,%dl
	jg	100408f

	cmpb	$2,%dl
	jge	100204f

	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	POP_FRAME_POINTER
	ret

	ALIGN_TEXT
100816:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	POP_FRAME_POINTER
	ret

/*
 * Mismatch was found.
 *
 * Before we compute the return value we narrow down the range (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
END(memcmp)
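
/*
 * For reference, a rough C rendition of the strategy used above (an
 * illustrative sketch, not part of the build): compare word-sized chunks
 * first and, once a mismatching chunk is found, narrow down to the first
 * differing byte so that the return value is the difference of the first
 * unequal bytes, as in the tail of the assembly version:
 *
 *	int
 *	memcmp_sketch(const void *b1, const void *b2, size_t len)
 *	{
 *		const unsigned char *p1 = b1, *p2 = b2;
 *		uint64_t w1, w2;
 *
 *		for (; len >= 8; p1 += 8, p2 += 8, len -= 8) {
 *			__builtin_memcpy(&w1, p1, 8);
 *			__builtin_memcpy(&w2, p2, 8);
 *			if (w1 != w2)
 *				break;		// narrow down below
 *		}
 *		for (; len > 0; p1++, p2++, len--)
 *			if (*p1 != *p2)
 *				return (*p1 - *p2);
 *		return (0);
 *	}
 */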

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
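
/*
 * A note on the overlap test used by the macro: it computes (dst - src) and
 * compares the difference against the length.  In C terms (an illustrative
 * sketch):
 *
 *	// Copy backwards only when the buffers overlap with src < dst.
 *	// One unsigned compare covers both conditions: if src > dst the
 *	// subtraction wraps around to a huge value and the test fails.
 *	if ((uintptr_t)dst - (uintptr_t)src < len)
 *		copy_backwards(dst, src, len);
 *	else
 *		copy_forwards(dst, src, len);
 */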
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm

.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)
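
/*
 * The _std/_erms pairs above and below are alternative implementations of
 * the same routine.  At boot the kernel is expected to select one of them
 * based on whether CPUID advertises Enhanced REP MOVSB/STOSB; a hedged
 * sketch of such an ifunc-style resolver (the real selection logic lives
 * elsewhere in the kernel):
 *
 *	void *memcpy_std(void *, const void *, size_t);
 *	void *memcpy_erms(void *, const void *, size_t);
 *
 *	static void *
 *	memcpy_select(void)
 *	{
 *		return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 *		    memcpy_erms : memcpy_std);
 *	}
 */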

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)
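
/*
 * The fill byte is broadcast to all eight byte lanes by the imulq above;
 * a minimal C illustration of the same trick:
 *
 *	uint64_t
 *	replicate_byte(uint8_t c)
 *	{
 *		// e.g. c == 0xab yields 0xabababababababab
 *		return ((uint64_t)c * 0x0101010101010101ULL);
 *	}
 */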

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*
 * strlen(string)
 *	  %rdi
 *
 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
 *
 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
 * with leaq.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
 *   by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
 */
ENTRY(strlen)
	PUSH_FRAME_POINTER
	movabsq	$0xfefefefefefefeff,%r8
	movabsq	$0x8080808080808080,%r9

	movq	%rdi,%r10
	movq	%rdi,%rcx
	testb	$7,%dil
	jz	2f

	/*
	 * Handle misaligned reads: align to 8 and fill
	 * the spurious bytes.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	shlq	$3,%rcx
	movq	$-1,%rdx
	shlq	%cl,%rdx
	notq	%rdx
	orq	%rdx,%r11

	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	3f

	/*
	 * Main loop.
	 */
	ALIGN_TEXT
1:
	leaq	8(%rdi),%rdi
2:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jz	1b
3:
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rcx,%rdi),%rax
	subq	%r10,%rax
	POP_FRAME_POINTER
	ret
END(strlen)
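
/*
 * A worked C version of the zero-byte test may help (an illustrative
 * sketch; aligned case only).  In the classic trick, after computing
 * (x - 0x01..01) & ~x & 0x80..80, the lowest set bit of the result marks
 * the first zero byte of x:
 *
 *	size_t
 *	strlen_sketch(const char *s)
 *	{
 *		const uint64_t *p = (const uint64_t *)s;   // assume 8-aligned
 *		uint64_t x, m;
 *
 *		for (;; p++) {
 *			x = *p;
 *			m = (x - 0x0101010101010101ULL) & ~x &
 *			    0x8080808080808080ULL;
 *			if (m != 0)		// some byte of x is zero
 *				break;
 *		}
 *		// lowest set bit of m marks the first zero byte (cf. bsfq)
 *		return ((const char *)p - s + __builtin_ctzll(m) / 8);
 *	}
 *
 * The assembly handles a misaligned start by reading the enclosing aligned
 * word and forcing the bytes that precede the string to all-ones.
 */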

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */
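
/*
 * The pattern, in rough C terms (an illustrative sketch; the routines are
 * written in assembly so that the faulting window is exactly known):
 *
 *	int
 *	fubyte_sketch(volatile const void *uaddr)
 *	{
 *		int rv;
 *
 *		if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS - 1)
 *			return (-1);
 *		curpcb->pcb_onfault = fault_resume_point;  // a code address
 *		rv = *(volatile const u_char *)uaddr;	// may fault; the trap
 *							// handler resumes at
 *							// pcb_onfault instead
 *		curpcb->pcb_onfault = NULL;
 *		return (rv);
 *	}
 */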

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx


	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyout_nosmap_std)
	COPYOUT smap=0 erms=0
END(copyout_nosmap_std)

ENTRY(copyout_smap_std)
	COPYOUT smap=1 erms=0
END(copyout_smap_std)

ENTRY(copyout_nosmap_erms)
	COPYOUT smap=0 erms=1
END(copyout_nosmap_erms)

ENTRY(copyout_smap_erms)
	COPYOUT smap=1 erms=1
END(copyout_smap_erms)

/*
 * copyin(from_user, to_kernel, len)
 *        %rdi,      %rsi,      %rdx
 */
.macro	COPYIN smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * make sure address is valid
	 */
	movq	%rdi,%rax
	addq	%rdx,%rax
	jc	copy_fault
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	xorl	%eax,%eax

	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyin_nosmap_std)
	COPYIN smap=0 erms=0
END(copyin_nosmap_std)

ENTRY(copyin_smap_std)
	COPYIN smap=1 erms=0
END(copyin_smap_std)

ENTRY(copyin_nosmap_erms)
	COPYIN smap=0 erms=1
END(copyin_nosmap_erms)

ENTRY(copyin_smap_erms)
	COPYIN smap=1 erms=1
END(copyin_smap_erms)

	ALIGN_TEXT
copy_fault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	$0,PCB_ONFAULT(%r11)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if access was successful, and 1 when comparison failed.
 *        Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch a corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	clac
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch a corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_smap)

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if access was successful, and 1 when comparison failed.
 *        Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	clac
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_smap)
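
/*
 * Typical caller-side use of casueword(9) is a compare-and-set retry loop
 * (a hedged sketch; see the umtx code for real examples):
 *
 *	u_long old;
 *	int rv;
 *
 *	for (;;) {
 *		rv = casueword(uaddr, expect, &old, new);
 *		if (rv == -1)
 *			return (EFAULT);	// fault on the user address
 *		if (rv == 0)
 *			break;			// CAS succeeded
 *		expect = old;			// comparison failed; retry
 *						// against the observed value
 *	}
 */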

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */

ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

	ALIGN_TEXT
fusufault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	copy a string from 'from' to 'to', stopping when a NUL character
 *	is reached.  Return ENAMETOOLONG if the string is longer than maxlen,
 *	and EFAULT on protection violations.  If lencopied is non-NULL,
 *	return the actual length in *lencopied.
 */
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp	1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)
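
/*
 * Expected caller-side semantics, in C terms (a hedged sketch):
 *
 *	char path[MAXPATHLEN];
 *	size_t done;
 *	int error;
 *
 *	error = copyinstr(udata, path, sizeof(path), &done);
 *	// error == 0: path holds the NUL-terminated string; done counts
 *	//	the bytes copied, including the terminating NUL
 *	// error == ENAMETOOLONG: the string did not fit into sizeof(path)
 *	// error == EFAULT: the user pointer was invalid
 */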

cpystrflt:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	lretq
END(lgdt)
1493
1494/*****************************************************************************/
1495/* setjump, longjump                                                         */
1496/*****************************************************************************/
1497
1498ENTRY(setjmp)
1499	movq	%rbx,0(%rdi)			/* save rbx */
1500	movq	%rsp,8(%rdi)			/* save rsp */
1501	movq	%rbp,16(%rdi)			/* save rbp */
1502	movq	%r12,24(%rdi)			/* save r12 */
1503	movq	%r13,32(%rdi)			/* save r13 */
1504	movq	%r14,40(%rdi)			/* save r14 */
1505	movq	%r15,48(%rdi)			/* save r15 */
1506	movq	0(%rsp),%rdx			/* get rta */
1507	movq	%rdx,56(%rdi)			/* save rip */
1508	xorl	%eax,%eax			/* return(0); */
1509	ret
1510END(setjmp)
1511
1512ENTRY(longjmp)
1513	movq	0(%rdi),%rbx			/* restore rbx */
1514	movq	8(%rdi),%rsp			/* restore rsp */
1515	movq	16(%rdi),%rbp			/* restore rbp */
1516	movq	24(%rdi),%r12			/* restore r12 */
1517	movq	32(%rdi),%r13			/* restore r13 */
1518	movq	40(%rdi),%r14			/* restore r14 */
1519	movq	48(%rdi),%r15			/* restore r15 */
1520	movq	56(%rdi),%rdx			/* get rta */
1521	movq	%rdx,0(%rsp)			/* put in return frame */
1522	xorl	%eax,%eax			/* return(1); */
1523	incl	%eax
1524	ret
1525END(longjmp)
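
/*
 * These are the kernel-private setjmp/longjmp, used for instance by the
 * debugger code.  Note that this longjmp always delivers 1 to the setjmp
 * site (see the xorl/incl above), regardless of any value passed.  Usage
 * follows the familiar pattern (a hedged sketch):
 *
 *	static jmp_buf jb;
 *
 *	if (setjmp(jb) == 0) {
 *		risky_operation();	// may end up in longjmp(jb)
 *	} else {
 *		// resumed here via longjmp, with setjmp returning 1
 *	}
 */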

/*
 * Support for reading MSRs in a safe manner: instead of panicking on #GP,
 * return an error.
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	PCB_ONFAULT(%r8),%r9
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read the MSR selected by %ecx.  Returns
				   the high 32 bits in %edx, low in %eax */
	salq	$32,%rdx	/* shift %rdx left by 32 bits */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	movq	%r9,PCB_ONFAULT(%r8)
	xorl	%eax,%eax
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in a safe manner: instead of panicking on #GP,
 * return an error.
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	PCB_ONFAULT(%r8),%r9
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write the MSR selected by %ecx.  Takes
				   the high 32 bits in %edx, low in %eax. */
	movq	%r9,PCB_ONFAULT(%r8)
	xorl	%eax,%eax
	POP_FRAME_POINTER
	ret
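
/*
 * Example use of the safe accessors (a hedged sketch): probe an MSR that
 * may not be implemented on the current CPU without panicking on the #GP:
 *
 *	uint64_t val;
 *
 *	if (rdmsr_safe(msr, &val) != 0)
 *		return (ENXIO);			// #GP caught, MSR absent
 *	if (wrmsr_safe(msr, val | 1) != 0)	// bit 0 purely illustrative
 *		return (ENXIO);
 */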

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	%r9,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

	.altmacro
	.macro	rsb_seq_label l
rsb_seq_\l:
	.endm
	.macro	rsb_call_label l
	call	rsb_seq_\l
	.endm
	.macro	rsb_seq count
	ll=1
	.rept	\count
	rsb_call_label	%(ll)
	nop
	rsb_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm
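
/*
 * For illustration, `rsb_seq 2' expands to the sequence below.  Each call
 * pushes a return address and allocates a Return Stack Buffer entry; the
 * target label sits right past the nop, and the addq then discards the
 * in-memory return address without a matching ret, leaving the RSB entry
 * behind.  rsb_flush below does this 32 times to fill the RSB with benign
 * entries:
 *
 *	call	rsb_seq_1
 *	nop
 * rsb_seq_1:
 *	addq	$8,%rsp
 *	call	rsb_seq_2
 *	nop
 * rsb_seq_2:
 *	addq	$8,%rsp
 */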

ENTRY(rsb_flush)
	rsb_seq	32
	ret
END(rsb_flush)

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_ibpb_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	je	rsb_flush
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush L1D cache.  Load enough of the data from the kernel text
 * to flush existing L1D content.
 *
 * N.B. The function does not follow ABI calling conventions: it corrupts
 * %rbx.  The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9,
 * and %rflags registers are clobbered.  The NMI handler caller only needs
 * %r13 and %r15 preserved.
 */
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)
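
/*
 * The two passes above, in C terms (an illustrative sketch; the code reads
 * kernel text starting at KERNBASE):
 *
 *	const volatile char *p = (const char *)KERNBASE;
 *	size_t i;
 *
 *	// pass 1: touch one byte per page so the TLB entries are present
 *	for (i = 0; i < L1D_FLUSH_SIZE; i += PAGE_SIZE)
 *		(void)p[i];
 *	// cpuid serializes between the passes
 *	// pass 2: read one byte per cache line, displacing the L1D content
 *	for (i = 0; i < L1D_FLUSH_SIZE; i += 64)
 *		(void)p[i];
 */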

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)

ENTRY(mds_handler_void)
	retq
END(mds_handler_void)

ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)

ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi
	movq	%rbx, %rsi
	movl	$40, %ecx
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0
	vorpd	(%rdx), %ymm0, %ymm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0
	vorpd	(%rdx), %zmm0, %zmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movl	$16, %ecx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)

/*
 * Do the same as Linux and execute IRET explicitly, even though the IPI
 * return path does it as well.
 */
ENTRY(cpu_sync_core)
/*
 * This can use the SERIALIZE instruction once it moves from the
 * 'future extensions' documents into the SDM proper.
 */
	movq	(%rsp), %rdx
	movl	%ss, %eax
	pushq	%rax
	pushq	%rsp
	addq	$16, (%rsp)
	pushfq
	movl	%cs, %eax
	pushq	%rax
	pushq	%rdx
	iretq
END(cpu_sync_core)
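
/*
 * For reference (derived from the code above): the five words popped by
 * IRETQ are, from the top of the stack downwards,
 *
 *	%rip	- the caller's return address, so the iretq also returns
 *	%cs	- the current code selector
 *	%rflags	- the current flags
 *	%rsp	- the entry %rsp + 8, i.e. the caller's stack pointer once
 *		  the return address has been consumed
 *	%ss	- the current stack selector
 *
 * IRETQ is a serializing instruction, which is the point of the exercise.
 */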