xref: /freebsd/sys/amd64/amd64/support.S (revision 16038816)
1/*-
2 * Copyright (c) 2018-2019 The FreeBSD Foundation
3 * Copyright (c) 2003 Peter Wemm.
4 * Copyright (c) 1993 The Regents of the University of California.
5 * All rights reserved.
6 *
7 * Portions of this software were developed by
8 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
9 * the FreeBSD Foundation.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * $FreeBSD$
36 */
37
38#include "opt_ddb.h"
39
40#include <machine/asmacros.h>
41#include <machine/specialreg.h>
42#include <machine/pmap.h>
43
44#include "assym.inc"
45
46	.text
47
48/* Address: %rdi */
/*
 * pagezero_std(addr): zero one page with 8-byte stores.
 * Clobbers %rax (left 0), %rcx, %rdi (advanced by the string op).
 */
49ENTRY(pagezero_std)
50	PUSH_FRAME_POINTER
51	movl	$PAGE_SIZE/8,%ecx
52	xorl	%eax,%eax
53	rep
54	stosq
55	POP_FRAME_POINTER
56	ret
57END(pagezero_std)
58
/*
 * pagezero_erms(addr): same contract as pagezero_std, but byte-granular
 * rep stosb — the variant selected on CPUs with fast strings (ERMS,
 * per the _erms naming used throughout this file).
 */
59ENTRY(pagezero_erms)
60	PUSH_FRAME_POINTER
61	movl	$PAGE_SIZE,%ecx
62	xorl	%eax,%eax
63	rep
64	stosb
65	POP_FRAME_POINTER
66	ret
67END(pagezero_erms)
68
69/*
70 * pagecopy(%rdi=from, %rsi=to)
71 */
/*
 * movsq copies (%rsi) -> (%rdi), so the two arguments are swapped
 * (via %r9) before the string copy of PAGE_SIZE/8 qwords.
 */
72ENTRY(pagecopy)
73	PUSH_FRAME_POINTER
74	movl	$PAGE_SIZE/8,%ecx
75	movq	%rdi,%r9
76	movq	%rsi,%rdi
77	movq	%r9,%rsi
78	rep
79	movsq
80	POP_FRAME_POINTER
81	ret
82END(pagecopy)
83
84/*
85 * memcmp(b1, b2, len)
86 *	   rdi,rsi,rdx
87 */
/*
 * Returns <0 / 0 / >0 as the difference of the first mismatching
 * bytes (subl of zero-extended bytes at the end of the routine).
 *
 * Local numeric labels encode the size range they handle, e.g.
 * 100816 = lengths (8,16], 101632 = (16,32], 103200 = >32; the long
 * 103200xx/101632xx labels are mismatch-narrowing entry points.
 */
88ENTRY(memcmp)
89	PUSH_FRAME_POINTER
90
91	xorl	%eax,%eax
9210:
93	cmpq	$16,%rdx
94	ja	101632f
95
96	cmpb	$8,%dl
97	jg	100816f
98
99	cmpb	$4,%dl
100	jg	100408f
101
102	cmpb	$2,%dl
103	jge	100204f
104
105	cmpb	$1,%dl
106	jl	100000f
107	movzbl	(%rdi),%eax
108	movzbl	(%rsi),%r8d
109	subl	%r8d,%eax
110100000:
111	POP_FRAME_POINTER
112	ret
113
114	ALIGN_TEXT
/* lengths (8,16]: two possibly-overlapping 8-byte compares */
115100816:
116	movq	(%rdi),%r8
117	movq	(%rsi),%r9
118	cmpq	%r8,%r9
119	jne	80f
120	movq	-8(%rdi,%rdx),%r8
121	movq	-8(%rsi,%rdx),%r9
122	cmpq	%r8,%r9
123	jne	10081608f
124	POP_FRAME_POINTER
125	ret
126	ALIGN_TEXT
/* lengths (4,8]: two possibly-overlapping 4-byte compares */
127100408:
128	movl	(%rdi),%r8d
129	movl	(%rsi),%r9d
130	cmpl	%r8d,%r9d
131	jne	80f
132	movl	-4(%rdi,%rdx),%r8d
133	movl	-4(%rsi,%rdx),%r9d
134	cmpl	%r8d,%r9d
135	jne	10040804f
136	POP_FRAME_POINTER
137	ret
138	ALIGN_TEXT
/* lengths [2,4]: two possibly-overlapping 2-byte compares */
139100204:
140	movzwl	(%rdi),%r8d
141	movzwl	(%rsi),%r9d
142	cmpl	%r8d,%r9d
143	jne	1f
144	movzwl	-2(%rdi,%rdx),%r8d
145	movzwl	-2(%rsi,%rdx),%r9d
146	cmpl	%r8d,%r9d
147	jne	1f
148	POP_FRAME_POINTER
149	ret
150	ALIGN_TEXT
/* lengths (16,32]: four 8-byte compares, last two anchored at the end */
151101632:
152	cmpq	$32,%rdx
153	ja	103200f
154	movq	(%rdi),%r8
155	movq	(%rsi),%r9
156	cmpq	%r8,%r9
157	jne	80f
158	movq	8(%rdi),%r8
159	movq	8(%rsi),%r9
160	cmpq	%r8,%r9
161	jne	10163208f
162	movq	-16(%rdi,%rdx),%r8
163	movq	-16(%rsi,%rdx),%r9
164	cmpq	%r8,%r9
165	jne	10163216f
166	movq	-8(%rdi,%rdx),%r8
167	movq	-8(%rsi,%rdx),%r9
168	cmpq	%r8,%r9
169	jne	10163224f
170	POP_FRAME_POINTER
171	ret
172	ALIGN_TEXT
/*
 * lengths >32: 32 bytes per iteration; sub/or detects a mismatch in
 * either 16-byte half without branching per qword.
 */
173103200:
174	movq	(%rdi),%r8
175	movq	8(%rdi),%r9
176	subq	(%rsi),%r8
177	subq	8(%rsi),%r9
178	orq	%r8,%r9
179	jnz	10320000f
180
181	movq    16(%rdi),%r8
182	movq    24(%rdi),%r9
183	subq    16(%rsi),%r8
184	subq    24(%rsi),%r9
185	orq	%r8,%r9
186	jnz     10320016f
187
188	leaq	32(%rdi),%rdi
189	leaq	32(%rsi),%rsi
190	subq	$32,%rdx
191	cmpq	$32,%rdx
192	jae	103200b
193	cmpb	$0,%dl
194	jne	10b
195	POP_FRAME_POINTER
196	ret
197
198/*
199 * Mismatch was found.
200 *
201 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
202 */
203	ALIGN_TEXT
20410320016:
205	leaq	16(%rdi),%rdi
206	leaq	16(%rsi),%rsi
20710320000:
208	movq	(%rdi),%r8
209	movq	(%rsi),%r9
210	cmpq	%r8,%r9
211	jne	80f
212	leaq	8(%rdi),%rdi
213	leaq	8(%rsi),%rsi
214	jmp	80f
215	ALIGN_TEXT
/* mismatch was in the trailing 8 bytes: re-point at it */
21610081608:
21710163224:
218	leaq	-8(%rdi,%rdx),%rdi
219	leaq	-8(%rsi,%rdx),%rsi
220	jmp	80f
221	ALIGN_TEXT
22210163216:
223	leaq	-16(%rdi,%rdx),%rdi
224	leaq	-16(%rsi,%rdx),%rsi
225	jmp	80f
226	ALIGN_TEXT
22710163208:
228	leaq	8(%rdi),%rdi
229	leaq	8(%rsi),%rsi
230	jmp	80f
231	ALIGN_TEXT
23210040804:
233	leaq	-4(%rdi,%rdx),%rdi
234	leaq	-4(%rsi,%rdx),%rsi
235	jmp	1f
236
237	ALIGN_TEXT
/* 8-byte mismatch known: pick the differing 4-byte half */
23880:
239	movl	(%rdi),%r8d
240	movl	(%rsi),%r9d
241	cmpl	%r8d,%r9d
242	jne	1f
243	leaq	4(%rdi),%rdi
244	leaq	4(%rsi),%rsi
245
246/*
247 * We have up to 4 bytes to inspect.
248 */
2491:
250	movzbl	(%rdi),%eax
251	movzbl	(%rsi),%r8d
252	cmpb	%r8b,%al
253	jne	2f
254
255	movzbl	1(%rdi),%eax
256	movzbl	1(%rsi),%r8d
257	cmpb	%r8b,%al
258	jne	2f
259
260	movzbl	2(%rdi),%eax
261	movzbl	2(%rsi),%r8d
262	cmpb	%r8b,%al
263	jne	2f
264
265	movzbl	3(%rdi),%eax
266	movzbl	3(%rsi),%r8d
2672:
268	subl	%r8d,%eax
269	POP_FRAME_POINTER
270	ret
271END(memcmp)
272
273/*
274 * memmove(dst, src, cnt)
275 *         rdi, rsi, rdx
276 */
277
278/*
279 * Register state at entry is supposed to be as follows:
280 * rdi - destination
281 * rsi - source
282 * rdx - count
283 *
284 * The macro possibly clobbers the above and: rcx, r8, r9, r10
285 * It does not clobber rax nor r11.
286 */
/*
 * MEMMOVE: core copy engine, instantiated for memmove/memcpy and for
 * copyin/copyout (see the register-state comment above).  Numeric labels
 * encode the residual size range they handle (100816 = (8,16], etc.);
 * the 2xxx labels are the backwards-copy (overlap) path.
 *
 * Fix vs. previous revision: stray trailing comma removed from the
 * "-8(%rdi,%rcx,)" memory operand in the (8,16] case.
 */
287.macro MEMMOVE erms overlap begin end
288	\begin
289
290	/*
291	 * For sizes 0..32 all data is read before it is written, so there
292	 * is no correctness issue with direction of copying.
293	 */
294	cmpq	$32,%rcx
295	jbe	101632f
296
297.if \overlap == 1
298	movq	%rdi,%r8
299	subq	%rsi,%r8
300	cmpq	%rcx,%r8	/* overlapping && src < dst? */
301	jb	2f
302.endif
303
304	cmpq	$256,%rcx
305	ja	1256f
306
	/* forward copy, 32 bytes per iteration, for sizes (32,256] */
307	ALIGN_TEXT
308103200:
309	movq	(%rsi),%rdx
310	movq	%rdx,(%rdi)
311	movq	8(%rsi),%rdx
312	movq	%rdx,8(%rdi)
313	movq	16(%rsi),%rdx
314	movq	%rdx,16(%rdi)
315	movq	24(%rsi),%rdx
316	movq	%rdx,24(%rdi)
317	leaq	32(%rsi),%rsi
318	leaq	32(%rdi),%rdi
319	subq	$32,%rcx
320	cmpq	$32,%rcx
321	jae	103200b
322	cmpb	$0,%cl
323	jne	101632f
324	\end
325	ret
326	ALIGN_TEXT
	/* sizes [0,32]: overlapping loads from both ends, then stores */
327101632:
328	cmpb	$16,%cl
329	jl	100816f
330	movq	(%rsi),%rdx
331	movq	8(%rsi),%r8
332	movq	-16(%rsi,%rcx),%r9
333	movq	-8(%rsi,%rcx),%r10
334	movq	%rdx,(%rdi)
335	movq	%r8,8(%rdi)
336	movq	%r9,-16(%rdi,%rcx)
337	movq	%r10,-8(%rdi,%rcx)
338	\end
339	ret
340	ALIGN_TEXT
341100816:
342	cmpb	$8,%cl
343	jl	100408f
344	movq	(%rsi),%rdx
345	movq	-8(%rsi,%rcx),%r8
346	movq	%rdx,(%rdi)
347	movq	%r8,-8(%rdi,%rcx)
348	\end
349	ret
350	ALIGN_TEXT
351100408:
352	cmpb	$4,%cl
353	jl	100204f
354	movl	(%rsi),%edx
355	movl	-4(%rsi,%rcx),%r8d
356	movl	%edx,(%rdi)
357	movl	%r8d,-4(%rdi,%rcx)
358	\end
359	ret
360	ALIGN_TEXT
361100204:
362	cmpb	$2,%cl
363	jl	100001f
364	movzwl	(%rsi),%edx
365	movzwl	-2(%rsi,%rcx),%r8d
366	movw	%dx,(%rdi)
367	movw	%r8w,-2(%rdi,%rcx)
368	\end
369	ret
370	ALIGN_TEXT
371100001:
372	cmpb	$1,%cl
373	jl	100000f
374	movb	(%rsi),%dl
375	movb	%dl,(%rdi)
376100000:
377	\end
378	ret
379
380	ALIGN_TEXT
	/* sizes >256: rep-string copy; align dst to 16 first if needed */
3811256:
382	testb	$15,%dil
383	jnz	100f
384.if \erms == 1
385	rep
386	movsb
387.else
388	shrq	$3,%rcx                         /* copy by 64-bit words */
389	rep
390	movsq
391	movq	%rdx,%rcx
392	andl	$7,%ecx                         /* any bytes left? */
393	jne	100408b
394.endif
395	\end
396	ret
100:
	/*
	 * Unaligned dst: save the first 16 bytes of src (%r8/%r9), bump
	 * src/dst up to the next 16-byte dst boundary, copy the rest, then
	 * store the saved head over the (possibly re-copied) start.
	 */
398	movq	(%rsi),%r8
399	movq	8(%rsi),%r9
400	movq	%rdi,%r10
401	movq	%rdi,%rcx
402	andq	$15,%rcx
403	leaq	-16(%rdx,%rcx),%rdx
404	neg	%rcx
405	leaq	16(%rdi,%rcx),%rdi
406	leaq	16(%rsi,%rcx),%rsi
407	movq	%rdx,%rcx
408.if \erms == 1
409	rep
410	movsb
411	movq	%r8,(%r10)
412	movq	%r9,8(%r10)
413.else
414	shrq	$3,%rcx                         /* copy by 64-bit words */
415	rep
416	movsq
417	movq	%r8,(%r10)
418	movq	%r9,8(%r10)
419	movq	%rdx,%rcx
420	andl	$7,%ecx                         /* any bytes left? */
421	jne	100408b
422.endif
423	\end
424	ret
425
426.if \overlap == 1
427	/*
428	 * Copy backwards.
429	 */
430        ALIGN_TEXT
4312:
432	cmpq	$256,%rcx
433	ja	2256f
434
	/* point src/dst at their last qword */
435	leaq	-8(%rdi,%rcx),%rdi
436	leaq	-8(%rsi,%rcx),%rsi
437
438	cmpq	$32,%rcx
439	jb	2016f
440
441	ALIGN_TEXT
4422032:
443	movq	(%rsi),%rdx
444	movq	%rdx,(%rdi)
445	movq	-8(%rsi),%rdx
446	movq	%rdx,-8(%rdi)
447	movq	-16(%rsi),%rdx
448	movq	%rdx,-16(%rdi)
449	movq	-24(%rsi),%rdx
450	movq	%rdx,-24(%rdi)
451	leaq	-32(%rsi),%rsi
452	leaq	-32(%rdi),%rdi
453	subq	$32,%rcx
454	cmpq	$32,%rcx
455	jae	2032b
456	cmpb	$0,%cl
457	jne	2016f
458	\end
459	ret
460	ALIGN_TEXT
	/* backwards tail: 16/8/4/2/1 steps, pointers track the last qword */
4612016:
462	cmpb	$16,%cl
463	jl	2008f
464	movq	(%rsi),%rdx
465	movq	%rdx,(%rdi)
466	movq	-8(%rsi),%rdx
467	movq	%rdx,-8(%rdi)
468	subb	$16,%cl
469	jz	2000f
470	leaq	-16(%rsi),%rsi
471	leaq	-16(%rdi),%rdi
4722008:
473	cmpb	$8,%cl
474	jl	2004f
475	movq	(%rsi),%rdx
476	movq	%rdx,(%rdi)
477	subb	$8,%cl
478	jz	2000f
479	leaq	-8(%rsi),%rsi
480	leaq	-8(%rdi),%rdi
4812004:
482	cmpb	$4,%cl
483	jl	2002f
484	movl	4(%rsi),%edx
485	movl	%edx,4(%rdi)
486	subb	$4,%cl
487	jz	2000f
488	leaq	-4(%rsi),%rsi
489	leaq	-4(%rdi),%rdi
4902002:
491	cmpb	$2,%cl
492	jl	2001f
493	movw	6(%rsi),%dx
494	movw	%dx,6(%rdi)
495	subb	$2,%cl
496	jz	2000f
497	leaq	-2(%rsi),%rsi
498	leaq	-2(%rdi),%rdi
4992001:
500	cmpb	$1,%cl
501	jl	2000f
502	movb	7(%rsi),%dl
503	movb	%dl,7(%rdi)
5042000:
505	\end
506	ret
507	ALIGN_TEXT
	/* large backwards copy: descending rep string op (std ... cld) */
5082256:
509	std
510.if \erms == 1
511	leaq	-1(%rdi,%rcx),%rdi
512	leaq	-1(%rsi,%rcx),%rsi
513	rep
514	movsb
515	cld
516.else
517	leaq	-8(%rdi,%rcx),%rdi
518	leaq	-8(%rsi,%rcx),%rsi
519	shrq	$3,%rcx
520	rep
521	movsq
522	cld
523	movq	%rdx,%rcx
524	andb	$7,%cl
525	jne	2004b
526.endif
527	\end
528	ret
529.endif
530.endm
531
/*
 * Prologue shared by the memmove/memcpy entry points: sets up the frame,
 * returns dst in %rax (libc contract) and moves the count into %rcx as
 * MEMMOVE expects.
 */
532.macro MEMMOVE_BEGIN
533	PUSH_FRAME_POINTER
534	movq	%rdi,%rax
535	movq	%rdx,%rcx
536.endm
537
538.macro MEMMOVE_END
539	POP_FRAME_POINTER
540.endm
541
/* memmove: overlap=1 selects the backwards-copy path when needed */
542ENTRY(memmove_std)
543	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
544END(memmove_std)
545
546ENTRY(memmove_erms)
547	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
548END(memmove_erms)
549
550/*
551 * memcpy(dst, src, len)
552 *        rdi, rsi, rdx
553 *
554 * Note: memcpy does not support overlapping copies
555 */
556ENTRY(memcpy_std)
557	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
558END(memcpy_std)
559
560ENTRY(memcpy_erms)
561	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
562END(memcpy_erms)
563
564/*
565 * memset(dst, c,   len)
566 *        rdi, rsi, rdx
567 */
/*
 * %r10 = fill byte replicated into all 8 lanes (imul by 0x0101..01).
 * Returns dst in %rax.  Numeric labels encode the residual size range,
 * as in MEMMOVE above.
 */
568.macro MEMSET erms
569	PUSH_FRAME_POINTER
570	movq	%rdi,%rax
571	movq	%rdx,%rcx
572	movzbq	%sil,%r8
573	movabs	$0x0101010101010101,%r10
574	imulq	%r8,%r10
575
576	cmpq	$32,%rcx
577	jbe	101632f
578
579	cmpq	$256,%rcx
580	ja	1256f
581
	/* sizes (32,256]: 32 bytes per iteration, overlapping 16-byte tail */
582	ALIGN_TEXT
583103200:
584	movq	%r10,(%rdi)
585	movq	%r10,8(%rdi)
586	movq	%r10,16(%rdi)
587	movq	%r10,24(%rdi)
588	leaq	32(%rdi),%rdi
589	subq	$32,%rcx
590	cmpq	$32,%rcx
591	ja	103200b
592	cmpb	$16,%cl
593	ja	201632f
594	movq	%r10,-16(%rdi,%rcx)
595	movq	%r10,-8(%rdi,%rcx)
596	POP_FRAME_POINTER
597	ret
598	ALIGN_TEXT
599101632:
600	cmpb	$16,%cl
601	jl	100816f
602201632:
603	movq	%r10,(%rdi)
604	movq	%r10,8(%rdi)
605	movq	%r10,-16(%rdi,%rcx)
606	movq	%r10,-8(%rdi,%rcx)
607	POP_FRAME_POINTER
608	ret
609	ALIGN_TEXT
610100816:
611	cmpb	$8,%cl
612	jl	100408f
613	movq	%r10,(%rdi)
614	movq	%r10,-8(%rdi,%rcx)
615	POP_FRAME_POINTER
616	ret
617	ALIGN_TEXT
618100408:
619	cmpb	$4,%cl
620	jl	100204f
621	movl	%r10d,(%rdi)
622	movl	%r10d,-4(%rdi,%rcx)
623	POP_FRAME_POINTER
624	ret
625	ALIGN_TEXT
626100204:
627	cmpb	$2,%cl
628	jl	100001f
629	movw	%r10w,(%rdi)
630	movw	%r10w,-2(%rdi,%rcx)
631	POP_FRAME_POINTER
632	ret
633	ALIGN_TEXT
634100001:
635	cmpb	$0,%cl
636	je	100000f
637	movb	%r10b,(%rdi)
638100000:
639	POP_FRAME_POINTER
640	ret
641	ALIGN_TEXT
	/* sizes >256: rep stos; %r9 preserves dst for the return value */
6421256:
643	movq	%rdi,%r9
644	movq	%r10,%rax
645	testl	$15,%edi
646	jnz	3f
6471:
648.if \erms == 1
649	rep
650	stosb
651	movq	%r9,%rax
652.else
653	movq	%rcx,%rdx
654	shrq	$3,%rcx
655	rep
656	stosq
657	movq	%r9,%rax
658	andl	$7,%edx
659	jnz	2f
660	POP_FRAME_POINTER
661	ret
6622:
	/* up to 7 trailing bytes: one overlapping qword store */
663	movq	%r10,-8(%rdi,%rdx)
664.endif
665	POP_FRAME_POINTER
666	ret
667	ALIGN_TEXT
	/*
	 * Unaligned dst: store 16 bytes at the start, round dst up to a
	 * 16-byte boundary, shrink the count accordingly, and rejoin.
	 */
6683:
669	movq	%r10,(%rdi)
670	movq	%r10,8(%rdi)
671	movq	%rdi,%r8
672	andq	$15,%r8
673	leaq	-16(%rcx,%r8),%rcx
674	neg	%r8
675	leaq	16(%rdi,%r8),%rdi
676	jmp	1b
677.endm
678
679ENTRY(memset_std)
680	MEMSET erms=0
681END(memset_std)
682
683ENTRY(memset_erms)
684	MEMSET erms=1
685END(memset_erms)
686
687/* fillw(pat, base, cnt) */
688/*       %rdi,%rsi, %rdx */
/*
 * Store cnt copies of the 16-bit pattern at base.  Returns the pattern
 * in %rax (whatever was passed in %rdi).
 */
689ENTRY(fillw)
690	PUSH_FRAME_POINTER
691	movq	%rdi,%rax
692	movq	%rsi,%rdi
693	movq	%rdx,%rcx
694	rep
695	stosw
696	POP_FRAME_POINTER
697	ret
698END(fillw)
699
700/*
701 * strlen(string)
702 *	  %rdi
703 *
704 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
705 *
706 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
707 * with leaq.
708 *
709 * For a description see either:
710 * - "Hacker's Delight" by Henry S. Warren, Jr.
711 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
712 *   by Agner Fog
713 *
714 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
715 */
/*
 * %r8 = -0x0101..01, %r9 = 0x8080..80, %r10 = original pointer,
 * %r11 = current qword, %rcx = per-qword zero-byte mask / scratch.
 */
716ENTRY(strlen)
717	PUSH_FRAME_POINTER
718	movabsq	$0xfefefefefefefeff,%r8
719	movabsq	$0x8080808080808080,%r9
720
721	movq	%rdi,%r10
722	movq	%rdi,%rcx
723	testb	$7,%dil
724	jz	2f
725
726	/*
727	 * Handle misaligned reads: align to 8 and fill
728	 * the spurious bytes.
729	 */
730	andq	$~7,%rdi
731	movq	(%rdi),%r11
	/* build an all-ones mask over the bytes before the string start */
732	shlq	$3,%rcx
733	movq	$-1,%rdx
734	shlq	%cl,%rdx
735	notq	%rdx
736	orq	%rdx,%r11
737
738	leaq	(%r11,%r8),%rcx
739	notq	%r11
740	andq	%r11,%rcx
741	andq	%r9,%rcx
742	jnz	3f
743
744	/*
745	 * Main loop.
746	 */
747	ALIGN_TEXT
7481:
749	leaq	8(%rdi),%rdi
7502:
751	movq	(%rdi),%r11
752	leaq	(%r11,%r8),%rcx
753	notq	%r11
754	andq	%r11,%rcx
755	andq	%r9,%rcx
756	jz	1b
7573:
	/* bsf finds the lowest set 0x80 marker; /8 gives the byte index */
758	bsfq	%rcx,%rcx
759	shrq	$3,%rcx
760	leaq	(%rcx,%rdi),%rax
761	subq	%r10,%rax
762	POP_FRAME_POINTER
763	ret
764END(strlen)
765
766/*****************************************************************************/
767/* copyout and fubyte family                                                 */
768/*****************************************************************************/
769/*
770 * Access user memory from inside the kernel. These routines should be
771 * the only places that do this.
772 *
773 * These routines set curpcb->pcb_onfault for the time they execute. When a
774 * protection violation occurs inside the functions, the trap handler
775 * returns to *curpcb->pcb_onfault instead of the function.
776 */
777
/*
 * stac/clac toggle EFLAGS.AC to permit/forbid supervisor access to
 * user pages when SMAP is enabled; these macros expand to nothing
 * when the \smap parameter is 0.
 */
778.macro SMAP_DISABLE smap
779.if	\smap
780	stac
781.endif
782.endm
783
784
785.macro SMAP_ENABLE smap
786.if	\smap
787	clac
788.endif
789.endm
790
/* \begin hook for MEMMOVE as used by copyin/copyout: no extra prologue */
791.macro COPYINOUT_BEGIN
792.endm
/*
 * \end hook: clear pcb_onfault (%rax is 0 on the success path) before
 * popping the frame.  %r11 holds curpcb, set up by COPYIN/COPYOUT.
 */
794.macro COPYINOUT_END
795	movq	%rax,PCB_ONFAULT(%r11)
796	POP_FRAME_POINTER
797.endm
798
799.macro COPYINOUT_SMAP_END
800	SMAP_ENABLE smap=1
801	COPYINOUT_END
802.endm
803
804/*
805 * copyout(from_kernel, to_user, len)
806 *         %rdi,        %rsi,    %rdx
807 */
/*
 * Returns 0 on success; faults land in copy_fault, which returns EFAULT.
 * pcb_onfault stays set across the whole copy and is cleared by the
 * COPYINOUT_END hook passed to MEMMOVE.
 */
808.macro	COPYOUT smap erms
809	PUSH_FRAME_POINTER
810	movq	PCPU(CURPCB),%r11
811	movq	$copy_fault,PCB_ONFAULT(%r11)
812
813	/*
814	 * Check explicitly for non-user addresses.
815	 * First, prevent address wrapping.
816	 */
817	movq	%rsi,%rax
818	addq	%rdx,%rax
819	jc	copy_fault
820/*
821 * XXX STOP USING VM_MAXUSER_ADDRESS.
822 * It is an end address, not a max, so every time it is used correctly it
823 * looks like there is an off by one error, and of course it caused an off
824 * by one error in several places.
825 */
826	movq	$VM_MAXUSER_ADDRESS,%rcx
827	cmpq	%rcx,%rax
828	ja	copy_fault
829
830	/*
831	 * Set return value to zero. Remaining failure mode goes through
832	 * copy_fault.
833	 */
834	xorl	%eax,%eax
835
836	/*
837	 * Set up arguments for MEMMOVE.
838	 */
839	movq	%rdi,%r8
840	movq	%rsi,%rdi
841	movq	%r8,%rsi
842	movq	%rdx,%rcx
843
844
845	SMAP_DISABLE \smap
846.if	\smap == 1
847	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
848.else
849	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
850.endif
851	/* NOTREACHED */
852.endm
853
/*
 * Four copyout flavors; the kernel selects one at boot based on CPU
 * features (SMAP presence, ERMS-style fast strings).
 */
854ENTRY(copyout_nosmap_std)
855	COPYOUT smap=0 erms=0
856END(copyout_nosmap_std)
857
858ENTRY(copyout_smap_std)
859	COPYOUT smap=1 erms=0
860END(copyout_smap_std)
861
862ENTRY(copyout_nosmap_erms)
863	COPYOUT smap=0 erms=1
864END(copyout_nosmap_erms)
865
866ENTRY(copyout_smap_erms)
867	COPYOUT smap=1 erms=1
868END(copyout_smap_erms)
869
870/*
871 * copyin(from_user, to_kernel, len)
872 *        %rdi,      %rsi,      %rdx
873 */
/*
 * Mirror image of COPYOUT: validates the user source range (wrap check
 * plus VM_MAXUSER_ADDRESS bound), then hands off to MEMMOVE with
 * pcb_onfault -> copy_fault armed.  Returns 0 or EFAULT.
 */
874.macro	COPYIN smap erms
875	PUSH_FRAME_POINTER
876	movq	PCPU(CURPCB),%r11
877	movq	$copy_fault,PCB_ONFAULT(%r11)
878
879	/*
880	 * make sure address is valid
881	 */
882	movq	%rdi,%rax
883	addq	%rdx,%rax
884	jc	copy_fault
885	movq	$VM_MAXUSER_ADDRESS,%rcx
886	cmpq	%rcx,%rax
887	ja	copy_fault
888
889	xorl	%eax,%eax
890
891	movq	%rdi,%r8
892	movq	%rsi,%rdi
893	movq	%r8,%rsi
894	movq	%rdx,%rcx
895
896	SMAP_DISABLE \smap
897.if	\smap == 1
898	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
899.else
900	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
901.endif
902	/* NOTREACHED */
903.endm
904
905ENTRY(copyin_nosmap_std)
906	COPYIN smap=0 erms=0
907END(copyin_nosmap_std)
908
909ENTRY(copyin_smap_std)
910	COPYIN smap=1 erms=0
911END(copyin_smap_std)
912
913ENTRY(copyin_nosmap_erms)
914	COPYIN smap=0 erms=1
915END(copyin_nosmap_erms)
916
917ENTRY(copyin_smap_erms)
918	COPYIN smap=1 erms=1
919END(copyin_smap_erms)
920
/*
 * Fault landing pad for copyin/copyout: undo a possibly-pending stac
 * (only on CPUs that have SMAP), clear pcb_onfault (%r11 = curpcb,
 * set by COPYIN/COPYOUT) and return EFAULT.
 */
921	ALIGN_TEXT
922copy_fault:
923	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
924	je	1f
925	clac
9261:	movq	$0,PCB_ONFAULT(%r11)
927	movl	$EFAULT,%eax
928	POP_FRAME_POINTER
929	ret
930
931/*
932 * casueword32.  Compare and set user integer.  Returns -1 on fault,
933 *        0 if the store succeeded, 1 if the comparison failed (see the
934 *        setne/movzbl pair below).  Old value is written to *oldp.
935 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
936 */
937ENTRY(casueword32_nosmap)
938	PUSH_FRAME_POINTER
939	movq	PCPU(CURPCB),%r8
940	movq	$fusufault,PCB_ONFAULT(%r8)
941
942	movq	$VM_MAXUSER_ADDRESS-4,%rax
943	cmpq	%rax,%rdi			/* verify address is valid */
944	ja	fusufault
945
946	movl	%esi,%eax			/* old */
947#ifdef SMP
948	lock
949#endif
950	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
951	setne	%cl				/* %cl = 1 iff compare failed */
952
953	/*
954	 * The old value is in %eax.  If the store succeeded it will be the
955	 * value we expected (old) from before the store, otherwise it will
956	 * be the current value.  Save %eax into %esi to prepare the return
957	 * value.
958	 */
959	movl	%eax,%esi
960	xorl	%eax,%eax
961	movq	%rax,PCB_ONFAULT(%r8)
962
963	/*
964	 * Access the oldp after the pcb_onfault is cleared, to correctly
965	 * catch corrupted pointer.
966	 */
967	movl	%esi,(%rdx)			/* oldp = %rdx */
968	POP_FRAME_POINTER
969	movzbl	%cl, %eax
970	ret
971END(casueword32_nosmap)
972
/* SMAP variant: identical, with stac/clac bracketing the user access */
973ENTRY(casueword32_smap)
974	PUSH_FRAME_POINTER
975	movq	PCPU(CURPCB),%r8
976	movq	$fusufault,PCB_ONFAULT(%r8)
977
978	movq	$VM_MAXUSER_ADDRESS-4,%rax
979	cmpq	%rax,%rdi			/* verify address is valid */
980	ja	fusufault
981
982	movl	%esi,%eax			/* old */
983	stac
984#ifdef SMP
985	lock
986#endif
987	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
988	clac
989	setne	%cl
990
991	/*
992	 * The old value is in %eax.  If the store succeeded it will be the
993	 * value we expected (old) from before the store, otherwise it will
994	 * be the current value.  Save %eax into %esi to prepare the return
995	 * value.
996	 */
997	movl	%eax,%esi
998	xorl	%eax,%eax
999	movq	%rax,PCB_ONFAULT(%r8)
1000
1001	/*
1002	 * Access the oldp after the pcb_onfault is cleared, to correctly
1003	 * catch corrupted pointer.
1004	 */
1005	movl	%esi,(%rdx)			/* oldp = %rdx */
1006	POP_FRAME_POINTER
1007	movzbl	%cl, %eax
1008	ret
1009END(casueword32_smap)
1009
1010/*
1011 * casueword.  Compare and set user long.  Returns -1 on fault,
1012 *        0 if the store succeeded, 1 if the comparison failed.
1013 *        Old value is written to *oldp.
1014 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
1015 */
1016ENTRY(casueword_nosmap)
1017	PUSH_FRAME_POINTER
1018	movq	PCPU(CURPCB),%r8
1019	movq	$fusufault,PCB_ONFAULT(%r8)
1020
	/*
	 * Bound is -8, not -4: cmpxchgq touches 8 bytes, matching the
	 * other 64-bit user accessors (fueword/suword).
	 */
1021	movq	$VM_MAXUSER_ADDRESS-8,%rax
1022	cmpq	%rax,%rdi			/* verify address is valid */
1023	ja	fusufault
1024
1025	movq	%rsi,%rax			/* old */
1026#ifdef SMP
1027	lock
1028#endif
1029	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
1030	setne	%cl				/* %cl = 1 iff compare failed */
1031
1032	/*
1033	 * The old value is in %rax.  If the store succeeded it will be the
1034	 * value we expected (old) from before the store, otherwise it will
1035	 * be the current value.
1036	 */
1037	movq	%rax,%rsi
1038	xorl	%eax,%eax
1039	movq	%rax,PCB_ONFAULT(%r8)
1040	movq	%rsi,(%rdx)
1041	POP_FRAME_POINTER
1042	movzbl	%cl, %eax
1043	ret
1044END(casueword_nosmap)
1045
/* SMAP variant: identical, with stac/clac bracketing the user access */
1046ENTRY(casueword_smap)
1047	PUSH_FRAME_POINTER
1048	movq	PCPU(CURPCB),%r8
1049	movq	$fusufault,PCB_ONFAULT(%r8)
1050
1051	movq	$VM_MAXUSER_ADDRESS-8,%rax
1052	cmpq	%rax,%rdi			/* verify address is valid */
1053	ja	fusufault
1054
1055	movq	%rsi,%rax			/* old */
1056	stac
1057#ifdef SMP
1058	lock
1059#endif
1060	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
1061	clac
1062	setne	%cl
1063
1064	/*
1065	 * The old value is in %rax.  If the store succeeded it will be the
1066	 * value we expected (old) from before the store, otherwise it will
1067	 * be the current value.
1068	 */
1069	movq	%rax,%rsi
1070	xorl	%eax,%eax
1071	movq	%rax,PCB_ONFAULT(%r8)
1072	movq	%rsi,(%rdx)
1073	POP_FRAME_POINTER
1074	movzbl	%cl, %eax
1075	ret
1076END(casueword_smap)
1076
1077/*
1078 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
1079 * byte from user memory.
1080 * addr = %rdi, valp = %rsi
1081 */
1082
/*
 * fueword: fetch a user 64-bit word into *valp.  Returns 0 on success,
 * -1 via fusufault on a bad address or a page fault.  The fetched value
 * is staged in %r11 and stored only after pcb_onfault is cleared.
 */
1083ENTRY(fueword_nosmap)
1084	PUSH_FRAME_POINTER
1085	movq	PCPU(CURPCB),%rcx
1086	movq	$fusufault,PCB_ONFAULT(%rcx)
1087
1088	movq	$VM_MAXUSER_ADDRESS-8,%rax
1089	cmpq	%rax,%rdi			/* verify address is valid */
1090	ja	fusufault
1091
1092	xorl	%eax,%eax
1093	movq	(%rdi),%r11
1094	movq	%rax,PCB_ONFAULT(%rcx)
1095	movq	%r11,(%rsi)
1096	POP_FRAME_POINTER
1097	ret
1098END(fueword_nosmap)
1099
1100ENTRY(fueword_smap)
1101	PUSH_FRAME_POINTER
1102	movq	PCPU(CURPCB),%rcx
1103	movq	$fusufault,PCB_ONFAULT(%rcx)
1104
1105	movq	$VM_MAXUSER_ADDRESS-8,%rax
1106	cmpq	%rax,%rdi			/* verify address is valid */
1107	ja	fusufault
1108
1109	xorl	%eax,%eax
1110	stac
1111	movq	(%rdi),%r11
1112	clac
1113	movq	%rax,PCB_ONFAULT(%rcx)
1114	movq	%r11,(%rsi)
1115	POP_FRAME_POINTER
1116	ret
1117END(fueword_smap)
1118
/* fueword32: same contract for a 32-bit word */
1119ENTRY(fueword32_nosmap)
1120	PUSH_FRAME_POINTER
1121	movq	PCPU(CURPCB),%rcx
1122	movq	$fusufault,PCB_ONFAULT(%rcx)
1123
1124	movq	$VM_MAXUSER_ADDRESS-4,%rax
1125	cmpq	%rax,%rdi			/* verify address is valid */
1126	ja	fusufault
1127
1128	xorl	%eax,%eax
1129	movl	(%rdi),%r11d
1130	movq	%rax,PCB_ONFAULT(%rcx)
1131	movl	%r11d,(%rsi)
1132	POP_FRAME_POINTER
1133	ret
1134END(fueword32_nosmap)
1135
1136ENTRY(fueword32_smap)
1137	PUSH_FRAME_POINTER
1138	movq	PCPU(CURPCB),%rcx
1139	movq	$fusufault,PCB_ONFAULT(%rcx)
1140
1141	movq	$VM_MAXUSER_ADDRESS-4,%rax
1142	cmpq	%rax,%rdi			/* verify address is valid */
1143	ja	fusufault
1144
1145	xorl	%eax,%eax
1146	stac
1147	movl	(%rdi),%r11d
1148	clac
1149	movq	%rax,PCB_ONFAULT(%rcx)
1150	movl	%r11d,(%rsi)
1151	POP_FRAME_POINTER
1152	ret
1153END(fueword32_smap)
1154
/*
 * fuword16/fubyte: fetch a user 16-bit word / byte.  Unlike fueword,
 * these return the (zero-extended) value directly in %rax; -1 via
 * fusufault indicates failure.
 */
1155ENTRY(fuword16_nosmap)
1156	PUSH_FRAME_POINTER
1157	movq	PCPU(CURPCB),%rcx
1158	movq	$fusufault,PCB_ONFAULT(%rcx)
1159
1160	movq	$VM_MAXUSER_ADDRESS-2,%rax
1161	cmpq	%rax,%rdi
1162	ja	fusufault
1163
1164	movzwl	(%rdi),%eax
1165	movq	$0,PCB_ONFAULT(%rcx)
1166	POP_FRAME_POINTER
1167	ret
1168END(fuword16_nosmap)
1169
1170ENTRY(fuword16_smap)
1171	PUSH_FRAME_POINTER
1172	movq	PCPU(CURPCB),%rcx
1173	movq	$fusufault,PCB_ONFAULT(%rcx)
1174
1175	movq	$VM_MAXUSER_ADDRESS-2,%rax
1176	cmpq	%rax,%rdi
1177	ja	fusufault
1178
1179	stac
1180	movzwl	(%rdi),%eax
1181	clac
1182	movq	$0,PCB_ONFAULT(%rcx)
1183	POP_FRAME_POINTER
1184	ret
1185END(fuword16_smap)
1186
1187ENTRY(fubyte_nosmap)
1188	PUSH_FRAME_POINTER
1189	movq	PCPU(CURPCB),%rcx
1190	movq	$fusufault,PCB_ONFAULT(%rcx)
1191
1192	movq	$VM_MAXUSER_ADDRESS-1,%rax
1193	cmpq	%rax,%rdi
1194	ja	fusufault
1195
1196	movzbl	(%rdi),%eax
1197	movq	$0,PCB_ONFAULT(%rcx)
1198	POP_FRAME_POINTER
1199	ret
1200END(fubyte_nosmap)
1201
1202ENTRY(fubyte_smap)
1203	PUSH_FRAME_POINTER
1204	movq	PCPU(CURPCB),%rcx
1205	movq	$fusufault,PCB_ONFAULT(%rcx)
1206
1207	movq	$VM_MAXUSER_ADDRESS-1,%rax
1208	cmpq	%rax,%rdi
1209	ja	fusufault
1210
1211	stac
1212	movzbl	(%rdi),%eax
1213	clac
1214	movq	$0,PCB_ONFAULT(%rcx)
1215	POP_FRAME_POINTER
1216	ret
1217END(fubyte_smap)
1218
1219/*
1220 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
1221 * user memory.
1222 * addr = %rdi, value = %rsi
1223 */
/* All suword variants return 0 on success, -1 via fusufault on failure. */
1224ENTRY(suword_nosmap)
1225	PUSH_FRAME_POINTER
1226	movq	PCPU(CURPCB),%rcx
1227	movq	$fusufault,PCB_ONFAULT(%rcx)
1228
1229	movq	$VM_MAXUSER_ADDRESS-8,%rax
1230	cmpq	%rax,%rdi			/* verify address validity */
1231	ja	fusufault
1232
1233	movq	%rsi,(%rdi)
1234	xorl	%eax,%eax
1235	movq	%rax,PCB_ONFAULT(%rcx)
1236	POP_FRAME_POINTER
1237	ret
1238END(suword_nosmap)
1239
1240ENTRY(suword_smap)
1241	PUSH_FRAME_POINTER
1242	movq	PCPU(CURPCB),%rcx
1243	movq	$fusufault,PCB_ONFAULT(%rcx)
1244
1245	movq	$VM_MAXUSER_ADDRESS-8,%rax
1246	cmpq	%rax,%rdi			/* verify address validity */
1247	ja	fusufault
1248
1249	stac
1250	movq	%rsi,(%rdi)
1251	clac
1252	xorl	%eax,%eax
1253	movq	%rax,PCB_ONFAULT(%rcx)
1254	POP_FRAME_POINTER
1255	ret
1256END(suword_smap)
1257
1258ENTRY(suword32_nosmap)
1259	PUSH_FRAME_POINTER
1260	movq	PCPU(CURPCB),%rcx
1261	movq	$fusufault,PCB_ONFAULT(%rcx)
1262
1263	movq	$VM_MAXUSER_ADDRESS-4,%rax
1264	cmpq	%rax,%rdi			/* verify address validity */
1265	ja	fusufault
1266
1267	movl	%esi,(%rdi)
1268	xorl	%eax,%eax
1269	movq	%rax,PCB_ONFAULT(%rcx)
1270	POP_FRAME_POINTER
1271	ret
1272END(suword32_nosmap)
1273
1274ENTRY(suword32_smap)
1275	PUSH_FRAME_POINTER
1276	movq	PCPU(CURPCB),%rcx
1277	movq	$fusufault,PCB_ONFAULT(%rcx)
1278
1279	movq	$VM_MAXUSER_ADDRESS-4,%rax
1280	cmpq	%rax,%rdi			/* verify address validity */
1281	ja	fusufault
1282
1283	stac
1284	movl	%esi,(%rdi)
1285	clac
1286	xorl	%eax,%eax
1287	movq	%rax,PCB_ONFAULT(%rcx)
1288	POP_FRAME_POINTER
1289	ret
1290END(suword32_smap)
1291
/* 16-bit and 8-bit user stores; same contract as suword above. */
1292ENTRY(suword16_nosmap)
1293	PUSH_FRAME_POINTER
1294	movq	PCPU(CURPCB),%rcx
1295	movq	$fusufault,PCB_ONFAULT(%rcx)
1296
1297	movq	$VM_MAXUSER_ADDRESS-2,%rax
1298	cmpq	%rax,%rdi			/* verify address validity */
1299	ja	fusufault
1300
1301	movw	%si,(%rdi)
1302	xorl	%eax,%eax
1303	movq	%rax,PCB_ONFAULT(%rcx)
1304	POP_FRAME_POINTER
1305	ret
1306END(suword16_nosmap)
1307
1308ENTRY(suword16_smap)
1309	PUSH_FRAME_POINTER
1310	movq	PCPU(CURPCB),%rcx
1311	movq	$fusufault,PCB_ONFAULT(%rcx)
1312
1313	movq	$VM_MAXUSER_ADDRESS-2,%rax
1314	cmpq	%rax,%rdi			/* verify address validity */
1315	ja	fusufault
1316
1317	stac
1318	movw	%si,(%rdi)
1319	clac
1320	xorl	%eax,%eax
1321	movq	%rax,PCB_ONFAULT(%rcx)
1322	POP_FRAME_POINTER
1323	ret
1324END(suword16_smap)
1325
1326ENTRY(subyte_nosmap)
1327	PUSH_FRAME_POINTER
1328	movq	PCPU(CURPCB),%rcx
1329	movq	$fusufault,PCB_ONFAULT(%rcx)
1330
1331	movq	$VM_MAXUSER_ADDRESS-1,%rax
1332	cmpq	%rax,%rdi			/* verify address validity */
1333	ja	fusufault
1334
1335	movl	%esi,%eax
1336	movb	%al,(%rdi)
1337	xorl	%eax,%eax
1338	movq	%rax,PCB_ONFAULT(%rcx)
1339	POP_FRAME_POINTER
1340	ret
1341END(subyte_nosmap)
1342
1343ENTRY(subyte_smap)
1344	PUSH_FRAME_POINTER
1345	movq	PCPU(CURPCB),%rcx
1346	movq	$fusufault,PCB_ONFAULT(%rcx)
1347
1348	movq	$VM_MAXUSER_ADDRESS-1,%rax
1349	cmpq	%rax,%rdi			/* verify address validity */
1350	ja	fusufault
1351
1352	movl	%esi,%eax
1353	stac
1354	movb	%al,(%rdi)
1355	clac
1356	xorl	%eax,%eax
1357	movq	%rax,PCB_ONFAULT(%rcx)
1358	POP_FRAME_POINTER
1359	ret
1360END(subyte_smap)
1361
/*
 * Fault landing pad for the fu*/su*/casueword* family: undo a
 * possibly-pending stac (only on CPUs with SMAP), clear pcb_onfault
 * and return -1 (xorl then decq).
 */
1362	ALIGN_TEXT
1363fusufault:
1364	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
1365	je	1f
1366	clac
13671:	movq	PCPU(CURPCB),%rcx
1368	xorl	%eax,%eax
1369	movq	%rax,PCB_ONFAULT(%rcx)
1370	decq	%rax
1371	POP_FRAME_POINTER
1372	ret
1373
1374/*
1375 * copyinstr(from, to, maxlen, int *lencopied)
1376 *           %rdi, %rsi, %rdx, %rcx
1377 *
1378 *	copy a string from 'from' to 'to', stop when a 0 character is reached.
1379 *	return ENAMETOOLONG if string is longer than maxlen, and
1380 *	EFAULT on protection violations. If lencopied is non-zero,
1381 *	return the actual length in *lencopied.
1382 */
/*
 * %r8 keeps the original maxlen so *lencopied = maxlen - remaining(%rdx)
 * can be computed at the end.  Faults vector to cpystrflt; running out
 * of count goes to copyinstr_toolong[_smap].
 */
1383.macro COPYINSTR smap
1384	PUSH_FRAME_POINTER
1385	movq	%rdx,%r8			/* %r8 = maxlen */
1386	movq	PCPU(CURPCB),%r9
1387	movq	$cpystrflt,PCB_ONFAULT(%r9)
1388
1389	movq	$VM_MAXUSER_ADDRESS,%rax
1390
1391	/* make sure 'from' is within bounds */
1392	subq	%rdi,%rax
1393	jbe	cpystrflt
1394
1395	SMAP_DISABLE \smap
1396
1397	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
1398	cmpq	%rdx,%rax
1399	jb	8f
14001:
1401	incq	%rdx
	/* byte-at-a-time copy loop; %rdx counts down the remaining budget */
14022:
1403	decq	%rdx
1404.if \smap == 0
1405	jz	copyinstr_toolong
1406.else
1407	jz	copyinstr_toolong_smap
1408.endif
1409
1410	movb	(%rdi),%al
1411	movb	%al,(%rsi)
1412	incq	%rsi
1413	incq	%rdi
1414	testb	%al,%al
1415	jnz	2b
1416
1417	SMAP_ENABLE \smap
1418
1419	/* Success -- 0 byte reached */
1420	decq	%rdx
1421	xorl	%eax,%eax
1422
1423	/* set *lencopied and return %eax */
1424	movq	%rax,PCB_ONFAULT(%r9)
1425
1426	testq	%rcx,%rcx
1427	jz	3f
1428	subq	%rdx,%r8
1429	movq	%r8,(%rcx)
14303:
1431	POP_FRAME_POINTER
1432	ret
1433	ALIGN_TEXT
	/* clamp the budget to the distance to VM_MAXUSER_ADDRESS */
14348:
1435	movq	%rax,%rdx
1436	movq	%rax,%r8
1437	jmp 1b
1438
1439.endm
1440
1441ENTRY(copyinstr_nosmap)
1442	COPYINSTR smap=0
1443END(copyinstr_nosmap)
1444
1445ENTRY(copyinstr_smap)
1446	COPYINSTR smap=1
1447END(copyinstr_smap)
1448
/*
 * copyinstr fault exit: undo a possibly-pending stac (SMAP CPUs only),
 * set EFAULT, then fall into the shared epilogue that clears
 * pcb_onfault and fills *lencopied.
 */
1449cpystrflt:
1450	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
1451	je	1f
1452	clac
14531:	movl	$EFAULT,%eax
1454cpystrflt_x:
1455	/* set *lencopied and return %eax */
1456	movq	$0,PCB_ONFAULT(%r9)
1457
1458	testq	%rcx,%rcx
1459	jz	1f
1460	subq	%rdx,%r8
1461	movq	%r8,(%rcx)
14621:
1463	POP_FRAME_POINTER
1464	ret
1465
/*
 * Budget exhausted: EFAULT if we stopped at the user address-space
 * boundary (budget was clamped there), ENAMETOOLONG otherwise.
 */
1466copyinstr_toolong_smap:
1467	clac
1468copyinstr_toolong:
1469	/* rdx is zero - return ENAMETOOLONG or EFAULT */
1470	movq	$VM_MAXUSER_ADDRESS,%rax
1471	cmpq	%rax,%rdi
1472	jae	cpystrflt
1473	movl	$ENAMETOOLONG,%eax
1474	jmp	cpystrflt_x
1475
1476/*
1477 * Handling of special amd64 registers and descriptor tables etc
1478 */
1479/* void lgdt(struct region_descriptor *rdp); */
/*
 * Loads the new GDT, refreshes every data segment register with KDSEL,
 * and reloads %cs by converting the return into a far return (lretq
 * pops the pushed KCSEL along with the return address).
 */
1480ENTRY(lgdt)
1481	/* reload the descriptor table */
1482	lgdt	(%rdi)
1483
1484	/* flush the prefetch q */
1485	jmp	1f
1486	nop
14871:
1488	movl	$KDSEL,%eax
1489	movl	%eax,%ds
1490	movl	%eax,%es
1491	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
1492	movl	%eax,%gs
1493	movl	%eax,%ss
1494
1495	/* reload code selector by turning return into intersegmental return */
1496	popq	%rax
1497	pushq	$KCSEL
1498	pushq	%rax
1499	lretq
1500END(lgdt)
1501
1502/*****************************************************************************/
1503/* setjump, longjump                                                         */
1504/*****************************************************************************/
1505
1506ENTRY(setjmp)
1507	movq	%rbx,0(%rdi)			/* save rbx */
1508	movq	%rsp,8(%rdi)			/* save rsp */
1509	movq	%rbp,16(%rdi)			/* save rbp */
1510	movq	%r12,24(%rdi)			/* save r12 */
1511	movq	%r13,32(%rdi)			/* save r13 */
1512	movq	%r14,40(%rdi)			/* save r14 */
1513	movq	%r15,48(%rdi)			/* save r15 */
1514	movq	0(%rsp),%rdx			/* get rta */
1515	movq	%rdx,56(%rdi)			/* save rip */
1516	xorl	%eax,%eax			/* return(0); */
1517	ret
1518END(setjmp)
1519
/*
 * void longjmp(jmp_buf);
 * Restore the register state saved by setjmp() and resume at the saved
 * return address with a return value of 1.  Does not return to its
 * own caller: the saved %rip is planted into the new stack frame.
 */
ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get rta */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)
1534
1535/*
1536 * Support for reading MSRs in the safe manner.  (Instead of panic on #gp,
1537 * return an error.)
1538 */
1539ENTRY(rdmsr_safe)
1540/* int rdmsr_safe(u_int msr, uint64_t *data) */
1541	PUSH_FRAME_POINTER
1542	movq	PCPU(CURPCB),%r8
1543	movq	$msr_onfault,PCB_ONFAULT(%r8)
1544	movl	%edi,%ecx
1545	rdmsr			/* Read MSR pointed by %ecx. Returns
1546				   hi byte in edx, lo in %eax */
1547	salq	$32,%rdx	/* sign-shift %rdx left */
1548	movl	%eax,%eax	/* zero-extend %eax -> %rax */
1549	orq	%rdx,%rax
1550	movq	%rax,(%rsi)
1551	xorq	%rax,%rax
1552	movq	%rax,PCB_ONFAULT(%r8)
1553	POP_FRAME_POINTER
1554	ret
1555
1556/*
1557 * Support for writing MSRs in the safe manner.  (Instead of panic on #gp,
1558 * return an error.)
1559 */
1560ENTRY(wrmsr_safe)
1561/* int wrmsr_safe(u_int msr, uint64_t data) */
1562	PUSH_FRAME_POINTER
1563	movq	PCPU(CURPCB),%r8
1564	movq	$msr_onfault,PCB_ONFAULT(%r8)
1565	movl	%edi,%ecx
1566	movl	%esi,%eax
1567	sarq	$32,%rsi
1568	movl	%esi,%edx
1569	wrmsr			/* Write MSR pointed by %ecx. Accepts
1570				   hi byte in edx, lo in %eax. */
1571	xorq	%rax,%rax
1572	movq	%rax,PCB_ONFAULT(%r8)
1573	POP_FRAME_POINTER
1574	ret
1575
1576/*
1577 * MSR operations fault handler
1578 */
1579	ALIGN_TEXT
1580msr_onfault:
1581	movq	$0,PCB_ONFAULT(%r8)
1582	movl	$EFAULT,%eax
1583	POP_FRAME_POINTER
1584	ret
1585
1586/*
1587 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
1588 * Invalidates address space addressed by ucr3, then returns to kcr3.
1589 * Done in assembler to ensure no other memory accesses happen while
1590 * on ucr3.
1591 */
1592	ALIGN_TEXT
1593ENTRY(pmap_pti_pcid_invalidate)
1594	pushfq
1595	cli
1596	movq	%rdi,%cr3	/* to user page table */
1597	movq	%rsi,%cr3	/* back to kernel */
1598	popfq
1599	retq
1600
1601/*
1602 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
1603 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
1604 */
1605	ALIGN_TEXT
1606ENTRY(pmap_pti_pcid_invlpg)
1607	pushfq
1608	cli
1609	movq	%rdi,%cr3	/* to user page table */
1610	invlpg	(%rdx)
1611	movq	%rsi,%cr3	/* back to kernel */
1612	popfq
1613	retq
1614
1615/*
1616 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
1617 *     vm_offset_t eva);
1618 * Invalidates virtual addresses between sva and eva in address space ucr3,
1619 * then returns to kcr3.
1620 */
1621	ALIGN_TEXT
1622ENTRY(pmap_pti_pcid_invlrng)
1623	pushfq
1624	cli
1625	movq	%rdi,%cr3	/* to user page table */
16261:	invlpg	(%rdx)
1627	addq	$PAGE_SIZE,%rdx
1628	cmpq	%rdx,%rcx
1629	ja	1b
1630	movq	%rsi,%cr3	/* back to kernel */
1631	popfq
1632	retq
1633
/*
 * Return stack buffer (RSB) stuffing helpers.  .altmacro is required
 * so the %(ll) expressions below expand to their numeric values when
 * forming the per-iteration label names.  Each iteration executes a
 * call (pushing one RSB prediction entry) to the label just past the
 * call, then discards the architectural return address from the
 * in-memory stack, leaving only the RSB entry behind.
 */
	.altmacro
	.macro	rsb_seq_label l
rsb_seq_\l:
	.endm
	.macro	rsb_call_label l
	call	rsb_seq_\l
	.endm
	.macro	rsb_seq count
	ll=1
	.rept	\count
	rsb_call_label	%(ll)
	nop
	rsb_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm
1651
/*
 * void rsb_flush(void)
 * Overwrite all 32 RSB entries with benign targets so speculative
 * returns cannot consume stale/attacker-primed predictions.
 */
ENTRY(rsb_flush)
	rsb_seq	32
	ret
1655
/* all callers already saved %rax, %rdx, and %rcx */
/*
 * Kernel-entry IBRS handling: if IBRS/IBPB mitigation is active, set
 * the IBRS and STIBP bits in IA32_SPEC_CTRL and remember that in
 * PCPU(IBPB_SET) so the exit path knows to clear them.  On CPUs
 * without SMEP, also stuff the RSB: the je below tail-jumps to
 * rsb_flush, whose ret returns directly to our caller.
 */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_ibpb_active(%rip)
	je	1f			/* mitigation disabled */
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	je	rsb_flush		/* no SMEP: tail call to RSB stuffer */
1:	ret
END(handle_ibrs_entry)
1670
/*
 * Kernel-exit counterpart of handle_ibrs_entry: clear IBRS/STIBP in
 * IA32_SPEC_CTRL iff the entry path set them (PCPU(IBPB_SET)).
 * Clobbers %rax, %rcx, %rdx, which the callers have saved.
 */
ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f			/* nothing was set on entry */
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)
1682
/* registers-neutral version, but needs stack */
/*
 * Same as handle_ibrs_exit, but preserves %rax/%rdx/%rcx by saving
 * them on the stack, for call sites that have not saved registers.
 */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f			/* nothing was set on entry */
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)
1701
1702	.noaltmacro
1703
1704/*
1705 * Flush L1D cache.  Load enough of the data from the kernel text
1706 * to flush existing L1D content.
1707 *
1708 * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
1709 * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
1710 * registers are clobbered.  The NMI handler caller only needs %r13 and %r15
1711 * preserved.
1712 */
1713ENTRY(flush_l1d_sw)
1714#define	L1D_FLUSH_SIZE	(64 * 1024)
1715	movq	$KERNBASE, %r9
1716	movq	$-L1D_FLUSH_SIZE, %rcx
1717	/*
1718	 * pass 1: Preload TLB.
1719	 * Kernel text is mapped using superpages.  TLB preload is
1720	 * done for the benefit of older CPUs which split 2M page
1721	 * into 4k TLB entries.
1722	 */
17231:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
1724	addq	$PAGE_SIZE, %rcx
1725	jne	1b
1726	xorl	%eax, %eax
1727	cpuid
1728	movq	$-L1D_FLUSH_SIZE, %rcx
1729	/* pass 2: Read each cache line. */
17302:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
1731	addq	$64, %rcx
1732	jne	2b
1733	lfence
1734	ret
1735#undef	L1D_FLUSH_SIZE
1736END(flush_l1d_sw)
1737
/*
 * ABI-conforming wrapper around flush_l1d_sw: preserves the
 * callee-saved %rbx that flush_l1d_sw corrupts.
 */
ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)
1744
/*
 * MDS handler for CPUs that need no mitigation: a no-op.
 */
ENTRY(mds_handler_void)
	retq
END(mds_handler_void)
1748
/*
 * MDS handler for CPUs whose microcode implements MD_CLEAR: a VERW
 * with a memory operand flushes the affected buffers.  A writable
 * selector (%ds) is stored on the stack to serve as that operand.
 */
ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)
1756
/*
 * Software MDS mitigation for Ivy Bridge-class CPUs without microcode
 * VERW support.  Two loads from the per-CPU buffer followed by 40
 * non-temporal 16-byte stores overwrite the CPU's internal buffers
 * (sequence per Intel's MDS software guidance).  %xmm0 is preserved
 * via PCPU(MDS_TMP); if CR0.TS was set, FPU access is enabled with
 * clts and TS is restored at the end from %rax, which stays live
 * across the whole sequence.
 */
ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax	/* %rax holds saved %cr0 until the end */
	testb	$CR0_TS, %al
	je	1f
	clts			/* allow FPU use; TS restored below */
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)	/* preserve caller's %xmm0 */
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0	/* two loads flush the fill buffers */
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx	/* 40 * 16 bytes of NT stores */
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0	/* restore caller's %xmm0 */
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0	/* restore CR0.TS */
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)
1791
/*
 * Software MDS mitigation for Broadwell/Haswell-class CPUs without
 * microcode VERW support.  Overwrites the buffers with 40 16-byte
 * non-temporal stores followed by a 1536-byte rep movsb copying the
 * per-CPU buffer onto itself (sequence per Intel's MDS guidance).
 * %xmm0 is preserved via PCPU(MDS_TMP); CR0.TS is restored from
 * %rax, which stays live across the whole sequence.
 */
ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax	/* %rax holds saved %cr0 until the end */
	testb	$CR0_TS, %al
	je	1f
	clts			/* allow FPU use; TS restored below */
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)	/* preserve caller's %xmm0 */
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi	/* rep movsb: buffer copied onto itself */
	movq	%rbx, %rsi
	movl	$40, %ecx	/* 40 * 16 bytes of NT stores */
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0	/* restore caller's %xmm0 */
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0	/* restore CR0.TS */
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)
1830
/*
 * Software MDS mitigation for Skylake-class CPUs without microcode
 * VERW support, SSE variant.  Per Intel's MDS guidance: two loads
 * from the 64-byte buffer flush the fill buffers, then the 6 KB
 * per-CPU buffer is flushed (clflushopt of 96 lines) and rewritten
 * with rep stosb to scrub the store buffers.  %xmm0 is preserved via
 * PCPU(MDS_TMP); CR0.TS is restored before returning.
 *
 * Bug fix: the saved %cr0 value was kept in %rax, but %rax is zeroed
 * below (clflushopt index, then rep stosb), so the final
 * "testb $CR0_TS, %al" always saw zero and CR0.TS was never restored
 * when it had been set (the ivb/bdw/silvermont handlers do restore
 * it).  The saved value is now parked in %rdx, which is dead after
 * the two orpd loads and is untouched by the loops.
 */
ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts			/* allow FPU use; TS restored below */
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)	/* preserve caller's %xmm0 */
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0	/* two loads flush the fill buffers */
	orpd	(%rdx), %xmm0
	movq	%rax, %rdx	/* park saved %cr0; %rax is clobbered below */
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax	/* 96 cache lines */
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb		/* rewrite the 6KB buffer */
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0	/* restore caller's %xmm0 */
	testb	$CR0_TS, %dl
	je	3f
	movq	%rdx, %cr0	/* restore CR0.TS */
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)
1870
/*
 * Software MDS mitigation for Skylake-class CPUs, AVX variant.  Same
 * sequence as mds_handler_skl_sse but using 256-bit %ymm0, preserved
 * via PCPU(MDS_TMP).
 *
 * Bug fix: as in the SSE variant, the saved %cr0 in %rax was
 * destroyed by the xorl/rep stosb below, so CR0.TS was never
 * restored.  The saved value is now parked in %rdx, which is dead
 * after the two vorpd loads and is untouched by the loops.
 */
ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts			/* allow FPU use; TS restored below */
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)	/* preserve caller's %ymm0 */
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0	/* two loads flush the fill buffers */
	vorpd	(%rdx), %ymm0, %ymm0
	movq	%rax, %rdx	/* park saved %cr0; %rax is clobbered below */
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax	/* 96 cache lines */
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb		/* rewrite the 6KB buffer */
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0	/* restore caller's %ymm0 */
	testb	$CR0_TS, %dl
	je	3f
	movq	%rdx, %cr0	/* restore CR0.TS */
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)
1910
/*
 * Software MDS mitigation for Skylake-class CPUs, AVX-512 variant.
 * Same sequence as mds_handler_skl_sse but using 512-bit %zmm0,
 * preserved via PCPU(MDS_TMP).
 *
 * Bug fix: as in the SSE variant, the saved %cr0 in %rax was
 * destroyed by the xorl/rep stosb below, so CR0.TS was never
 * restored.  The saved value is now parked in %rdx, which is dead
 * after the two vorpd loads and is untouched by the loops.
 */
ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts			/* allow FPU use; TS restored below */
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)	/* preserve caller's %zmm0 */
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0	/* two loads flush the fill buffers */
	vorpd	(%rdx), %zmm0, %zmm0
	movq	%rax, %rdx	/* park saved %cr0; %rax is clobbered below */
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax	/* 96 cache lines */
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb		/* rewrite the 6KB buffer */
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0	/* restore caller's %zmm0 */
	testb	$CR0_TS, %dl
	je	3f
	movq	%rdx, %cr0	/* restore CR0.TS */
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)
1950
/*
 * Software MDS mitigation for Silvermont-class Atom CPUs: 16
 * non-temporal 16-byte stores overwrite the (smaller) store buffer.
 * NOTE(review): the smaller count vs. the other handlers presumably
 * reflects Atom's smaller buffers — per Intel's MDS guidance.
 * %xmm0 is preserved via PCPU(MDS_TMP); CR0.TS is restored from
 * %rax, which stays live across the whole sequence.
 */
ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax	/* %rax holds saved %cr0 until the end */
	testb	$CR0_TS, %al
	je	1f
	clts			/* allow FPU use; TS restored below */
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)	/* preserve caller's %xmm0 */
	pxor	%xmm0, %xmm0

	movl	$16, %ecx	/* 16 * 16 bytes of NT stores */
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0	/* restore caller's %xmm0 */
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0	/* restore CR0.TS */
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)
1980