/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE,%ecx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)
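/*
 * The _std variant zeroes a page with "rep stosq"; the _erms variant
 * targets CPUs that advertise Enhanced REP MOVSB/STOSB and stores
 * byte-wise, with the kernel choosing between them at boot.  A rough C
 * equivalent of pagezero_std, for illustration only (not part of the
 * kernel; it assumes PAGE_SIZE is a multiple of 8):
 *
 *	void
 *	pagezero_std(void *p)
 *	{
 *		uint64_t *q = p;
 *		size_t i;
 *
 *		for (i = 0; i < PAGE_SIZE / 8; i++)
 *			q[i] = 0;
 *	}
 */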

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/*
 * memcmp(b1, b2, len)
 *	   rdi,rsi,rdx
 */
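/*
 * The numeric local labels below appear to encode the size range they
 * handle: e.g. 100816 compares lengths 8..16 and 101632 lengths 16..32,
 * while mismatch labels such as 10163208 append the offset of the
 * failing word.  (An observation about the naming convention used here,
 * not an assembler requirement.)
 */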
ENTRY(memcmp)
	PUSH_FRAME_POINTER

	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

	cmpb	$8,%dl
	jg	100816f

	cmpb	$4,%dl
	jg	100408f

	cmpb	$2,%dl
	jge	100204f

	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	POP_FRAME_POINTER
	ret

	ALIGN_TEXT
100816:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	POP_FRAME_POINTER
	ret

/*
 * Mismatch was found.
 *
 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 */
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
END(memcmp)
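/*
 * For reference, the semantics implemented above, as a minimal C sketch
 * (illustrative only, not part of the kernel):
 *
 *	int
 *	memcmp(const void *b1, const void *b2, size_t len)
 *	{
 *		const unsigned char *p1 = b1, *p2 = b2;
 *
 *		for (; len > 0; p1++, p2++, len--)
 *			if (*p1 != *p2)
 *				return ((int)*p1 - (int)*p2);
 *		return (0);
 *	}
 */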

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
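/*
 * Macro arguments: "erms" selects a plain "rep movsb" for large copies
 * (for CPUs with Enhanced REP MOVSB/STOSB), "overlap" enables the
 * backward-copy path, and "begin"/"end" are hook macros expanded at
 * entry and before every return (MEMMOVE_BEGIN/MEMMOVE_END for the
 * plain routines, the COPYINOUT_* variants for copyin/copyout below).
 */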
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm
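/*
 * Dispatch logic of the macro, as C-like pseudocode (illustrative):
 *
 *	if (cnt <= 32)
 *		copy forward; all loads precede all stores, so any
 *		overlap is harmless;
 *	else if (overlap && (uintptr_t)dst - (uintptr_t)src < cnt)
 *		copy backward (the 2xxx labels);
 *	else if (cnt <= 256)
 *		copy forward, 32 bytes per iteration;
 *	else
 *		align dst to 16 bytes, then rep movsb (erms) or
 *		rep movsq plus a tail fixup.
 */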

.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
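	/*
	 * Broadcast the fill byte into all eight byte lanes of %r10:
	 * e.g. 0xab * 0x0101010101010101 == 0xabababababababab.
	 */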
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*
 * strlen(string)
 *	  %rdi
 *
 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
 *
 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
 * with leaq.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
 *   by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
 */
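/*
 * Worked single-lane example: for a byte x == 0x00, (x - 0x01) gives
 * 0xff, ~x gives 0xff, and ANDing with 0x80 leaves bit 7 set; for any
 * nonzero x at least one of the first two terms has bit 7 clear, so
 * the lane stays zero.  In the parallel 64-bit form a borrow from a
 * lower lane can produce spurious bits only above the first zero byte,
 * and the bsfq below picks the lowest set bit, so the first zero byte
 * is still located correctly.
 */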
ENTRY(strlen)
	PUSH_FRAME_POINTER
	movabsq	$0xfefefefefefefeff,%r8
	movabsq	$0x8080808080808080,%r9

	movq	%rdi,%r10
	movq	%rdi,%rcx
	testb	$7,%dil
	jz	2f

	/*
	 * Handle misaligned reads: align to 8 and fill
	 * the spurious bytes.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	shlq	$3,%rcx
	movq	$-1,%rdx
	shlq	%cl,%rdx
	notq	%rdx
	orq	%rdx,%r11

	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	3f

	/*
	 * Main loop.
	 */
	ALIGN_TEXT
1:
	leaq	8(%rdi),%rdi
2:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jz	1b
3:
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rcx,%rdi),%rax
	subq	%r10,%rax
	POP_FRAME_POINTER
	ret
END(strlen)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */
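/*
 * The pattern, sketched in rough C terms (illustrative; the actual
 * resumption happens in the trap handler):
 *
 *	curpcb->pcb_onfault = copy_fault;
 *	... touch user memory ...
 *	curpcb->pcb_onfault = NULL;
 *	return (0);
 *
 * On a protection violation the trap handler resumes execution at
 * copy_fault, which clears pcb_onfault and returns EFAULT.
 */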

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm
845
846ENTRY(copyout_nosmap_std)
847	COPYOUT smap=0 erms=0
848END(copyout_nosmap_std)
849
850ENTRY(copyout_smap_std)
851	COPYOUT smap=1 erms=0
852END(copyout_smap_std)
853
854ENTRY(copyout_nosmap_erms)
855	COPYOUT smap=0 erms=1
856END(copyout_nosmap_erms)
857
858ENTRY(copyout_smap_erms)
859	COPYOUT smap=1 erms=1
860END(copyout_smap_erms)
861
862/*
863 * copyin(from_user, to_kernel, len)
864 *        %rdi,      %rsi,      %rdx
865 */
866.macro	COPYIN smap erms
867	PUSH_FRAME_POINTER
868	movq	PCPU(CURPCB),%r11
869	movq	$copy_fault,PCB_ONFAULT(%r11)
870
871	/*
872	 * make sure address is valid
873	 */
874	movq	%rdi,%rax
875	addq	%rdx,%rax
876	jc	copy_fault
877	movq	$VM_MAXUSER_ADDRESS,%rcx
878	cmpq	%rcx,%rax
879	ja	copy_fault
880
881	xorl	%eax,%eax
882
883	movq	%rdi,%r8
884	movq	%rsi,%rdi
885	movq	%r8,%rsi
886	movq	%rdx,%rcx
887
888	SMAP_DISABLE \smap
889.if	\smap == 1
890	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
891.else
892	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
893.endif
894	/* NOTREACHED */
895.endm
896
897ENTRY(copyin_nosmap_std)
898	COPYIN smap=0 erms=0
899END(copyin_nosmap_std)
900
901ENTRY(copyin_smap_std)
902	COPYIN smap=1 erms=0
903END(copyin_smap_std)
904
905ENTRY(copyin_nosmap_erms)
906	COPYIN smap=0 erms=1
907END(copyin_nosmap_erms)
908
909ENTRY(copyin_smap_erms)
910	COPYIN smap=1 erms=1
911END(copyin_smap_erms)
912
913	ALIGN_TEXT
914copy_fault:
915	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
916	je	1f
917	clac
9181:	movq	$0,PCB_ONFAULT(%r11)
919	movl	$EFAULT,%eax
920	POP_FRAME_POINTER
921	ret
922

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if the comparison succeeded and the new value was stored,
 *        and 1 if the comparison failed.  Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	clac
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_smap)
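/*
 * Callers typically retry in a loop; an illustrative C sketch (not part
 * of this file):
 *
 *	uint32_t old, new;
 *	int rv;
 *
 *	do {
 *		rv = casueword32(p, old, &old, new);
 *		if (rv == -1)
 *			return (EFAULT);
 *		// rv == 1: *p != old; "old" was updated to the current
 *		// value, so recompute "new" and retry
 *	} while (rv != 0);
 */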

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if the comparison succeeded and the new value was stored,
 *        and 1 if the comparison failed.  Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	clac
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_smap)

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */
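/*
 * Note the two conventions below: fueword/fueword32 return 0 or -1 and
 * store the fetched value through valp, while fuword16/fubyte return
 * the value itself in %rax (-1 doubles as the error indication there).
 */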

ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
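/*
 * All su* routines return 0 on success and -1 (via fusufault) if the
 * address is out of range or the store faults.
 */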
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

	ALIGN_TEXT
fusufault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	copy a string from 'from' to 'to', stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
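/*
 * Rough C equivalent, for illustration only (in the real code the user
 * access faults are caught through pcb_onfault):
 *
 *	int
 *	copyinstr(const char *from, char *to, size_t maxlen, size_t *done)
 *	{
 *		size_t i;
 *
 *		for (i = 0; i < maxlen; i++) {
 *			to[i] = from[i];	// EFAULT on a bad access
 *			if (to[i] == '\0') {
 *				if (done != NULL)
 *					*done = i + 1;
 *				return (0);
 *			}
 *		}
 *		if (done != NULL)
 *			*done = maxlen;
 *		return (ENAMETOOLONG);
 *	}
 */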
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp	1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)

cpystrflt:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch queue */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	lretq
END(lgdt)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movq	%rbx,0(%rdi)			/* save rbx */
	movq	%rsp,8(%rdi)			/* save rsp */
	movq	%rbp,16(%rdi)			/* save rbp */
	movq	%r12,24(%rdi)			/* save r12 */
	movq	%r13,32(%rdi)			/* save r13 */
	movq	%r14,40(%rdi)			/* save r14 */
	movq	%r15,48(%rdi)			/* save r15 */
	movq	0(%rsp),%rdx			/* get return address */
	movq	%rdx,56(%rdi)			/* save rip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get return address */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)

/*
 * Support for reading MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read MSR pointed by %ecx. Returns
				   high 32 bits in %edx, low in %eax */
	salq	$32,%rdx	/* shift %rdx left by 32 bits */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write MSR pointed by %ecx. Accepts
				   high 32 bits in %edx, low in %eax. */
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	$0,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

	.altmacro
	.macro	rsb_seq_label l
rsb_seq_\l:
	.endm
	.macro	rsb_call_label l
	call	rsb_seq_\l
	.endm
	.macro	rsb_seq count
	ll=1
	.rept	\count
	rsb_call_label	%(ll)
	nop
	rsb_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm
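/*
 * Each iteration of rsb_seq emits a call whose target is the
 * instruction after the following nop; the call pushes a benign return
 * address (filling one Return Stack Buffer entry) and the addq $8,%rsp
 * at the target discards it without a matching ret.  rsb_flush below
 * runs 32 such pairs, which should be enough to overwrite the RSB
 * entries on the processors this mitigation is aimed at.
 */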

ENTRY(rsb_flush)
	rsb_seq	32
	ret

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_ibpb_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	je	rsb_flush
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush L1D cache.  Load enough of the data from the kernel text
 * to flush existing L1D content.
 *
 * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
 * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
 * registers are clobbered.  The NMI handler caller only needs %r13 and %r15
 * preserved.
 */
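/*
 * With L1D_FLUSH_SIZE == 64KB, pass 1 below touches 16 4KB pages to
 * prime the TLB and pass 2 reads 1024 64-byte cache lines, enough to
 * displace the contents of a typical 32KB or 64KB L1D.
 */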
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)

ENTRY(mds_handler_void)
	retq
END(mds_handler_void)

ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)
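/*
 * On CPUs with the relevant microcode update, VERW has the documented
 * side effect of clearing the CPU-internal buffers affected by MDS;
 * any writable selector works as the operand, so the routine uses a
 * copy of %ds placed on the stack.
 */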

ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi
	movq	%rbx, %rsi
	movl	$40, %ecx
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0
	vorpd	(%rdx), %ymm0, %ymm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0
	vorpd	(%rdx), %zmm0, %zmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movl	$16, %ecx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)