/*-
 * Copyright (c) 2018-2019 The FreeBSD Foundation
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * Portions of this software were developed by
 * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ddb.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <machine/pmap.h>

#include "assym.inc"

	.text

/* Address: %rdi */
ENTRY(pagezero_std)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	xorl	%eax,%eax
	rep
	stosq
	POP_FRAME_POINTER
	ret
END(pagezero_std)

ENTRY(pagezero_erms)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE,%ecx
	xorl	%eax,%eax
	rep
	stosb
	POP_FRAME_POINTER
	ret
END(pagezero_erms)
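
/*
 * The _std/_erms pairs in this file are alternative implementations of the
 * same routine; one of them is bound at boot depending on whether the CPU
 * reports Enhanced REP MOVSB/STOSB (ERMS).  A minimal C sketch of that kind
 * of ifunc-style resolver (illustrative names, not the exact kernel wiring):
 *
 *	#include <stdint.h>
 *
 *	extern void pagezero_std(void *page);
 *	extern void pagezero_erms(void *page);
 *	extern uint64_t cpu_stdext_feature;	// CPUID leaf 7, %ebx
 *	#define	CPUID_STDEXT_ERMS	0x00000200	// bit 9: ERMS
 *
 *	static void (*resolve_pagezero(void))(void *)
 *	{
 *		return ((cpu_stdext_feature & CPUID_STDEXT_ERMS) != 0 ?
 *		    pagezero_erms : pagezero_std);
 *	}
 */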

/*
 * pagecopy(%rdi=from, %rsi=to)
 */
ENTRY(pagecopy)
	PUSH_FRAME_POINTER
	movl	$PAGE_SIZE/8,%ecx
	movq	%rdi,%r9
	movq	%rsi,%rdi
	movq	%r9,%rsi
	rep
	movsq
	POP_FRAME_POINTER
	ret
END(pagecopy)

/*
 * memcmp(b1, b2, len)
 *	   rdi,rsi,rdx
 */
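/*
 * The implementation below dispatches on length: buffers of at most 32 bytes
 * are handled with a few (possibly overlapping) wide loads, longer ones fall
 * into a 32-bytes-per-iteration loop.  A C sketch of the overlapping-load
 * idea for lengths 8..16 (illustrative only):
 *
 *	uint64_t a, b;
 *	memcpy(&a, b1, 8);
 *	memcpy(&b, b2, 8);
 *	if (a != b)
 *		goto mismatch;
 *	memcpy(&a, b1 + len - 8, 8);	// overlaps the first load
 *	memcpy(&b, b2 + len - 8, 8);
 *	if (a != b)
 *		goto mismatch;
 *	return (0);
 */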
ENTRY(memcmp)
	PUSH_FRAME_POINTER

	xorl	%eax,%eax
10:
	cmpq	$16,%rdx
	ja	101632f

	cmpb	$8,%dl
	jg	100816f

	cmpb	$4,%dl
	jg	100408f

	cmpb	$2,%dl
	jge	100204f

	cmpb	$1,%dl
	jl	100000f
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	subl	%r8d,%eax
100000:
	POP_FRAME_POINTER
	ret

	ALIGN_TEXT
100816:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10081608f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	80f
	movl	-4(%rdi,%rdx),%r8d
	movl	-4(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	10040804f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	movzwl	(%rdi),%r8d
	movzwl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	movzwl	-2(%rdi,%rdx),%r8d
	movzwl	-2(%rsi,%rdx),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpq	$32,%rdx
	ja	103200f
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	movq	8(%rdi),%r8
	movq	8(%rsi),%r9
	cmpq	%r8,%r9
	jne	10163208f
	movq	-16(%rdi,%rdx),%r8
	movq	-16(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163216f
	movq	-8(%rdi,%rdx),%r8
	movq	-8(%rsi,%rdx),%r9
	cmpq	%r8,%r9
	jne	10163224f
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
103200:
	movq	(%rdi),%r8
	movq	8(%rdi),%r9
	subq	(%rsi),%r8
	subq	8(%rsi),%r9
	orq	%r8,%r9
	jnz	10320000f

	movq	16(%rdi),%r8
	movq	24(%rdi),%r9
	subq	16(%rsi),%r8
	subq	24(%rsi),%r9
	orq	%r8,%r9
	jnz	10320016f

	leaq	32(%rdi),%rdi
	leaq	32(%rsi),%rsi
	subq	$32,%rdx
	cmpq	$32,%rdx
	jae	103200b
	cmpb	$0,%dl
	jne	10b
	POP_FRAME_POINTER
	ret

/*
 * Mismatch was found.
 *
 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 */
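/*
 * In C terms the narrowing step is roughly (sketch):
 *
 *	if (*(const uint32_t *)p1 == *(const uint32_t *)p2) {
 *		p1 += 4;		// difference is in the upper half
 *		p2 += 4;
 *	}
 *	// at most 4 candidate bytes remain; compare them one by one and
 *	// return the signed difference of the first pair that differs
 */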
	ALIGN_TEXT
10320016:
	leaq	16(%rdi),%rdi
	leaq	16(%rsi),%rsi
10320000:
	movq	(%rdi),%r8
	movq	(%rsi),%r9
	cmpq	%r8,%r9
	jne	80f
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10081608:
10163224:
	leaq	-8(%rdi,%rdx),%rdi
	leaq	-8(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163216:
	leaq	-16(%rdi,%rdx),%rdi
	leaq	-16(%rsi,%rdx),%rsi
	jmp	80f
	ALIGN_TEXT
10163208:
	leaq	8(%rdi),%rdi
	leaq	8(%rsi),%rsi
	jmp	80f
	ALIGN_TEXT
10040804:
	leaq	-4(%rdi,%rdx),%rdi
	leaq	-4(%rsi,%rdx),%rsi
	jmp	1f

	ALIGN_TEXT
80:
	movl	(%rdi),%r8d
	movl	(%rsi),%r9d
	cmpl	%r8d,%r9d
	jne	1f
	leaq	4(%rdi),%rdi
	leaq	4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
	movzbl	(%rdi),%eax
	movzbl	(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	1(%rdi),%eax
	movzbl	1(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	2(%rdi),%eax
	movzbl	2(%rsi),%r8d
	cmpb	%r8b,%al
	jne	2f

	movzbl	3(%rdi),%eax
	movzbl	3(%rsi),%r8d
2:
	subl	%r8d,%eax
	POP_FRAME_POINTER
	ret
END(memcmp)

/*
 * memmove(dst, src, cnt)
 *         rdi, rsi, rdx
 */

/*
 * Register state at entry is supposed to be as follows:
 * rdi - destination
 * rsi - source
 * rdx - count
 *
 * The macro possibly clobbers the above and: rcx, r8, r9, r10
 * It does not clobber rax nor r11.
 */
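/*
 * The direction choice made below for the overlap == 1 case can be written
 * in C as (sketch; helper names illustrative; the unsigned compare folds
 * "overlapping" and "src < dst" into a single test):
 *
 *	if ((uintptr_t)dst - (uintptr_t)src < len)
 *		copy_backwards(dst, src, len);	// regions overlap, dst above src
 *	else
 *		copy_forwards(dst, src, len);
 */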
.macro MEMMOVE erms overlap begin end
	\begin

	/*
	 * For sizes 0..32 all data is read before it is written, so there
	 * is no correctness issue with direction of copying.
	 */
	cmpq	$32,%rcx
	jbe	101632f

.if \overlap == 1
	movq	%rdi,%r8
	subq	%rsi,%r8
	cmpq	%rcx,%r8	/* overlapping && src < dst? */
	jb	2f
.endif

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	8(%rsi),%rdx
	movq	%rdx,8(%rdi)
	movq	16(%rsi),%rdx
	movq	%rdx,16(%rdi)
	movq	24(%rsi),%rdx
	movq	%rdx,24(%rdi)
	leaq	32(%rsi),%rsi
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	103200b
	cmpb	$0,%cl
	jne	101632f
	\end
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
	movq	(%rsi),%rdx
	movq	8(%rsi),%r8
	movq	-16(%rsi,%rcx),%r9
	movq	-8(%rsi,%rcx),%r10
	movq	%rdx,(%rdi)
	movq	%r8,8(%rdi)
	movq	%r9,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	(%rsi),%rdx
	movq	-8(%rsi,%rcx),%r8
	movq	%rdx,(%rdi)
	movq	%r8,-8(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	(%rsi),%edx
	movl	-4(%rsi,%rcx),%r8d
	movl	%edx,(%rdi)
	movl	%r8d,-4(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movzwl	(%rsi),%edx
	movzwl	-2(%rsi,%rcx),%r8d
	movw	%dx,(%rdi)
	movw	%r8w,-2(%rdi,%rcx)
	\end
	ret
	ALIGN_TEXT
100001:
	cmpb	$1,%cl
	jl	100000f
	movb	(%rsi),%dl
	movb	%dl,(%rdi)
100000:
	\end
	ret

	ALIGN_TEXT
1256:
	testb	$15,%dil
	jnz	100f
.if \erms == 1
	rep
	movsb
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret
100:
	movq	(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%rdi,%r10
	movq	%rdi,%rcx
	andq	$15,%rcx
	leaq	-16(%rdx,%rcx),%rdx
	neg	%rcx
	leaq	16(%rdi,%rcx),%rdi
	leaq	16(%rsi,%rcx),%rsi
	movq	%rdx,%rcx
.if \erms == 1
	rep
	movsb
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
.else
	shrq	$3,%rcx                         /* copy by 64-bit words */
	rep
	movsq
	movq	%r8,(%r10)
	movq	%r9,8(%r10)
	movq	%rdx,%rcx
	andl	$7,%ecx                         /* any bytes left? */
	jne	100408b
.endif
	\end
	ret

.if \overlap == 1
	/*
	 * Copy backwards.
	 */
	ALIGN_TEXT
2:
	cmpq	$256,%rcx
	ja	2256f

	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi

	cmpq	$32,%rcx
	jb	2016f

	ALIGN_TEXT
2032:
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	movq	-16(%rsi),%rdx
	movq	%rdx,-16(%rdi)
	movq	-24(%rsi),%rdx
	movq	%rdx,-24(%rdi)
	leaq	-32(%rsi),%rsi
	leaq	-32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	jae	2032b
	cmpb	$0,%cl
	jne	2016f
	\end
	ret
	ALIGN_TEXT
2016:
	cmpb	$16,%cl
	jl	2008f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	movq	-8(%rsi),%rdx
	movq	%rdx,-8(%rdi)
	subb	$16,%cl
	jz	2000f
	leaq	-16(%rsi),%rsi
	leaq	-16(%rdi),%rdi
2008:
	cmpb	$8,%cl
	jl	2004f
	movq	(%rsi),%rdx
	movq	%rdx,(%rdi)
	subb	$8,%cl
	jz	2000f
	leaq	-8(%rsi),%rsi
	leaq	-8(%rdi),%rdi
2004:
	cmpb	$4,%cl
	jl	2002f
	movl	4(%rsi),%edx
	movl	%edx,4(%rdi)
	subb	$4,%cl
	jz	2000f
	leaq	-4(%rsi),%rsi
	leaq	-4(%rdi),%rdi
2002:
	cmpb	$2,%cl
	jl	2001f
	movw	6(%rsi),%dx
	movw	%dx,6(%rdi)
	subb	$2,%cl
	jz	2000f
	leaq	-2(%rsi),%rsi
	leaq	-2(%rdi),%rdi
2001:
	cmpb	$1,%cl
	jl	2000f
	movb	7(%rsi),%dl
	movb	%dl,7(%rdi)
2000:
	\end
	ret
	ALIGN_TEXT
2256:
	std
	leaq	-8(%rdi,%rcx),%rdi
	leaq	-8(%rsi,%rcx),%rsi
	shrq	$3,%rcx
	rep
	movsq
	cld
	movq	%rdx,%rcx
	andb	$7,%cl
	jne	2004b
	\end
	ret
.endif
.endm

.macro MEMMOVE_BEGIN
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
.endm

.macro MEMMOVE_END
	POP_FRAME_POINTER
.endm

ENTRY(memmove_std)
	MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_std)

ENTRY(memmove_erms)
	MEMMOVE erms=1 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memmove_erms)

/*
 * memcpy(dst, src, len)
 *        rdi, rsi, rdx
 *
 * Note: memcpy does not support overlapping copies
 */
ENTRY(memcpy_std)
	MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_std)

ENTRY(memcpy_erms)
	MEMMOVE erms=1 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
END(memcpy_erms)

/*
 * memset(dst, c,   len)
 *        rdi, rsi, rdx
 */
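/*
 * The fill byte is splat across all eight bytes of a register by
 * multiplying with 0x0101010101010101, as the movzbq/movabs/imulq sequence
 * below does.  In C (sketch):
 *
 *	uint64_t pattern = (uint64_t)(uint8_t)c * 0x0101010101010101ULL;
 *	// c == 0xab  =>  pattern == 0xabababababababab
 */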
.macro MEMSET erms
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rdx,%rcx
	movzbq	%sil,%r8
	movabs	$0x0101010101010101,%r10
	imulq	%r8,%r10

	cmpq	$32,%rcx
	jbe	101632f

	cmpq	$256,%rcx
	ja	1256f

	ALIGN_TEXT
103200:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r10,24(%rdi)
	leaq	32(%rdi),%rdi
	subq	$32,%rcx
	cmpq	$32,%rcx
	ja	103200b
	cmpb	$16,%cl
	ja	201632f
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
101632:
	cmpb	$16,%cl
	jl	100816f
201632:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%r10,-16(%rdi,%rcx)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100816:
	cmpb	$8,%cl
	jl	100408f
	movq	%r10,(%rdi)
	movq	%r10,-8(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100408:
	cmpb	$4,%cl
	jl	100204f
	movl	%r10d,(%rdi)
	movl	%r10d,-4(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100204:
	cmpb	$2,%cl
	jl	100001f
	movw	%r10w,(%rdi)
	movw	%r10w,-2(%rdi,%rcx)
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
100001:
	cmpb	$0,%cl
	je	100000f
	movb	%r10b,(%rdi)
100000:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
1256:
	movq	%rdi,%r9
	movq	%r10,%rax
	testl	$15,%edi
	jnz	3f
1:
.if \erms == 1
	rep
	stosb
	movq	%r9,%rax
.else
	movq	%rcx,%rdx
	shrq	$3,%rcx
	rep
	stosq
	movq	%r9,%rax
	andl	$7,%edx
	jnz	2f
	POP_FRAME_POINTER
	ret
2:
	movq	%r10,-8(%rdi,%rdx)
.endif
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
3:
	movq	%r10,(%rdi)
	movq	%r10,8(%rdi)
	movq	%rdi,%r8
	andq	$15,%r8
	leaq	-16(%rcx,%r8),%rcx
	neg	%r8
	leaq	16(%rdi,%r8),%rdi
	jmp	1b
.endm

ENTRY(memset_std)
	MEMSET erms=0
END(memset_std)

ENTRY(memset_erms)
	MEMSET erms=1
END(memset_erms)

/* fillw(pat, base, cnt) */
/*       %rdi,%rsi, %rdx */
ENTRY(fillw)
	PUSH_FRAME_POINTER
	movq	%rdi,%rax
	movq	%rsi,%rdi
	movq	%rdx,%rcx
	rep
	stosw
	POP_FRAME_POINTER
	ret
END(fillw)

/*
 * strlen(string)
 *	  %rdi
 *
 * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
 *
 * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
 * with leaq.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
 *   by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
 */
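/*
 * One iteration of the main loop below, rendered in C (sketch; ffsll()
 * stands in for bsfq):
 *
 *	uint64_t x, m;
 *	memcpy(&x, p, 8);			// 8 aligned string bytes
 *	m = (x + 0xfefefefefefefeffULL) & ~x & 0x8080808080808080ULL;
 *	if (m != 0)				// some byte was zero
 *		return ((p - start) + (ffsll(m) - 1) / 8);
 *	p += 8;
 */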
ENTRY(strlen)
	PUSH_FRAME_POINTER
	movabsq	$0xfefefefefefefeff,%r8
	movabsq	$0x8080808080808080,%r9

	movq	%rdi,%r10
	movq	%rdi,%rcx
	testb	$7,%dil
	jz	2f

	/*
	 * Handle misaligned reads: align to 8 and fill
	 * the spurious bytes.
	 */
	andq	$~7,%rdi
	movq	(%rdi),%r11
	shlq	$3,%rcx
	movq	$-1,%rdx
	shlq	%cl,%rdx
	notq	%rdx
	orq	%rdx,%r11

	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jnz	3f

	/*
	 * Main loop.
	 */
	ALIGN_TEXT
1:
	leaq	8(%rdi),%rdi
2:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx
	notq	%r11
	andq	%r11,%rcx
	andq	%r9,%rcx
	jz	1b
3:
	bsfq	%rcx,%rcx
	shrq	$3,%rcx
	leaq	(%rcx,%rdi),%rax
	subq	%r10,%rax
	POP_FRAME_POINTER
	ret
END(strlen)

/*****************************************************************************/
/* copyout and fubyte family                                                 */
/*****************************************************************************/
/*
 * Access user memory from inside the kernel. These routines should be
 * the only places that do this.
 *
 * These routines set curpcb->pcb_onfault for the time they execute. When a
 * protection violation occurs inside the functions, the trap handler
 * returns to *curpcb->pcb_onfault instead of the function.
 */
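/*
 * The pcb_onfault protocol in C-like pseudocode (sketch):
 *
 *	curpcb->pcb_onfault = &fault_handler;	// e.g. copy_fault, fusufault
 *	... touch user memory, may raise a page/protection fault ...
 *	curpcb->pcb_onfault = NULL;		// success path
 *
 * If a fault does occur, trap() sees a non-NULL pcb_onfault and resumes
 * execution at the handler, which returns an error, instead of panicking.
 */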

.macro SMAP_DISABLE smap
.if	\smap
	stac
.endif
.endm


.macro SMAP_ENABLE smap
.if	\smap
	clac
.endif
.endm

.macro COPYINOUT_BEGIN
.endm

.macro COPYINOUT_END
	movq	%rax,PCB_ONFAULT(%r11)
	POP_FRAME_POINTER
.endm

.macro COPYINOUT_SMAP_END
	SMAP_ENABLE smap=1
	COPYINOUT_END
.endm

/*
 * copyout(from_kernel, to_user, len)
 *         %rdi,        %rsi,    %rdx
 */
.macro	COPYOUT smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * Check explicitly for non-user addresses.
	 * First, prevent address wrapping.
	 */
	movq	%rsi,%rax
	addq	%rdx,%rax
	jc	copy_fault
/*
 * XXX STOP USING VM_MAXUSER_ADDRESS.
 * It is an end address, not a max, so every time it is used correctly it
 * looks like there is an off by one error, and of course it caused an off
 * by one error in several places.
 */
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault
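	/*
	 * The two checks just made are, in C terms (sketch; udaddr and len
	 * name the values in %rsi and %rdx):
	 *
	 *	if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||  // wrapped
	 *	    (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)   // past user VA
	 *		goto copy_fault;
	 */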

	/*
	 * Set return value to zero. Remaining failure mode goes through
	 * copy_fault.
	 */
	xorl	%eax,%eax

	/*
	 * Set up arguments for MEMMOVE.
	 */
	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx


	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyout_nosmap_std)
	COPYOUT smap=0 erms=0
END(copyout_nosmap_std)

ENTRY(copyout_smap_std)
	COPYOUT smap=1 erms=0
END(copyout_smap_std)

ENTRY(copyout_nosmap_erms)
	COPYOUT smap=0 erms=1
END(copyout_nosmap_erms)

ENTRY(copyout_smap_erms)
	COPYOUT smap=1 erms=1
END(copyout_smap_erms)

/*
 * copyin(from_user, to_kernel, len)
 *        %rdi,      %rsi,      %rdx
 */
.macro	COPYIN smap erms
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r11
	movq	$copy_fault,PCB_ONFAULT(%r11)

	/*
	 * make sure address is valid
	 */
	movq	%rdi,%rax
	addq	%rdx,%rax
	jc	copy_fault
	movq	$VM_MAXUSER_ADDRESS,%rcx
	cmpq	%rcx,%rax
	ja	copy_fault

	xorl	%eax,%eax

	movq	%rdi,%r8
	movq	%rsi,%rdi
	movq	%r8,%rsi
	movq	%rdx,%rcx

	SMAP_DISABLE \smap
.if	\smap == 1
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_SMAP_END
.else
	MEMMOVE erms=\erms overlap=0 begin=COPYINOUT_BEGIN end=COPYINOUT_END
.endif
	/* NOTREACHED */
.endm

ENTRY(copyin_nosmap_std)
	COPYIN smap=0 erms=0
END(copyin_nosmap_std)

ENTRY(copyin_smap_std)
	COPYIN smap=1 erms=0
END(copyin_smap_std)

ENTRY(copyin_nosmap_erms)
	COPYIN smap=0 erms=1
END(copyin_nosmap_erms)

ENTRY(copyin_smap_erms)
	COPYIN smap=1 erms=1
END(copyin_smap_erms)

	ALIGN_TEXT
copy_fault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	$0,PCB_ONFAULT(%r11)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * casueword32.  Compare and set user integer.  Returns -1 on fault,
 *        0 if access was successful, and 1 when comparison failed.
 *        Old value is written to *oldp.
 *        dst = %rdi, old = %esi, oldp = %rdx, new = %ecx
 */
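/*
 * Semantics in C, using the GCC/Clang builtin as a stand-in for the inlined
 * cmpxchg (sketch; the real routine additionally arms pcb_onfault and
 * bound-checks dst):
 *
 *	int casueword32(volatile uint32_t *dst, uint32_t old, uint32_t *oldp,
 *	    uint32_t newval)
 *	{
 *		uint32_t expected = old;
 *		int ne;
 *
 *		ne = !__atomic_compare_exchange_n(dst, &expected, newval, 0,
 *		    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
 *		*oldp = expected;	// value observed at *dst
 *		return (ne);		// 0 ok, 1 mismatch (-1 on fault)
 *	}
 */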
ENTRY(casueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_nosmap)

ENTRY(casueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movl	%esi,%eax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
	clac
	setne	%cl

	/*
	 * The old value is in %eax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.  Save %eax into %esi to prepare the return
	 * value.
	 */
	movl	%eax,%esi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)

	/*
	 * Access the oldp after the pcb_onfault is cleared, to correctly
	 * catch corrupted pointer.
	 */
	movl	%esi,(%rdx)			/* oldp = %rdx */
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword32_smap)

/*
 * casueword.  Compare and set user long.  Returns -1 on fault,
 *        0 if access was successful, and 1 when comparison failed.
 *        Old value is written to *oldp.
 *        dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx
 */
ENTRY(casueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_nosmap)

ENTRY(casueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$fusufault,PCB_ONFAULT(%r8)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	movq	%rsi,%rax			/* old */
	stac
#ifdef SMP
	lock
#endif
	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
	clac
	setne	%cl

	/*
	 * The old value is in %rax.  If the store succeeded it will be the
	 * value we expected (old) from before the store, otherwise it will
	 * be the current value.
	 */
	movq	%rax,%rsi
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%r8)
	movq	%rsi,(%rdx)
	POP_FRAME_POINTER
	movzbl	%cl, %eax
	ret
END(casueword_smap)

/*
 * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit
 * byte from user memory.
 * addr = %rdi, valp = %rsi
 */

ENTRY(fueword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movq	(%rdi),%r11
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_nosmap)

ENTRY(fueword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movq	(%rdi),%r11
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movq	%r11,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword_smap)

ENTRY(fueword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	movl	(%rdi),%r11d
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_nosmap)

ENTRY(fueword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address is valid */
	ja	fusufault

	xorl	%eax,%eax
	stac
	movl	(%rdi),%r11d
	clac
	movq	%rax,PCB_ONFAULT(%rcx)
	movl	%r11d,(%rsi)
	POP_FRAME_POINTER
	ret
END(fueword32_smap)

ENTRY(fuword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzwl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_nosmap)

ENTRY(fuword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzwl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fuword16_smap)

ENTRY(fubyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	movzbl	(%rdi),%eax
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_nosmap)

ENTRY(fubyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi
	ja	fusufault

	stac
	movzbl	(%rdi),%eax
	clac
	movq	$0,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(fubyte_smap)

/*
 * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
 * user memory.
 * addr = %rdi, value = %rsi
 */
ENTRY(suword_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movq	%rsi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_nosmap)

ENTRY(suword_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-8,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movq	%rsi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword_smap)

ENTRY(suword32_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_nosmap)

ENTRY(suword32_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-4,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movl	%esi,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword32_smap)

ENTRY(suword16_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movw	%si,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_nosmap)

ENTRY(suword16_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-2,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	stac
	movw	%si,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(suword16_smap)

ENTRY(subyte_nosmap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	movb	%al,(%rdi)
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_nosmap)

ENTRY(subyte_smap)
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%rcx
	movq	$fusufault,PCB_ONFAULT(%rcx)

	movq	$VM_MAXUSER_ADDRESS-1,%rax
	cmpq	%rax,%rdi			/* verify address validity */
	ja	fusufault

	movl	%esi,%eax
	stac
	movb	%al,(%rdi)
	clac
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	POP_FRAME_POINTER
	ret
END(subyte_smap)

	ALIGN_TEXT
fusufault:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movq	PCPU(CURPCB),%rcx
	xorl	%eax,%eax
	movq	%rax,PCB_ONFAULT(%rcx)
	decq	%rax
	POP_FRAME_POINTER
	ret

/*
 * copyinstr(from, to, maxlen, int *lencopied)
 *           %rdi, %rsi, %rdx, %rcx
 *
 *	copy a string from 'from' to 'to', stop when a 0 character is reached.
 *	return ENAMETOOLONG if string is longer than maxlen, and
 *	EFAULT on protection violations. If lencopied is non-zero,
 *	return the actual length in *lencopied.
 */
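/*
 * A C sketch of the copy loop implemented below (the user-address bound
 * handling that produces EFAULT is omitted):
 *
 *	int copyinstr(const char *from, char *to, size_t maxlen,
 *	    size_t *lencopied)
 *	{
 *		size_t i;
 *
 *		for (i = 0; i < maxlen; i++) {
 *			if ((to[i] = from[i]) == '\0') {
 *				if (lencopied != NULL)
 *					*lencopied = i + 1;	// includes NUL
 *				return (0);
 *			}
 *		}
 *		return (ENAMETOOLONG);
 *	}
 */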
.macro COPYINSTR smap
	PUSH_FRAME_POINTER
	movq	%rdx,%r8			/* %r8 = maxlen */
	movq	PCPU(CURPCB),%r9
	movq	$cpystrflt,PCB_ONFAULT(%r9)

	movq	$VM_MAXUSER_ADDRESS,%rax

	/* make sure 'from' is within bounds */
	subq	%rdi,%rax
	jbe	cpystrflt

	SMAP_DISABLE \smap

	/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
	cmpq	%rdx,%rax
	jb	8f
1:
	incq	%rdx
2:
	decq	%rdx
.if \smap == 0
	jz	copyinstr_toolong
.else
	jz	copyinstr_toolong_smap
.endif

	movb	(%rdi),%al
	movb	%al,(%rsi)
	incq	%rsi
	incq	%rdi
	testb	%al,%al
	jnz	2b

	SMAP_ENABLE \smap

	/* Success -- 0 byte reached */
	decq	%rdx
	xorl	%eax,%eax

	/* set *lencopied and return %eax */
	movq	%rax,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	3f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
3:
	POP_FRAME_POINTER
	ret
	ALIGN_TEXT
8:
	movq	%rax,%rdx
	movq	%rax,%r8
	jmp	1b

.endm

ENTRY(copyinstr_nosmap)
	COPYINSTR smap=0
END(copyinstr_nosmap)

ENTRY(copyinstr_smap)
	COPYINSTR smap=1
END(copyinstr_smap)

cpystrflt:
	testl	$CPUID_STDEXT_SMAP,cpu_stdext_feature(%rip)
	je	1f
	clac
1:	movl	$EFAULT,%eax
cpystrflt_x:
	/* set *lencopied and return %eax */
	movq	$0,PCB_ONFAULT(%r9)

	testq	%rcx,%rcx
	jz	1f
	subq	%rdx,%r8
	movq	%r8,(%rcx)
1:
	POP_FRAME_POINTER
	ret

copyinstr_toolong_smap:
	clac
copyinstr_toolong:
	/* rdx is zero - return ENAMETOOLONG or EFAULT */
	movq	$VM_MAXUSER_ADDRESS,%rax
	cmpq	%rax,%rdi
	jae	cpystrflt
	movl	$ENAMETOOLONG,%eax
	jmp	cpystrflt_x

/*
 * Handling of special amd64 registers and descriptor tables etc
 */
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
	/* reload the descriptor table */
	lgdt	(%rdi)

	/* flush the prefetch q */
	jmp	1f
	nop
1:
	movl	$KDSEL,%eax
	movl	%eax,%ds
	movl	%eax,%es
	movl	%eax,%fs	/* Beware, use wrmsr to set 64 bit base */
	movl	%eax,%gs
	movl	%eax,%ss

	/* reload code selector by turning return into intersegmental return */
	popq	%rax
	pushq	$KCSEL
	pushq	%rax
	lretq
END(lgdt)

/*****************************************************************************/
/* setjump, longjump                                                         */
/*****************************************************************************/

ENTRY(setjmp)
	movq	%rbx,0(%rdi)			/* save rbx */
	movq	%rsp,8(%rdi)			/* save rsp */
	movq	%rbp,16(%rdi)			/* save rbp */
	movq	%r12,24(%rdi)			/* save r12 */
	movq	%r13,32(%rdi)			/* save r13 */
	movq	%r14,40(%rdi)			/* save r14 */
	movq	%r15,48(%rdi)			/* save r15 */
	movq	0(%rsp),%rdx			/* get rta */
	movq	%rdx,56(%rdi)			/* save rip */
	xorl	%eax,%eax			/* return(0); */
	ret
END(setjmp)

ENTRY(longjmp)
	movq	0(%rdi),%rbx			/* restore rbx */
	movq	8(%rdi),%rsp			/* restore rsp */
	movq	16(%rdi),%rbp			/* restore rbp */
	movq	24(%rdi),%r12			/* restore r12 */
	movq	32(%rdi),%r13			/* restore r13 */
	movq	40(%rdi),%r14			/* restore r14 */
	movq	48(%rdi),%r15			/* restore r15 */
	movq	56(%rdi),%rdx			/* get rta */
	movq	%rdx,0(%rsp)			/* put in return frame */
	xorl	%eax,%eax			/* return(1); */
	incl	%eax
	ret
END(longjmp)

/*
 * Support for reading MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
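/*
 * Typical use from C (sketch):
 *
 *	uint64_t val;
 *
 *	if (rdmsr_safe(msr, &val) != 0)
 *		return (EFAULT);	// MSR access raised #GP
 */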
ENTRY(rdmsr_safe)
/* int rdmsr_safe(u_int msr, uint64_t *data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	rdmsr			/* Read MSR pointed to by %ecx. Returns
				   high 32 bits in %edx, low 32 in %eax */
	salq	$32,%rdx	/* shift %rdx into the high half */
	movl	%eax,%eax	/* zero-extend %eax -> %rax */
	orq	%rdx,%rax
	movq	%rax,(%rsi)
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * Support for writing MSRs in the safe manner.  (Instead of panic on #gp,
 * return an error.)
 */
ENTRY(wrmsr_safe)
/* int wrmsr_safe(u_int msr, uint64_t data) */
	PUSH_FRAME_POINTER
	movq	PCPU(CURPCB),%r8
	movq	$msr_onfault,PCB_ONFAULT(%r8)
	movl	%edi,%ecx
	movl	%esi,%eax
	sarq	$32,%rsi
	movl	%esi,%edx
	wrmsr			/* Write MSR pointed to by %ecx. Takes
				   high 32 bits in %edx, low 32 in %eax. */
	xorq	%rax,%rax
	movq	%rax,PCB_ONFAULT(%r8)
	POP_FRAME_POINTER
	ret

/*
 * MSR operations fault handler
 */
	ALIGN_TEXT
msr_onfault:
	movq	$0,PCB_ONFAULT(%r8)
	movl	$EFAULT,%eax
	POP_FRAME_POINTER
	ret

/*
 * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3);
 * Invalidates address space addressed by ucr3, then returns to kcr3.
 * Done in assembler to ensure no other memory accesses happen while
 * on ucr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invalidate)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
 * Invalidates virtual address va in address space ucr3, then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlpg)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
	invlpg	(%rdx)
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

/*
 * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
 *     vm_offset_t eva);
 * Invalidates virtual addresses between sva and eva in address space ucr3,
 * then returns to kcr3.
 */
	ALIGN_TEXT
ENTRY(pmap_pti_pcid_invlrng)
	pushfq
	cli
	movq	%rdi,%cr3	/* to user page table */
1:	invlpg	(%rdx)
	addq	$PAGE_SIZE,%rdx
	cmpq	%rdx,%rcx
	ja	1b
	movq	%rsi,%cr3	/* back to kernel */
	popfq
	retq

	.altmacro
	.macro	rsb_seq_label l
rsb_seq_\l:
	.endm
	.macro	rsb_call_label l
	call	rsb_seq_\l
	.endm
	.macro	rsb_seq count
	ll=1
	.rept	\count
	rsb_call_label	%(ll)
	nop
	rsb_seq_label %(ll)
	addq	$8,%rsp
	ll=ll+1
	.endr
	.endm

ENTRY(rsb_flush)
	rsb_seq	32
	ret

/* all callers already saved %rax, %rdx, and %rcx */
ENTRY(handle_ibrs_entry)
	cmpb	$0,hw_ibrs_ibpb_active(%rip)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	orl	$(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx
	wrmsr
	movb	$1,PCPU(IBPB_SET)
	testl	$CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip)
	je	rsb_flush
1:	ret
END(handle_ibrs_entry)

ENTRY(handle_ibrs_exit)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit)

/* registers-neutral version, but needs stack */
ENTRY(handle_ibrs_exit_rs)
	cmpb	$0,PCPU(IBPB_SET)
	je	1f
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	movl	$MSR_IA32_SPEC_CTRL,%ecx
	rdmsr
	andl	$~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax
	andl	$~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx
	wrmsr
	popq	%rcx
	popq	%rdx
	popq	%rax
	movb	$0,PCPU(IBPB_SET)
1:	ret
END(handle_ibrs_exit_rs)

	.noaltmacro

/*
 * Flush L1D cache.  Load enough of the data from the kernel text
 * to flush existing L1D content.
 *
 * N.B. The function does not follow ABI calling conventions, it corrupts %rbx.
 * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags
 * registers are clobbered.  The NMI handler caller only needs %r13 and %r15
 * preserved.
 */
ENTRY(flush_l1d_sw)
#define	L1D_FLUSH_SIZE	(64 * 1024)
	movq	$KERNBASE, %r9
	movq	$-L1D_FLUSH_SIZE, %rcx
	/*
	 * pass 1: Preload TLB.
	 * Kernel text is mapped using superpages.  TLB preload is
	 * done for the benefit of older CPUs which split 2M page
	 * into 4k TLB entries.
	 */
1:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$PAGE_SIZE, %rcx
	jne	1b
	xorl	%eax, %eax
	cpuid
	movq	$-L1D_FLUSH_SIZE, %rcx
	/* pass 2: Read each cache line. */
2:	movb	L1D_FLUSH_SIZE(%r9, %rcx), %al
	addq	$64, %rcx
	jne	2b
	lfence
	ret
#undef	L1D_FLUSH_SIZE
END(flush_l1d_sw)

ENTRY(flush_l1d_sw_abi)
	pushq	%rbx
	call	flush_l1d_sw
	popq	%rbx
	ret
END(flush_l1d_sw_abi)

ENTRY(mds_handler_void)
	retq
END(mds_handler_void)

ENTRY(mds_handler_verw)
	subq	$8, %rsp
	movw	%ds, (%rsp)
	verw	(%rsp)
	addq	$8, %rsp
	retq
END(mds_handler_verw)

ENTRY(mds_handler_ivb)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	mfence
	movl	$40, %ecx
	addq	$16, %rdx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_ivb)

ENTRY(mds_handler_bdw)
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdi
	pushq	%rsi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rbx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movq	%rbx, %rdi
	movq	%rbx, %rsi
	movl	$40, %ecx
2:	movntdq	%xmm0, (%rbx)
	addq	$16, %rbx
	decl	%ecx
	jnz	2b
	mfence
	movl	$1536, %ecx
	rep; movsb
	lfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rsi
	popq	%rdi
	popq	%rcx
	popq	%rbx
	popq	%rax
	retq
END(mds_handler_bdw)

ENTRY(mds_handler_skl_sse)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	lfence
	orpd	(%rdx), %xmm0
	orpd	(%rdx), %xmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	movdqa	PCPU(MDS_TMP), %xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_sse)

ENTRY(mds_handler_skl_avx)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa	%ymm0, PCPU(MDS_TMP)
	vpxor	%ymm0, %ymm0, %ymm0

	lfence
	vorpd	(%rdx), %ymm0, %ymm0
	vorpd	(%rdx), %ymm0, %ymm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa	PCPU(MDS_TMP), %ymm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx)

ENTRY(mds_handler_skl_avx512)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx
	pushq	%rdi

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdi
	movq	PCPU(MDS_BUF64), %rdx
	vmovdqa64	%zmm0, PCPU(MDS_TMP)
	vpxord	%zmm0, %zmm0, %zmm0

	lfence
	vorpd	(%rdx), %zmm0, %zmm0
	vorpd	(%rdx), %zmm0, %zmm0
	xorl	%eax, %eax
2:	clflushopt	5376(%rdi, %rax, 8)
	addl	$8, %eax
	cmpl	$8 * 12, %eax
	jb	2b
	sfence
	movl	$6144, %ecx
	xorl	%eax, %eax
	rep; stosb
	mfence

	vmovdqa64	PCPU(MDS_TMP), %zmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rdi
	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_skl_avx512)

ENTRY(mds_handler_silvermont)
	pushq	%rax
	pushq	%rdx
	pushq	%rcx

	movq	%cr0, %rax
	testb	$CR0_TS, %al
	je	1f
	clts
1:	movq	PCPU(MDS_BUF), %rdx
	movdqa	%xmm0, PCPU(MDS_TMP)
	pxor	%xmm0, %xmm0

	movl	$16, %ecx
2:	movntdq	%xmm0, (%rdx)
	addq	$16, %rdx
	decl	%ecx
	jnz	2b
	mfence

	movdqa	PCPU(MDS_TMP),%xmm0
	testb	$CR0_TS, %al
	je	3f
	movq	%rax, %cr0
3:	popq	%rcx
	popq	%rdx
	popq	%rax
	retq
END(mds_handler_silvermont)