1/*
2 * memset - fill memory with a constant byte
3 *
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 */
8
9/* Assumptions:
10 *
11 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
12 *
13 */
14
15#include "../asmdefs.h"
16
/* Register aliases.  Per AAPCS64, x0-x2 carry the memset arguments;
   x0 is also the return value and is never modified.  */
#define dstin	x0	/* in: destination pointer; also the return value.  */
#define val	x1	/* in: fill value (only the low byte is significant).  */
#define valw	w1	/* 32-bit view of val, used for sub-8-byte stores.  */
#define count	x2	/* in: number of bytes to fill.  */
#define dst	x3	/* scratch: current (aligned) write pointer.  */
#define dstend	x4	/* scratch: one past the last byte (dstin + count).  */
#define zva_val	x5	/* scratch: DCZID_EL0 fields for the DC ZVA size check.  */
24
/* void *__memset_aarch64 (void *dstin, int val, size_t count)

   Fill COUNT bytes at DSTIN with the low byte of VAL; x0 (dstin) is
   never modified, so it is implicitly the return value.  Sizes are
   dispatched into classes that use overlapping stores from both ends
   of the buffer (writing the same byte pattern, so overlap is
   harmless).  Large zero-fills use the DC ZVA cache-zero instruction.  */
ENTRY (__memset_aarch64)

	dup	v0.16B, valw		/* Broadcast the fill byte to all 16 lanes.  */
	add	dstend, dstin, count

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	mov	val, v0.D[0]		/* 8 replicated fill bytes for GPR stores.  */

	/* Set 0..15 bytes.  Test count bits 3/2/1/0 and store from both
	   ends; the two stores may overlap but write identical bytes.  */
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	nop				/* Pad so label 1 stays aligned.  */
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes (b.hs above means count >= 16).  */
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)
	str	q0, [dstend, -16]	/* 16..31: two possibly-overlapping stores.  */
	tbz	count, 5, 1f
	str	q0, [dstin, 16]		/* 32..63: two more from each end.  */
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
L(set_long):
	and	valw, valw, 255		/* Only the low byte of the fill value matters.  */
	bic	dst, dstin, 15		/* Align the write pointer down to 16 bytes.  */
	str	q0, [dstin]		/* Head store covers the unaligned prefix.  */
	cmp	count, 160
	ccmp	valw, 0, 0, hs		/* Take DC ZVA path only if count >= 160 && val == 0.  */
	b.ne	L(no_zva)

#ifndef SKIP_ZVA_CHECK
	mrs	zva_val, dczid_el0
	and	zva_val, zva_val, 31	/* Extract DZP bit and BS (block size) field.  */
	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
	b.ne	L(no_zva)
#endif
	/* Store up to the first 64-byte boundary, then zero whole
	   64-byte blocks with DC ZVA.  */
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63		/* Align down to the 64-byte ZVA block size.  */
	sub	count, dstend, dst	/* Count is now 64 too large.  */
	sub	count, count, 128	/* Adjust count and bias for loop.  */

	.p2align 4
L(zva_loop):
	add	dst, dst, 64
	dc	zva, dst		/* Zero one 64-byte block.  */
	subs	count, count, 64
	b.hi	L(zva_loop)
	/* Tail: last 64 bytes from the end; may overlap the final block.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
L(no_zva_loop):
	stp	q0, q0, [dst, 32]	/* 64 bytes per iteration via two stp...  */
	stp	q0, q0, [dst, 64]!	/* ...with pre-index advancing dst.  */
	subs	count, count, 64
	b.hi	L(no_zva_loop)
	/* Tail: last 64 bytes from the end; may overlap the loop stores.  */
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (__memset_aarch64)
116