1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
24  */
25 
26 #include <sys/zfs_context.h>
27 #include <sys/zio_checksum.h>
28 
29 #include "blake3_impl.h"
30 
/*
 * All BLAKE3 implementations compiled into this build. The generic
 * implementation is always present and is always the first entry; the
 * other entries are included only when the architecture/feature macros
 * guarding them are defined. Whether an entry can actually be used is
 * decided at run time by blake3_impl_init() via its is_supported() hook.
 */
static const blake3_ops_t *const blake3_impls[] = {
	&blake3_generic_impl,
	/* sse2/sse41 entries are also built for aarch64 and LE PPC64 */
#if defined(__aarch64__) || \
	(defined(__x86_64) && defined(HAVE_SSE2)) || \
	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
	&blake3_sse2_impl,
#endif
#if defined(__aarch64__) || \
	(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
	&blake3_sse41_impl,
#endif
	/* avx2/avx512 entries are x86_64 only */
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
	&blake3_avx2_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
	&blake3_avx512_impl,
#endif
};
50 
/*
 * Special selector values for blake3_impl_chosen. They occupy the top of
 * the uint32_t range; every other value is used as an index into
 * blake3_supp_impls[].
 */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX - 1)

/* Read a uint32_t selector through a volatile-qualified lvalue */
#define	IMPL_READ(i)	(*(volatile uint32_t *) &(i))
56 
/* Set to B_TRUE by blake3_impl_init() once the supported list is built */
static boolean_t blake3_initialized = B_FALSE;

/*
 * Ops handed out while "fastest" is selected. blake3_impl_init() fills it
 * with a copy of the first blake3_impls[] entry and
 * blake3_impl_set_fastest() overwrites it with a copy of a supported
 * implementation; both copies replace the whole structure, including .name.
 */
static blake3_ops_t blake3_fastest_impl = {
	.name = "fastest"
};

/*
 * Implementations from blake3_impls[] that reported themselves supported,
 * in table order. Only the first blake3_supp_impls_cnt slots are filled.
 */
static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
static uint32_t blake3_supp_impls_cnt = 0;

/*
 * Currently selected implementation: IMPL_FASTEST, IMPL_CYCLE, or an
 * index into blake3_supp_impls[].
 */
static uint32_t blake3_impl_chosen = IMPL_FASTEST;
71 
/*
 * Names accepted by blake3_impl_setname() that do not refer to a concrete
 * implementation, together with the selector value stored for each.
 */
static struct blake3_impl_selector {
	const char *name;	/* name as given by the user */
	uint32_t sel;		/* value stored in blake3_impl_chosen */
} blake3_impl_selectors[] = {
	{ "cycle",	IMPL_CYCLE },
	{ "fastest",	IMPL_FASTEST }
};
79 
80 /* check the supported implementations */
81 static void blake3_impl_init(void)
82 {
83 	int i, c;
84 
85 	/* init only once */
86 	if (likely(blake3_initialized))
87 		return;
88 
89 	/* move supported implementations into blake3_supp_impls */
90 	for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
91 		const blake3_ops_t *impl = blake3_impls[i];
92 
93 		if (impl->is_supported && impl->is_supported())
94 			blake3_supp_impls[c++] = impl;
95 	}
96 	blake3_supp_impls_cnt = c;
97 
98 	/* first init generic impl, may be changed via set_fastest() */
99 	memcpy(&blake3_fastest_impl, blake3_impls[0],
100 	    sizeof (blake3_fastest_impl));
101 	blake3_initialized = B_TRUE;
102 }
103 
104 /* get number of supported implementations */
105 uint32_t
106 blake3_impl_getcnt(void)
107 {
108 	blake3_impl_init();
109 	return (blake3_supp_impls_cnt);
110 }
111 
112 /* get id of selected implementation */
113 uint32_t
114 blake3_impl_getid(void)
115 {
116 	return (IMPL_READ(blake3_impl_chosen));
117 }
118 
119 /* get name of selected implementation */
120 const char *
121 blake3_impl_getname(void)
122 {
123 	uint32_t impl = IMPL_READ(blake3_impl_chosen);
124 
125 	blake3_impl_init();
126 	switch (impl) {
127 	case IMPL_FASTEST:
128 		return ("fastest");
129 	case IMPL_CYCLE:
130 		return ("cycle");
131 	default:
132 		return (blake3_supp_impls[impl]->name);
133 	}
134 }
135 
136 /* setup id as fastest implementation */
137 void
138 blake3_impl_set_fastest(uint32_t id)
139 {
140 	/* setup fastest impl */
141 	memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
142 	    sizeof (blake3_fastest_impl));
143 }
144 
145 /* set implementation by id */
146 void
147 blake3_impl_setid(uint32_t id)
148 {
149 	blake3_impl_init();
150 	switch (id) {
151 	case IMPL_FASTEST:
152 		atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
153 		break;
154 	case IMPL_CYCLE:
155 		atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
156 		break;
157 	default:
158 		ASSERT3U(id, >=, 0);
159 		ASSERT3U(id, <, blake3_supp_impls_cnt);
160 		atomic_swap_32(&blake3_impl_chosen, id);
161 		break;
162 	}
163 }
164 
165 /* set implementation by name */
166 int
167 blake3_impl_setname(const char *val)
168 {
169 	uint32_t impl = IMPL_READ(blake3_impl_chosen);
170 	size_t val_len;
171 	int i, err = -EINVAL;
172 
173 	blake3_impl_init();
174 	val_len = strlen(val);
175 	while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
176 		val_len--;
177 
178 	/* check mandatory implementations */
179 	for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
180 		const char *name = blake3_impl_selectors[i].name;
181 
182 		if (val_len == strlen(name) &&
183 		    strncmp(val, name, val_len) == 0) {
184 			impl = blake3_impl_selectors[i].sel;
185 			err = 0;
186 			break;
187 		}
188 	}
189 
190 	if (err != 0 && blake3_initialized) {
191 		/* check all supported implementations */
192 		for (i = 0; i < blake3_supp_impls_cnt; i++) {
193 			const char *name = blake3_supp_impls[i]->name;
194 
195 			if (val_len == strlen(name) &&
196 			    strncmp(val, name, val_len) == 0) {
197 				impl = i;
198 				err = 0;
199 				break;
200 			}
201 		}
202 	}
203 
204 	if (err == 0) {
205 		atomic_swap_32(&blake3_impl_chosen, impl);
206 	}
207 
208 	return (err);
209 }
210 
211 const blake3_ops_t *
212 blake3_impl_get_ops(void)
213 {
214 	const blake3_ops_t *ops = NULL;
215 	uint32_t impl = IMPL_READ(blake3_impl_chosen);
216 
217 	blake3_impl_init();
218 	switch (impl) {
219 	case IMPL_FASTEST:
220 		ASSERT(blake3_initialized);
221 		ops = &blake3_fastest_impl;
222 		break;
223 	case IMPL_CYCLE:
224 		/* Cycle through supported implementations */
225 		ASSERT(blake3_initialized);
226 		ASSERT3U(blake3_supp_impls_cnt, >, 0);
227 		static uint32_t cycle_count = 0;
228 		uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
229 		ops = blake3_supp_impls[idx];
230 		break;
231 	default:
232 		ASSERT3U(blake3_supp_impls_cnt, >, 0);
233 		ASSERT3U(impl, <, blake3_supp_impls_cnt);
234 		ops = blake3_supp_impls[impl];
235 		break;
236 	}
237 
238 	ASSERT3P(ops, !=, NULL);
239 	return (ops);
240 }
241 
242 #if defined(_KERNEL)
243 
/*
 * Table of max_ncpus pointers, each to its own BLAKE3_CTX allocation.
 * Allocated by blake3_per_cpu_ctx_init() and released by
 * blake3_per_cpu_ctx_fini().
 */
void **blake3_per_cpu_ctx;
245 
246 void
247 blake3_per_cpu_ctx_init(void)
248 {
249 	/*
250 	 * Create "The Godfather" ptr to hold all blake3 ctx
251 	 */
252 	blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
253 	for (int i = 0; i < max_ncpus; i++) {
254 		blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
255 		    KM_SLEEP);
256 	}
257 
258 	/* init once in kernel mode */
259 	blake3_impl_init();
260 }
261 
262 void
263 blake3_per_cpu_ctx_fini(void)
264 {
265 	for (int i = 0; i < max_ncpus; i++) {
266 		memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
267 		kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
268 	}
269 	memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
270 	kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
271 }
272 
/*
 * Format string for one entry of the implementation list: the name is
 * wrapped in brackets when i is the selected implementation impl, and
 * printed plain otherwise. Both forms end with a space.
 */
#define	IMPL_FMT(impl, i)	(((impl) == (i)) ? "[%s] " : "%s ")
274 
275 #if defined(__linux__)
276 
277 static int
278 blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
279 {
280 	const uint32_t impl = IMPL_READ(blake3_impl_chosen);
281 	char *fmt;
282 	int cnt = 0;
283 
284 	/* cycling */
285 	fmt = IMPL_FMT(impl, IMPL_CYCLE);
286 	cnt += sprintf(buffer + cnt, fmt, "cycle");
287 
288 	/* list fastest */
289 	fmt = IMPL_FMT(impl, IMPL_FASTEST);
290 	cnt += sprintf(buffer + cnt, fmt, "fastest");
291 
292 	/* list all supported implementations */
293 	for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
294 		fmt = IMPL_FMT(impl, i);
295 		cnt += sprintf(buffer + cnt, fmt,
296 		    blake3_supp_impls[i]->name);
297 	}
298 
299 	return (cnt);
300 }
301 
302 static int
303 blake3_param_set(const char *val, zfs_kernel_param_t *unused)
304 {
305 	(void) unused;
306 	return (blake3_impl_setname(val));
307 }
308 
309 #elif defined(__FreeBSD__)
310 
311 #include <sys/sbuf.h>
312 
313 static int
314 blake3_param(ZFS_MODULE_PARAM_ARGS)
315 {
316 	int err;
317 
318 	if (req->newptr == NULL) {
319 		const uint32_t impl = IMPL_READ(blake3_impl_chosen);
320 		const int init_buflen = 64;
321 		const char *fmt;
322 		struct sbuf *s;
323 
324 		s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
325 
326 		/* cycling */
327 		fmt = IMPL_FMT(impl, IMPL_CYCLE);
328 		(void) sbuf_printf(s, fmt, "cycle");
329 
330 		/* list fastest */
331 		fmt = IMPL_FMT(impl, IMPL_FASTEST);
332 		(void) sbuf_printf(s, fmt, "fastest");
333 
334 		/* list all supported implementations */
335 		for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
336 			fmt = IMPL_FMT(impl, i);
337 			(void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
338 		}
339 
340 		err = sbuf_finish(s);
341 		sbuf_delete(s);
342 
343 		return (err);
344 	}
345 
346 	char buf[16];
347 
348 	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
349 	if (err) {
350 		return (err);
351 	}
352 
353 	return (-blake3_impl_setname(buf));
354 }
355 #endif
356 
357 #undef IMPL_FMT
358 
359 ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl,
360     blake3_param_set, blake3_param_get, ZMOD_RW, \
361 	"Select BLAKE3 implementation.");
362 #endif
363