/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

/*
 * The purpose of this test is to ensure that we honor several aspects of our
 * lock ordering. In particular, we want to validate our starvation
 * properties: pending writers should take priority over pending readers and
 * the controller lock takes priority over the various namespace locks. While
 * we test all kinds of locks here, we only use the controller fd to simplify
 * the test design.
 *
 * To do this, we utilize our blocking locks. In particular, we take a first
 * lock and then spin up threads that should all block on that. To deal with
 * the inherent race of knowing when a thread is blocked or not, we utilize
 * libproc and wait until the thread has the PR_ASLEEP flag set and is in an
 * ioctl system call. This ensures that the waiters are enqueued in the
 * appropriate order.
 */

#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <time.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <thread.h>
#include <synch.h>
#include <strings.h>

#include "nvme_ioctl_util.h"

/*
 * Maximum number of threads that we'll spin up for locks.
 */
#define	MAX_LOCKS	10

typedef struct {
	thread_t loi_thread;
	const nvme_ioctl_lock_t *loi_lock;
} lock_order_info_t;

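/*
 * Shared state that records the order in which the blocked threads acquire
 * the lock. lock_results and lock_nextres are protected by lock_mutex.
 * lock_valid indicates that the main thread has dropped the initial lock and
 * that it is therefore legal for a waiter to have acquired it.
 */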
static mutex_t lock_mutex;
static lock_order_info_t lock_results[MAX_LOCKS];
static uint32_t lock_nextres;
static bool lock_valid;

typedef struct lock_order_test lock_order_test_t;
typedef bool (*lock_order_verif_f)(const lock_order_test_t *, uint32_t);

struct lock_order_test {
	const char *lot_desc;
	const nvme_ioctl_lock_t *lot_initlock;
	const nvme_ioctl_lock_t *lot_locks[MAX_LOCKS];
	lock_order_verif_f lot_verif;
};

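/*
 * Print the observed lock acquisition order to aid debugging when a test
 * fails. Callers must hold lock_mutex.
 */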
static void
lock_verify_dump(void)
{
	for (size_t i = 0; i < lock_nextres; i++) {
		const nvme_ioctl_lock_t *lock = lock_results[i].loi_lock;
		const char *targ = lock->nil_ent == NVME_LOCK_E_CTRL ?
		    "controller" : "namespace";
		const char *level = lock->nil_level == NVME_LOCK_L_READ ?
		    "read" : "write";
		(void) printf("\t[%zu] = { %s, %s }\n", i, targ, level);
	}
}
/*
 * Verify that all of the writers in the test are found ahead of any of the
 * readers in the test.
 */
static bool
lock_verify_write_before_read(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;
	size_t nwrite = 0;
	size_t nread = 0;

	for (size_t i = 0; i < MAX_LOCKS; i++) {
		if (test->lot_locks[i] == NULL)
			break;
		if (test->lot_locks[i]->nil_level == NVME_LOCK_L_READ) {
			nread++;
		} else {
			nwrite++;
		}
	}
	VERIFY3U(nwrite + nread, ==, nthr);

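	/*
	 * Walk the recorded acquisition order: every writer must appear
	 * before the first reader.
	 */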
	mutex_enter(&lock_mutex);
	for (size_t i = 0; i < nthr; i++) {
		nvme_lock_level_t exp_level;
		const char *str;
		const lock_order_info_t *res = &lock_results[i];

		if (nwrite > 0) {
			exp_level = NVME_LOCK_L_WRITE;
			str = "WRITE";
			nwrite--;
		} else {
			exp_level = NVME_LOCK_L_READ;
			str = "READ";
			nread--;
		}

		if (exp_level != res->loi_lock->nil_level) {
			pass = false;
			warnx("TEST FAILED: %s: lock %zu (tid %u, ent %u, "
			    "level %u) was the wrong level, expected level %u "
			    "(%s)", test->lot_desc, i, res->loi_thread,
			    res->loi_lock->nil_ent, res->loi_lock->nil_level,
			    exp_level, str);
		}
	}
	VERIFY3U(nwrite, ==, 0);
	VERIFY3U(nread, ==, 0);

	if (!pass) {
		lock_verify_dump();
	}
	mutex_exit(&lock_mutex);

	return (pass);
}

/*
 * This verifies that all controller-level locks come before the namespace
 * locks. Note, this also calls the write-before-read checks and therefore
 * assumes that we have an ordering that supports that.
 */
static bool
lock_verify_ctrl_before_ns(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;
	size_t nctrl = 0;
	size_t nns = 0;

	for (size_t i = 0; i < MAX_LOCKS; i++) {
		if (test->lot_locks[i] == NULL)
			break;
		if (test->lot_locks[i]->nil_ent == NVME_LOCK_E_CTRL) {
			nctrl++;
		} else {
			nns++;
		}
	}
	VERIFY3U(nctrl + nns, ==, nthr);

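	/*
	 * Walk the recorded acquisition order: every controller lock must
	 * appear before the first namespace lock.
	 */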
	mutex_enter(&lock_mutex);
	for (size_t i = 0; i < nthr; i++) {
		nvme_lock_ent_t exp_ent;
		const char *str;
		const lock_order_info_t *res = &lock_results[i];

		if (nctrl > 0) {
			exp_ent = NVME_LOCK_E_CTRL;
			str = "ctrl";
			nctrl--;
		} else {
			exp_ent = NVME_LOCK_E_NS;
			str = "ns";
			nns--;
		}

		if (exp_ent != res->loi_lock->nil_ent) {
			pass = false;
			warnx("TEST FAILED: %s: lock %zu (tid %u, ent %u, "
			    "level %u) was the wrong entity, expected type %u "
			    "(%s)", test->lot_desc, i, res->loi_thread,
			    res->loi_lock->nil_ent, res->loi_lock->nil_level,
			    exp_ent, str);
		}
	}

	VERIFY3U(nctrl, ==, 0);
	VERIFY3U(nns, ==, 0);

	if (!pass) {
		lock_verify_dump();
	}
	mutex_exit(&lock_mutex);

	return (pass);
}

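/*
 * Composite check: enforce both the controller-before-namespace and the
 * write-before-read orderings.
 */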
static bool
lock_verif_ent_level(const lock_order_test_t *test, uint32_t nthr)
{
	bool pass = true;

	if (!lock_verify_ctrl_before_ns(test, nthr))
		pass = false;
	if (!lock_verify_write_before_read(test, nthr))
		pass = false;
	return (pass);
}

/*
 * Each description below is fashioned as the starting lock followed by the
 * ordering that we're testing.
 */
static const lock_order_test_t lock_order_tests[] = { {
	.lot_desc = "ns(rd): pending ns writer doesn't allow more ns readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(wr): pending ns writer beats waiting ns reader",
	.lot_initlock = &nvme_test_ns_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): all pend ns writers beat prior pend readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock, &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer doesn't allow more ns readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(wr): pending ctrl writer beats prior pend ns readers",
	.lot_initlock = &nvme_test_ns_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer doesn't allow ctrl readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock,
	    &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ns(rd): pending ctrl writer beats pending ns writer "
	    "and readers",
	.lot_initlock = &nvme_test_ns_rdlock,
	.lot_locks = { &nvme_test_ns_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_ctrl_before_ns,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ns read",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_rdlock },
	.lot_verif = lock_verif_ent_level,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ns writer",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verif_ent_level,
}, {
	.lot_desc = "ctrl(rd): pending ctrl writer blocks ctrl reader",
	.lot_initlock = &nvme_test_ctrl_rdlock,
	.lot_locks = { &nvme_test_ctrl_wrlock, &nvme_test_ctrl_rdlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ctrl(wr): ctrl writer beats all pending readers",
	.lot_initlock = &nvme_test_ctrl_wrlock,
	.lot_locks = { &nvme_test_ctrl_rdlock, &nvme_test_ctrl_rdlock,
	    &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ctrl_wrlock },
	.lot_verif = lock_verify_write_before_read,
}, {
	.lot_desc = "ctrl(wr): ns writer beats all pending ns readers",
	.lot_initlock = &nvme_test_ctrl_wrlock,
	.lot_locks = { &nvme_test_ns_rdlock, &nvme_test_ns_rdlock,
	    &nvme_test_ns_wrlock, &nvme_test_ns_rdlock, &nvme_test_ns_wrlock },
	.lot_verif = lock_verify_write_before_read,
} };

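/*
 * Worker thread: obtain our own controller fd, perform a blocking lock ioctl
 * for the assigned lock template, and record the order in which the lock was
 * acquired.
 */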
static void *
lock_thread(void *arg)
{
	const nvme_ioctl_lock_t *tmpl = arg;
	nvme_ioctl_lock_t lock = *tmpl;
	int ctrlfd = nvme_ioctl_test_get_fd(0);
	const char *targ = tmpl->nil_ent == NVME_LOCK_E_CTRL ?
	    "controller" : "namespace";
	const char *level = tmpl->nil_level == NVME_LOCK_L_READ ?
	    "read" : "write";

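	/*
	 * Make sure our local copy of the lock request blocks: clear
	 * NVME_LOCK_F_DONT_BLOCK in case the shared template has it set.
	 */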
	lock.nil_flags &= ~NVME_LOCK_F_DONT_BLOCK;
	nvme_ioctl_test_lock(ctrlfd, &lock);

	mutex_enter(&lock_mutex);
	if (!lock_valid) {
		errx(EXIT_FAILURE, "TEST FAILED: thread 0x%x managed to "
		    "acquire the %s %s lock before the main thread unlocked: "
		    "test cannot continue", thr_self(), targ, level);
	}
	VERIFY3U(lock_nextres, <, MAX_LOCKS);
	lock_results[lock_nextres].loi_thread = thr_self();
	lock_results[lock_nextres].loi_lock = tmpl;
	lock_nextres++;
	mutex_exit(&lock_mutex);

	VERIFY0(close(ctrlfd));

	thr_exit(NULL);
}

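/*
 * Run a single lock ordering test: take the initial lock, spin up one thread
 * per requested lock (waiting for each to block before creating the next so
 * that the waiters queue in a known order), then drop the initial lock and
 * verify the order in which the threads acquired it.
 */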
static bool
lock_order_test(const lock_order_test_t *test)
{
	int ctrlfd;
	uint32_t nthr = 0;
	thread_t thrids[MAX_LOCKS];

	/*
	 * Take whatever initial lock this test requires before doing anything
	 * else.
	 */
	ctrlfd = nvme_ioctl_test_get_fd(0);
	nvme_ioctl_test_lock(ctrlfd, test->lot_initlock);

	mutex_enter(&lock_mutex);
	(void) memset(&lock_results, 0, sizeof (lock_results));
	lock_nextres = 0;
	lock_valid = false;
	mutex_exit(&lock_mutex);

	for (uint32_t i = 0; i < MAX_LOCKS; i++, nthr++) {
		int err;

		if (test->lot_locks[i] == NULL)
			break;

		err = thr_create(NULL, 0, lock_thread,
		    (void *)test->lot_locks[i], 0, &thrids[i]);
		if (err != 0) {
			errc(EXIT_FAILURE, err, "TEST FAILED: %s: cannot "
			    "continue because we failed to create thread %u",
			    test->lot_desc, i);
		}

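		/*
		 * Poll until libproc (via nvme_ioctl_test_thr_blocked())
		 * reports that this thread is asleep in its lock ioctl before
		 * creating the next thread, so that the waiters are enqueued
		 * in a deterministic order.
		 */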
		while (!nvme_ioctl_test_thr_blocked(thrids[i])) {
			struct timespec sleep;

			sleep.tv_sec = 0;
			sleep.tv_nsec = MSEC2NSEC(10);
			(void) nanosleep(&sleep, NULL);
		}
	}

	/*
	 * Now that all threads have been launched, indicate to them that it
	 * is okay to take the lock, then close our fd to drop the initial
	 * lock, let them run loose, and wait for them.
	 */
	mutex_enter(&lock_mutex);
	lock_valid = true;
	mutex_exit(&lock_mutex);
	VERIFY0(close(ctrlfd));
	for (uint32_t i = 0; i < nthr; i++) {
		int err = thr_join(thrids[i], NULL, NULL);
		if (err != 0) {
			errc(EXIT_FAILURE, err, "TEST FAILED: %s: cannot "
			    "continue because we failed to join thread %u",
			    test->lot_desc, i);
		}
	}
	mutex_enter(&lock_mutex);
	VERIFY3U(lock_nextres, ==, nthr);
	mutex_exit(&lock_mutex);

	if (test->lot_verif(test, nthr)) {
		(void) printf("TEST PASSED: %s\n", test->lot_desc);
		return (true);
	}

	return (false);
}

int
main(void)
{
	int ret = EXIT_SUCCESS;

	VERIFY0(mutex_init(&lock_mutex, USYNC_THREAD | LOCK_ERRORCHECK, NULL));

	for (size_t i = 0; i < ARRAY_SIZE(lock_order_tests); i++) {
		if (!lock_order_test(&lock_order_tests[i])) {
			ret = EXIT_FAILURE;
		}
	}

	VERIFY0(mutex_destroy(&lock_mutex));
	return (ret);
}