xref: /netbsd/sys/fs/udf/udf_strat_sequential.c (revision 87d35fd6)
1 /* $NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $ */
2 
3 /*
4  * Copyright (c) 2006, 2008 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_sequential.c,v 1.20 2023/06/27 09:58:50 reinoud Exp $");
32 #endif /* not lint */
33 
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
48 #include <sys/buf.h>
49 #include <sys/file.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
55 #include <sys/stat.h>
56 #include <sys/conf.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
60 
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
63 
64 #include "udf.h"
65 #include "udf_subr.h"
66 #include "udf_bswap.h"
67 
68 
/*
 * Accessors for private data hanging off generic kernel structures. The
 * macro arguments are parenthesised so that any pointer expression (not
 * just a plain identifier) can be passed safely.
 */
/* UDF node stored in a vnode's v_data pointer */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* this strategy's private state stored in a mount's strategy_private */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
71 
72 /* --------------------------------------------------------------------- */
73 
/*
 * Indices of our scheduler buffer queues:
 *   READING     - all read requests
 *   WRITING     - writes of type UDF_C_ABSOLUTE
 *   SEQWRITING  - all other writes
 */
#define UDF_SHED_READING	0
#define UDF_SHED_WRITING	1
#define UDF_SHED_SEQWRITING	2

/* number of queues; sizes the per-queue arrays in struct strat_private */
#define UDF_SHED_MAX 3
80 
81 struct strat_private {
82 	struct pool		 desc_pool;	 	/* node descriptors */
83 
84 	lwp_t			*queue_lwp;
85 	kcondvar_t		 discstrat_cv;		/* to wait on       */
86 	kmutex_t		 discstrat_mutex;	/* disc strategy    */
87 
88 	int			 thread_running;	/* thread control */
89 	int			 run_thread;		/* thread control */
90 	int			 thread_finished;	/* thread control */
91 
92 	int			 sync_req;		/* thread control */
93 	int			 cur_queue;
94 
95 	struct disk_strategy	 old_strategy_setting;
96 	struct bufq_state	*queues[UDF_SHED_MAX];
97 	struct timespec		 last_queued[UDF_SHED_MAX];
98 };
99 
100 
101 /* --------------------------------------------------------------------- */
102 
103 static void
udf_wr_nodedscr_callback(struct buf * buf)104 udf_wr_nodedscr_callback(struct buf *buf)
105 {
106 	struct udf_node *udf_node;
107 
108 	KASSERT(buf);
109 	KASSERT(buf->b_data);
110 
111 	/* called when write action is done */
112 	DPRINTF(WRITE, ("udf_wr_nodedscr_callback(): node written out\n"));
113 
114 	udf_node = VTOI(buf->b_vp);
115 	if (udf_node == NULL) {
116 		putiobuf(buf);
117 		printf("udf_wr_node_callback: NULL node?\n");
118 		return;
119 	}
120 
121 	/* XXX right flags to mark dirty again on error? */
122 	if (buf->b_error) {
123 		udf_node->i_flags |= IN_MODIFIED | IN_ACCESSED;
124 		/* XXX TODO reschedule on error */
125 	}
126 
127 	/* decrement outstanding_nodedscr */
128 	KASSERT(udf_node->outstanding_nodedscr >= 1);
129 	udf_node->outstanding_nodedscr--;
130 	if (udf_node->outstanding_nodedscr == 0) {
131 		/* first unlock the node */
132 		UDF_UNLOCK_NODE(udf_node, 0);
133 		cv_broadcast(&udf_node->node_lock);
134 	}
135 
136 	putiobuf(buf);
137 }
138 
139 /* --------------------------------------------------------------------- */
140 
141 static int
udf_create_logvol_dscr_seq(struct udf_strat_args * args)142 udf_create_logvol_dscr_seq(struct udf_strat_args *args)
143 {
144 	union dscrptr   **dscrptr = &args->dscr;
145 	struct udf_mount *ump = args->ump;
146 	struct strat_private *priv = PRIV(ump);
147 	uint32_t lb_size;
148 
149 	lb_size = udf_rw32(ump->logical_vol->lb_size);
150 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
151 	memset(*dscrptr, 0, lb_size);
152 
153 	return 0;
154 }
155 
156 
157 static void
udf_free_logvol_dscr_seq(struct udf_strat_args * args)158 udf_free_logvol_dscr_seq(struct udf_strat_args *args)
159 {
160 	union dscrptr    *dscr = args->dscr;
161 	struct udf_mount *ump  = args->ump;
162 	struct strat_private *priv = PRIV(ump);
163 
164 	pool_put(&priv->desc_pool, dscr);
165 }
166 
167 
168 static int
udf_read_logvol_dscr_seq(struct udf_strat_args * args)169 udf_read_logvol_dscr_seq(struct udf_strat_args *args)
170 {
171 	union dscrptr   **dscrptr = &args->dscr;
172 	union dscrptr    *tmpdscr;
173 	struct udf_mount *ump = args->ump;
174 	struct long_ad   *icb = args->icb;
175 	struct strat_private *priv = PRIV(ump);
176 	uint32_t lb_size;
177 	uint32_t sector, dummy;
178 	int error;
179 
180 	lb_size = udf_rw32(ump->logical_vol->lb_size);
181 
182 	error = udf_translate_vtop(ump, icb, &sector, &dummy);
183 	if (error)
184 		return error;
185 
186 	/* try to read in fe/efe */
187 	error = udf_read_phys_dscr(ump, sector, M_UDFTEMP, &tmpdscr);
188 	if (error)
189 		return error;
190 
191 	*dscrptr = pool_get(&priv->desc_pool, PR_WAITOK);
192 	memcpy(*dscrptr, tmpdscr, lb_size);
193 	free(tmpdscr, M_UDFTEMP);
194 
195 	return 0;
196 }
197 
198 
199 static int
udf_write_logvol_dscr_seq(struct udf_strat_args * args)200 udf_write_logvol_dscr_seq(struct udf_strat_args *args)
201 {
202 	union dscrptr    *dscr     = args->dscr;
203 	struct udf_mount *ump      = args->ump;
204 	struct udf_node  *udf_node = args->udf_node;
205 	struct long_ad   *icb      = args->icb;
206 	int               waitfor  = args->waitfor;
207 	uint32_t logsectornr, sectornr, dummy;
208 	int error, vpart;
209 
210 	/*
211 	 * we have to decide if we write it out sequential or at its fixed
212 	 * position by examining the partition its (to be) written on.
213 	 */
214 	vpart       = udf_rw16(udf_node->loc.loc.part_num);
215 	logsectornr = udf_rw32(icb->loc.lb_num);
216 	sectornr    = 0;
217 	if (ump->vtop_tp[vpart] != UDF_VTOP_TYPE_VIRT) {
218 		error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
219 		if (error)
220 			goto out;
221 	}
222 
223 	if (waitfor) {
224 		DPRINTF(WRITE, ("udf_write_logvol_dscr: sync write\n"));
225 
226 		error = udf_write_phys_dscr_sync(ump, udf_node, UDF_C_NODE,
227 			dscr, sectornr, logsectornr);
228 	} else {
229 		DPRINTF(WRITE, ("udf_write_logvol_dscr: no wait, async write\n"));
230 
231 		error = udf_write_phys_dscr_async(ump, udf_node, UDF_C_NODE,
232 			dscr, sectornr, logsectornr, udf_wr_nodedscr_callback);
233 		/* will be UNLOCKED in call back */
234 		return error;
235 	}
236 out:
237 	udf_node->outstanding_nodedscr--;
238 	if (udf_node->outstanding_nodedscr == 0) {
239 		UDF_UNLOCK_NODE(udf_node, 0);
240 		cv_broadcast(&udf_node->node_lock);
241 	}
242 
243 	return error;
244 }
245 
246 /* --------------------------------------------------------------------- */
247 
248 /*
249  * Main file-system specific scheduler. Due to the nature of optical media
250  * scheduling can't be performed in the traditional way. Most OS
251  * implementations i've seen thus read or write a file atomically giving all
252  * kinds of side effects.
253  *
254  * This implementation uses a kernel thread to schedule the queued requests in
255  * such a way that is semi-optimal for optical media; this means approximately
256  * (R*|(Wr*|Ws*))* since switching between reading and writing is expensive in
257  * time.
258  */
259 
260 static void
udf_queuebuf_seq(struct udf_strat_args * args)261 udf_queuebuf_seq(struct udf_strat_args *args)
262 {
263 	struct udf_mount *ump = args->ump;
264 	struct buf *nestbuf = args->nestbuf;
265 	struct strat_private *priv = PRIV(ump);
266 	int queue;
267 	int what;
268 
269 	KASSERT(ump);
270 	KASSERT(nestbuf);
271 	KASSERT(nestbuf->b_iodone == nestiobuf_iodone);
272 
273 	what = nestbuf->b_udf_c_type;
274 	queue = UDF_SHED_READING;
275 	if ((nestbuf->b_flags & B_READ) == 0) {
276 		/* writing */
277 		queue = UDF_SHED_SEQWRITING;
278 		if (what == UDF_C_ABSOLUTE)
279 			queue = UDF_SHED_WRITING;
280 	}
281 
282 	/* use our own scheduler lists for more complex scheduling */
283 	mutex_enter(&priv->discstrat_mutex);
284 		bufq_put(priv->queues[queue], nestbuf);
285 		vfs_timestamp(&priv->last_queued[queue]);
286 	mutex_exit(&priv->discstrat_mutex);
287 
288 	/* signal our thread that there might be something to do */
289 	cv_signal(&priv->discstrat_cv);
290 }
291 
292 /* --------------------------------------------------------------------- */
293 
294 static void
udf_sync_caches_seq(struct udf_strat_args * args)295 udf_sync_caches_seq(struct udf_strat_args *args)
296 {
297 	struct udf_mount *ump = args->ump;
298 	struct strat_private *priv = PRIV(ump);
299 
300 	/* we might be called during unmount inadvertedly, be on safe side */
301 	if (!priv)
302 		return;
303 
304 	/* signal our thread that there might be something to do */
305 	priv->sync_req = 1;
306 	cv_signal(&priv->discstrat_cv);
307 
308 	mutex_enter(&priv->discstrat_mutex);
309 		while (priv->sync_req) {
310 			cv_timedwait(&priv->discstrat_cv,
311 				&priv->discstrat_mutex, hz/8);
312 		}
313 	mutex_exit(&priv->discstrat_mutex);
314 }
315 
316 /* --------------------------------------------------------------------- */
317 
318 /* TODO convert to lb_size */
319 static void
udf_VAT_mapping_update(struct udf_mount * ump,struct buf * buf,uint32_t lb_map)320 udf_VAT_mapping_update(struct udf_mount *ump, struct buf *buf, uint32_t lb_map)
321 {
322 	union dscrptr    *fdscr = (union dscrptr *) buf->b_data;
323 	struct vnode     *vp = buf->b_vp;
324 	struct udf_node  *udf_node = VTOI(vp);
325 	uint32_t lb_num;
326 	uint32_t udf_rw32_lbmap;
327 	int c_type = buf->b_udf_c_type;
328 	int error;
329 
330 	/* only interested when we're using a VAT */
331 	KASSERT(ump->vat_node);
332 	KASSERT(ump->vtop_alloc[ump->node_part] == UDF_ALLOC_VAT);
333 
334 	/* only nodes are recorded in the VAT */
335 	/* NOTE: and the fileset descriptor (FIXME ?) */
336 	if (c_type != UDF_C_NODE)
337 		return;
338 
339 	udf_rw32_lbmap = udf_rw32(lb_map);
340 
341 	/* if we're the VAT itself, only update our assigned sector number */
342 	if (udf_node == ump->vat_node) {
343 		fdscr->tag.tag_loc = udf_rw32_lbmap;
344 		udf_validate_tag_sum(fdscr);
345 		DPRINTF(TRANSLATE, ("VAT assigned to sector %u\n",
346 			udf_rw32(udf_rw32_lbmap)));
347 		/* no use mapping the VAT node in the VAT */
348 		return;
349 	}
350 
351 	/* record new position in VAT file */
352 	lb_num = udf_rw32(fdscr->tag.tag_loc);
353 
354 	/* lb_num = udf_rw32(udf_node->write_loc.loc.lb_num); */
355 
356 	DPRINTF(TRANSLATE, ("VAT entry change (log %u -> phys %u)\n",
357 			lb_num, lb_map));
358 
359 	/* VAT should be the longer than this write, can't go wrong */
360 	KASSERT(lb_num <= ump->vat_entries);
361 
362 	mutex_enter(&ump->allocate_mutex);
363 	error = udf_vat_write(ump->vat_node,
364 			(uint8_t *) &udf_rw32_lbmap, 4,
365 			ump->vat_offset + lb_num * 4);
366 	mutex_exit(&ump->allocate_mutex);
367 
368 	if (error)
369 		panic( "udf_VAT_mapping_update: HELP! i couldn't "
370 			"write in the VAT file ?\n");
371 }
372 
373 
374 static void
udf_issue_buf(struct udf_mount * ump,int queue,struct buf * buf)375 udf_issue_buf(struct udf_mount *ump, int queue, struct buf *buf)
376 {
377 	union dscrptr *dscr;
378 	struct long_ad *node_ad_cpy;
379 	struct part_desc *pdesc;
380 	uint64_t *lmapping, *lmappos;
381 	uint32_t sectornr, bpos;
382 	uint32_t ptov;
383 	uint16_t vpart_num;
384 	uint8_t *fidblk;
385 	int sector_size = ump->discinfo.sector_size;
386 	int blks = sector_size / DEV_BSIZE;
387 	int len, buf_len;
388 
389 	/* if reading, just pass to the device's STRATEGY */
390 	if (queue == UDF_SHED_READING) {
391 		DPRINTF(SHEDULE, ("\nudf_issue_buf READ %p : sector %d type %d,"
392 			"b_resid %d, b_bcount %d, b_bufsize %d\n",
393 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
394 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
395 		VOP_STRATEGY(ump->devvp, buf);
396 		return;
397 	}
398 
399 	if (queue == UDF_SHED_WRITING) {
400 		DPRINTF(SHEDULE, ("\nudf_issue_buf WRITE %p : sector %d "
401 			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
402 			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
403 			buf->b_resid, buf->b_bcount, buf->b_bufsize));
404 		KASSERT(buf->b_udf_c_type == UDF_C_ABSOLUTE);
405 
406 		// udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
407 		VOP_STRATEGY(ump->devvp, buf);
408 		return;
409 	}
410 
411 	KASSERT(queue == UDF_SHED_SEQWRITING);
412 	DPRINTF(SHEDULE, ("\nudf_issue_buf SEQWRITE %p : sector XXXX "
413 		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
414 		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
415 		buf->b_bufsize));
416 
417 	/*
418 	 * Buffers should not have been allocated to disc addresses yet on
419 	 * this queue. Note that a buffer can get multiple extents allocated.
420 	 *
421 	 * lmapping contains lb_num relative to base partition.
422 	 */
423 	lmapping    = ump->la_lmapping;
424 	node_ad_cpy = ump->la_node_ad_cpy;
425 
426 	/* logically allocate buf and map it in the file */
427 	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
428 
429 	/*
430 	 * NOTE We are using the knowledge here that sequential media will
431 	 * always be mapped linearly. Thus no use to explicitly translate the
432 	 * lmapping list.
433 	 */
434 
435 	/* calculate offset from physical base partition */
436 	pdesc = ump->partitions[ump->vtop[vpart_num]];
437 	ptov  = udf_rw32(pdesc->start_loc);
438 
439 	/* set buffers blkno to the physical block number */
440 	buf->b_blkno = (*lmapping + ptov) * blks;
441 
442 	/* fixate floating descriptors */
443 	if (buf->b_udf_c_type == UDF_C_FLOAT_DSCR) {
444 		/* set our tag location to the absolute position */
445 		dscr = (union dscrptr *) buf->b_data;
446 		dscr->tag.tag_loc = udf_rw32(*lmapping + ptov);
447 		udf_validate_tag_and_crc_sums(dscr);
448 	}
449 
450 	/* update mapping in the VAT */
451 	if (buf->b_udf_c_type == UDF_C_NODE) {
452 		udf_VAT_mapping_update(ump, buf, *lmapping);
453 		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
454 	}
455 
456 	/* if we have FIDs, fixup using the new allocation table */
457 	if (buf->b_udf_c_type == UDF_C_FIDS) {
458 		buf_len = buf->b_bcount;
459 		bpos = 0;
460 		lmappos = lmapping;
461 		while (buf_len) {
462 			sectornr = *lmappos++;
463 			len = MIN(buf_len, sector_size);
464 			fidblk = (uint8_t *) buf->b_data + bpos;
465 			udf_fixup_fid_block(fidblk, sector_size,
466 				0, len, sectornr);
467 			bpos += len;
468 			buf_len -= len;
469 		}
470 	}
471 
472 	VOP_STRATEGY(ump->devvp, buf);
473 }
474 
475 
476 static void
udf_doshedule(struct udf_mount * ump)477 udf_doshedule(struct udf_mount *ump)
478 {
479 	struct buf *buf;
480 	struct timespec now, *last;
481 	struct strat_private *priv = PRIV(ump);
482 	void (*b_callback)(struct buf *);
483 	int new_queue;
484 	int error;
485 
486 	buf = bufq_get(priv->queues[priv->cur_queue]);
487 	if (buf) {
488 		/* transfer from the current queue to the device queue */
489 		mutex_exit(&priv->discstrat_mutex);
490 
491 		/* transform buffer to synchronous; XXX needed? */
492 		b_callback = buf->b_iodone;
493 		buf->b_iodone = NULL;
494 		CLR(buf->b_flags, B_ASYNC);
495 
496 		/* issue and wait on completion */
497 		udf_issue_buf(ump, priv->cur_queue, buf);
498 		biowait(buf);
499 
500 		mutex_enter(&priv->discstrat_mutex);
501 
502 		/* if there is an error, repair this error, otherwise propagate */
503 		if (buf->b_error && ((buf->b_flags & B_READ) == 0)) {
504 			/* check what we need to do */
505 			panic("UDF write error, can't handle yet!\n");
506 		}
507 
508 		/* propagate result to higher layers */
509 		if (b_callback) {
510 			buf->b_iodone = b_callback;
511 			(*buf->b_iodone)(buf);
512 		}
513 
514 		return;
515 	}
516 
517 	/* Check if we're idling in this state */
518 	vfs_timestamp(&now);
519 	last = &priv->last_queued[priv->cur_queue];
520 	if (ump->discinfo.mmc_class == MMC_CLASS_CD) {
521 		/* dont switch too fast for CD media; its expensive in time */
522 		if (now.tv_sec - last->tv_sec < 3)
523 			return;
524 	}
525 
526 	/* check if we can/should switch */
527 	new_queue = priv->cur_queue;
528 
529 	if (bufq_peek(priv->queues[UDF_SHED_READING]))
530 		new_queue = UDF_SHED_READING;
531 	if (bufq_peek(priv->queues[UDF_SHED_WRITING]))		/* only for unmount */
532 		new_queue = UDF_SHED_WRITING;
533 	if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
534 		new_queue = UDF_SHED_SEQWRITING;
535 	if (priv->cur_queue == UDF_SHED_READING) {
536 		if (new_queue == UDF_SHED_SEQWRITING) {
537 			/* TODO use flag to signal if this is needed */
538 			mutex_exit(&priv->discstrat_mutex);
539 
540 			/* update trackinfo for data and metadata */
541 			error = udf_update_trackinfo(ump,
542 					&ump->data_track);
543 			assert(error == 0);
544 			error = udf_update_trackinfo(ump,
545 					&ump->metadata_track);
546 			assert(error == 0);
547 			mutex_enter(&priv->discstrat_mutex);
548 			__USE(error);
549 		}
550 	}
551 
552 	if (new_queue != priv->cur_queue) {
553 		DPRINTF(SHEDULE, ("switching from %d to %d\n",
554 			priv->cur_queue, new_queue));
555 		if (new_queue == UDF_SHED_READING)
556 			udf_mmc_synchronise_caches(ump);
557 	}
558 
559 	priv->cur_queue = new_queue;
560 }
561 
562 
563 static void
udf_discstrat_thread(void * arg)564 udf_discstrat_thread(void *arg)
565 {
566 	struct udf_mount *ump = (struct udf_mount *) arg;
567 	struct strat_private *priv = PRIV(ump);
568 	int empty;
569 
570 	empty = 1;
571 
572 	priv->thread_running = 1;
573 	cv_broadcast(&priv->discstrat_cv);
574 
575 	mutex_enter(&priv->discstrat_mutex);
576 	while (priv->run_thread || !empty || priv->sync_req) {
577 		/* process the current selected queue */
578 		udf_doshedule(ump);
579 		empty  = (bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
580 		empty &= (bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
581 		empty &= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
582 
583 		/* wait for more if needed */
584 		if (empty) {
585 			if (priv->sync_req) {
586 				/* on sync, we need to simulate a read->write transition */
587 				udf_mmc_synchronise_caches(ump);
588 				priv->cur_queue = UDF_SHED_READING;
589 				priv->sync_req = 0;
590 			}
591 			cv_timedwait(&priv->discstrat_cv,
592 				&priv->discstrat_mutex, hz/8);
593 		}
594 	}
595 	mutex_exit(&priv->discstrat_mutex);
596 
597 	priv->thread_running  = 0;
598 	priv->thread_finished = 1;
599 	cv_broadcast(&priv->discstrat_cv);
600 
601 	kthread_exit(0);
602 	/* not reached */
603 }
604 
605 /* --------------------------------------------------------------------- */
606 
607 static void
udf_discstrat_init_seq(struct udf_strat_args * args)608 udf_discstrat_init_seq(struct udf_strat_args *args)
609 {
610 	struct udf_mount *ump = args->ump;
611 	struct strat_private *priv = PRIV(ump);
612 	struct disk_strategy dkstrat;
613 	uint32_t lb_size;
614 
615 	KASSERT(ump);
616 	KASSERT(ump->logical_vol);
617 	KASSERT(priv == NULL);
618 
619 	lb_size = udf_rw32(ump->logical_vol->lb_size);
620 	KASSERT(lb_size > 0);
621 
622 	/* initialise our memory space */
623 	ump->strategy_private = malloc(sizeof(struct strat_private),
624 		M_UDFTEMP, M_WAITOK);
625 	priv = ump->strategy_private;
626 	memset(priv, 0 , sizeof(struct strat_private));
627 
628 	/* initialise locks */
629 	cv_init(&priv->discstrat_cv, "udfstrat");
630 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
631 
632 	/*
633 	 * Initialise pool for descriptors associated with nodes. This is done
634 	 * in lb_size units though currently lb_size is dictated to be
635 	 * sector_size.
636 	 */
637 	pool_init(&priv->desc_pool, lb_size, 0, 0, 0, "udf_desc_pool", NULL,
638 	    IPL_NONE);
639 
640 	/*
641 	 * remember old device strategy method and explicit set method
642 	 * `discsort' since we have our own more complex strategy that is not
643 	 * implementable on the CD device and other strategies will get in the
644 	 * way.
645 	 */
646 	memset(&priv->old_strategy_setting, 0,
647 		sizeof(struct disk_strategy));
648 	VOP_IOCTL(ump->devvp, DIOCGSTRATEGY, &priv->old_strategy_setting,
649 		FREAD | FKIOCTL, NOCRED);
650 	memset(&dkstrat, 0, sizeof(struct disk_strategy));
651 	strcpy(dkstrat.dks_name, "discsort");
652 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &dkstrat, FWRITE | FKIOCTL,
653 		NOCRED);
654 
655 	/* initialise our internal scheduler */
656 	priv->cur_queue = UDF_SHED_READING;
657 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
658 		BUFQ_SORT_RAWBLOCK);
659 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
660 		BUFQ_SORT_RAWBLOCK);
661 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "fcfs", 0);
662 	vfs_timestamp(&priv->last_queued[UDF_SHED_READING]);
663 	vfs_timestamp(&priv->last_queued[UDF_SHED_WRITING]);
664 	vfs_timestamp(&priv->last_queued[UDF_SHED_SEQWRITING]);
665 
666 	/* create our disk strategy thread */
667 	priv->thread_finished = 0;
668 	priv->thread_running  = 0;
669 	priv->run_thread      = 1;
670 	priv->sync_req        = 0;
671 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
672 		udf_discstrat_thread, ump, &priv->queue_lwp,
673 		"%s", "udf_rw")) {
674 		panic("fork udf_rw");
675 	}
676 
677 	/* wait for thread to spin up */
678 	mutex_enter(&priv->discstrat_mutex);
679 	while (!priv->thread_running) {
680 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
681 	}
682 	mutex_exit(&priv->discstrat_mutex);
683 }
684 
685 
686 static void
udf_discstrat_finish_seq(struct udf_strat_args * args)687 udf_discstrat_finish_seq(struct udf_strat_args *args)
688 {
689 	struct udf_mount *ump = args->ump;
690 	struct strat_private *priv = PRIV(ump);
691 
692 	if (ump == NULL)
693 		return;
694 
695 	/* stop our scheduling thread */
696 	KASSERT(priv->run_thread == 1);
697 	priv->run_thread = 0;
698 
699 	mutex_enter(&priv->discstrat_mutex);
700 	while (!priv->thread_finished) {
701 		cv_broadcast(&priv->discstrat_cv);
702 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
703 	}
704 	mutex_exit(&priv->discstrat_mutex);
705 
706 	/* kthread should be finished now */
707 
708 	/* set back old device strategy method */
709 	VOP_IOCTL(ump->devvp, DIOCSSTRATEGY, &priv->old_strategy_setting,
710 			FWRITE, NOCRED);
711 
712 	/* destroy our pool */
713 	pool_destroy(&priv->desc_pool);
714 
715 	mutex_destroy(&priv->discstrat_mutex);
716 	cv_destroy(&priv->discstrat_cv);
717 
718 	/* free our private space */
719 	free(ump->strategy_private, M_UDFTEMP);
720 	ump->strategy_private = NULL;
721 }
722 
723 /* --------------------------------------------------------------------- */
724 
725 struct udf_strategy udf_strat_sequential =
726 {
727 	udf_create_logvol_dscr_seq,
728 	udf_free_logvol_dscr_seq,
729 	udf_read_logvol_dscr_seq,
730 	udf_write_logvol_dscr_seq,
731 	udf_queuebuf_seq,
732 	udf_sync_caches_seq,
733 	udf_discstrat_init_seq,
734 	udf_discstrat_finish_seq
735 };
736 
737 
738