xref: /netbsd/sys/fs/udf/udf_strat_rmw.c (revision 87d35fd6)
1 /* $NetBSD: udf_strat_rmw.c,v 1.31 2023/06/27 09:58:50 reinoud Exp $ */
2 
3 /*
4  * Copyright (c) 2006, 2008 Reinoud Zandijk
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.31 2023/06/27 09:58:50 reinoud Exp $");
32 #endif /* not lint */
33 
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
48 #include <sys/buf.h>
49 #include <sys/file.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
55 #include <sys/stat.h>
56 #include <sys/conf.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
60 
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
63 
64 #include "udf.h"
65 #include "udf_subr.h"
66 #include "udf_bswap.h"
67 
68 
/* vnode -> UDF node: the vnode's v_data pointer viewed as a struct udf_node */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
/* mount -> private state of this strategy, stored in ump->strategy_private */
#define PRIV(ump) ((struct strat_private *) (ump)->strategy_private)
/* buf -> eccline it belongs to, stored in the buf's b_private back pointer */
#define BTOE(buf) ((struct udf_eccline *) ((buf)->b_private))
72 
73 /* --------------------------------------------------------------------- */
74 
/*
 * Maximum number of sectors in one ECC line (packet).  The per-sector
 * bitmaps of an eccline are uint64_t with one bit per sector, hence 64.
 */
#define UDF_MAX_PACKET_SIZE	64			/* DONT change this */

/*
 * scheduler states; each value is also the index of the matching queue.
 * Zero is never a state: queued_on == 0 means "not on any queue".
 */
#define UDF_SHED_WAITING	1			/* waiting on timeout */
#define UDF_SHED_READING	2
#define UDF_SHED_WRITING	3
#define UDF_SHED_SEQWRITING	4
#define UDF_SHED_IDLE		5			/* refcnt'd */
#define UDF_SHED_FREE		6			/* recycleable */
/* array size covering all states; parenthesised so it is safe in expressions */
#define UDF_SHED_MAX		(UDF_SHED_FREE + 1)
85 
/* flags, kept in the `flags' member of struct udf_eccline */
#define ECC_LOCKED		0x01			/* prevent access   */
#define ECC_WANTED		0x02			/* trying access    */
#define ECC_SEQWRITING		0x04			/* sequential queue */
#define ECC_FLOATING		0x08			/* not queued yet   */

/* seconds added to an eccline's wait_time stamp when it is released dirty */
#define ECC_WAITTIME		10
93 
94 
/* tail queue head type for ecclines */
TAILQ_HEAD(ecclineq, udf_eccline);
/*
 * One cached ECC line (packet): the sectors starting at start_sector,
 * buffered in `blob'.  The bitmaps hold one bit per sector of the line;
 * bit N describes sector start_sector + N.
 */
struct udf_eccline {
	struct udf_mount	 *ump;			/* mount this line belongs to */
	uint64_t		  present;		/* bitmap: sector data valid in blob */
	uint64_t		  readin;		/* bitmap: sectors requested to be read */
	uint64_t		  dirty;		/* bitmap: sectors to be written out */
	uint64_t		  error;		/* bitmap: sectors that failed to read */
	uint32_t		  refcnt;		/* holds; also raised while waiting for the lock */

	struct timespec		  wait_time;		/* set when the line is released dirty */
	uint32_t		  flags;		/* ECC_* flags */
	uint32_t		  start_sector;		/* physical */

	/* call site (file and line) that last took the eccline lock */
	const char		 *fname;
	int			  sline;

	struct buf		 *buf;			/* iobuf; its b_private points back here */
	void			 *blob;			/* sector data of the whole line */

	/* per sector: nested buf waiting for read-in, its data offset and length */
	struct buf		 *bufs[UDF_MAX_PACKET_SIZE];
	uint32_t		  bufs_bpos[UDF_MAX_PACKET_SIZE];
	int			  bufs_len[UDF_MAX_PACKET_SIZE];

	int			  queued_on;		/* on which BUFQ list; 0 when on none */
	LIST_ENTRY(udf_eccline)   hashchain;		/* on sector lookup  */
};
121 
122 
/*
 * Per-mount private state of the read-modify-write strategy, reached
 * through PRIV(ump).
 */
struct strat_private {
	lwp_t			 *queue_lwp;		/* presumably the scheduling thread -- TODO confirm */
	kcondvar_t		  discstrat_cv;		/* to wait on       */
	kmutex_t		  discstrat_mutex;	/* disc strategy    */
	kmutex_t		  seqwrite_mutex;	/* protect mappings */

	int			  thread_running;	/* thread control */
	int			  run_thread;		/* thread control */
	int			  thread_finished;	/* thread control */
	int			  cur_queue;

	int			  num_floating;		/* ecclines currently ECC_FLOATING */
	/* the arrays below are indexed by UDF_SHED_* state */
	int			  num_queued[UDF_SHED_MAX];	/* ecclines on each queue */
	struct bufq_state	 *queues[UDF_SHED_MAX];
	struct timespec		  last_queued[UDF_SHED_MAX];	/* time of last push */
	struct disk_strategy	  old_strategy_setting;

	struct pool		  eccline_pool;		/* struct udf_eccline allocations */
	struct pool		  ecclineblob_pool;	/* eccline data blobs */
	/* ecclines hashed on (start_sector / packet_size) & UDF_ECCBUF_HASHMASK */
	LIST_HEAD(, udf_eccline)  eccline_hash[UDF_ECCBUF_HASHSIZE];
};
144 
145 /* --------------------------------------------------------------------- */
146 
/*
 * Lock/unlock wrappers that pass the caller's file and line along; the lock
 * function records them in the eccline so a stuck lock can be traced.
 */
#define UDF_LOCK_ECCLINE(eccline) udf_lock_eccline(eccline, __FILE__, __LINE__)
#define UDF_UNLOCK_ECCLINE(eccline) udf_unlock_eccline(eccline, __FILE__, __LINE__)
149 
150 /* can be called with or without discstrat lock */
151 static void
udf_lock_eccline(struct udf_eccline * eccline,const char * fname,int sline)152 udf_lock_eccline(struct udf_eccline *eccline, const char *fname, int sline)
153 {
154 	struct strat_private *priv = PRIV(eccline->ump);
155 	int waslocked, ret;
156 
157 	KASSERT(mutex_owned(&priv->discstrat_mutex));
158 
159 	waslocked = mutex_owned(&priv->discstrat_mutex);
160 	if (!waslocked)
161 		mutex_enter(&priv->discstrat_mutex);
162 
163 	/* wait until its unlocked first */
164 	eccline->refcnt++;
165 	while (eccline->flags & ECC_LOCKED) {
166 		DPRINTF(ECCLINE, ("waiting for lock at %s:%d\n",
167 					fname, sline));
168 		DPRINTF(ECCLINE, ("was locked at %s:%d\n",
169 					eccline->fname, eccline->sline));
170 		eccline->flags |= ECC_WANTED;
171 		ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
172 			hz/8);
173 		if (ret == EWOULDBLOCK)
174 			DPRINTF(LOCKING, ("eccline lock held, waiting for "
175 				"release"));
176 	}
177 	eccline->flags |= ECC_LOCKED;
178 	eccline->flags &= ~ECC_WANTED;
179 	eccline->refcnt--;
180 
181 	eccline->fname = fname;
182 	eccline->sline = sline;
183 
184 	if (!waslocked)
185 		mutex_exit(&priv->discstrat_mutex);
186 }
187 
188 
189 /* can be called with or without discstrat lock */
190 static void
udf_unlock_eccline(struct udf_eccline * eccline,const char * fname,int sline)191 udf_unlock_eccline(struct udf_eccline *eccline, const char *fname, int sline)
192 {
193 	struct strat_private *priv = PRIV(eccline->ump);
194 	int waslocked;
195 
196 	KASSERT(mutex_owned(&priv->discstrat_mutex));
197 
198 	waslocked = mutex_owned(&priv->discstrat_mutex);
199 	if (!waslocked)
200 		mutex_enter(&priv->discstrat_mutex);
201 
202 	eccline->flags &= ~ECC_LOCKED;
203 	cv_broadcast(&priv->discstrat_cv);
204 
205 	if (!waslocked)
206 		mutex_exit(&priv->discstrat_mutex);
207 }
208 
209 
210 /* NOTE discstrat_mutex should be held! */
211 static void
udf_dispose_eccline(struct udf_eccline * eccline)212 udf_dispose_eccline(struct udf_eccline *eccline)
213 {
214 	struct strat_private *priv = PRIV(eccline->ump);
215 
216 	KASSERT(mutex_owned(&priv->discstrat_mutex));
217 
218 	DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
219 		"present %0"PRIx64"\n", eccline->start_sector,
220 		eccline->present));
221 
222 	KASSERT(eccline->refcnt == 0);
223 	KASSERT(eccline->dirty  == 0);
224 	KASSERT(eccline->queued_on == 0);
225 	KASSERT(eccline->flags & ECC_FLOATING);
226 	KASSERT(eccline->flags & ECC_LOCKED);
227 
228 	LIST_REMOVE(eccline, hashchain);
229 	priv->num_floating--;
230 
231 	putiobuf(eccline->buf);
232 	pool_put(&priv->ecclineblob_pool, eccline->blob);
233 	pool_put(&priv->eccline_pool, eccline);
234 }
235 
236 
237 /* NOTE discstrat_mutex should be held! */
238 static void
udf_push_eccline(struct udf_eccline * eccline,int newqueue)239 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
240 {
241 	struct strat_private *priv = PRIV(eccline->ump);
242 
243 	KASSERT(mutex_owned(&priv->discstrat_mutex));
244 
245 	DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
246 
247 	KASSERT(eccline->queued_on == 0);
248 	KASSERT(eccline->flags & ECC_FLOATING);
249 
250 	/* set buffer block numbers to make sure its queued correctly */
251 	eccline->buf->b_lblkno   = eccline->start_sector;
252 	eccline->buf->b_blkno    = eccline->start_sector;
253 	eccline->buf->b_rawblkno = eccline->start_sector;
254 
255 	vfs_timestamp(&priv->last_queued[newqueue]);
256 	eccline->flags &= ~ECC_FLOATING;
257 	priv->num_floating--;
258 	eccline->queued_on = newqueue;
259 	priv->num_queued[newqueue]++;
260 	bufq_put(priv->queues[newqueue], eccline->buf);
261 
262 	UDF_UNLOCK_ECCLINE(eccline);
263 
264 	/* XXX tickle disc strategy statemachine */
265 	if (newqueue != UDF_SHED_IDLE)
266 		cv_signal(&priv->discstrat_cv);
267 }
268 
269 
270 static struct udf_eccline *
udf_peek_eccline(struct strat_private * priv,int queued_on)271 udf_peek_eccline(struct strat_private *priv, int queued_on)
272 {
273 	struct udf_eccline *eccline;
274 	struct buf *buf;
275 
276 	KASSERT(mutex_owned(&priv->discstrat_mutex));
277 
278 	for(;;) {
279 		buf = bufq_peek(priv->queues[queued_on]);
280 		/* could have been a race, but we'll revisit later */
281 		if (buf == NULL)
282 			return NULL;
283 
284 		eccline = BTOE(buf);
285 		UDF_LOCK_ECCLINE(eccline);
286 
287 		/* might have changed before we obtained the lock */
288 		if (eccline->queued_on == queued_on)
289 			break;
290 
291 		UDF_UNLOCK_ECCLINE(eccline);
292 	}
293 
294 	KASSERT(eccline->queued_on == queued_on);
295 	KASSERT((eccline->flags & ECC_FLOATING) == 0);
296 
297 	DPRINTF(PARANOIA, ("DEBUG: buf %p peeked at queue %d\n",
298 		eccline->buf, queued_on));
299 
300 	return eccline;
301 }
302 
303 
304 static struct udf_eccline *
udf_pop_eccline(struct strat_private * priv,int queued_on)305 udf_pop_eccline(struct strat_private *priv, int queued_on)
306 {
307 	struct udf_eccline *eccline;
308 	struct buf *buf;
309 
310 	KASSERT(mutex_owned(&priv->discstrat_mutex));
311 
312 	for(;;) {
313 		buf = bufq_get(priv->queues[queued_on]);
314 		if (buf == NULL) {
315 			// KASSERT(priv->num_queued[queued_on] == 0);
316 			return NULL;
317 		}
318 
319 		eccline = BTOE(buf);
320 		UDF_LOCK_ECCLINE(eccline);
321 
322 		/* might have changed before we obtained the lock */
323 		if (eccline->queued_on == queued_on)
324 			break;
325 
326 		UDF_UNLOCK_ECCLINE(eccline);
327 	}
328 
329 	KASSERT(eccline->queued_on == queued_on);
330 	KASSERT((eccline->flags & ECC_FLOATING) == 0);
331 
332 	priv->num_queued[queued_on]--;
333 	eccline->queued_on = 0;
334 
335 	eccline->flags |= ECC_FLOATING;
336 	priv->num_floating++;
337 
338 	DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
339 		eccline->buf, queued_on));
340 
341 	return eccline;
342 }
343 
344 
345 static void
udf_unqueue_eccline(struct strat_private * priv,struct udf_eccline * eccline)346 udf_unqueue_eccline(struct strat_private *priv, struct udf_eccline *eccline)
347 {
348 	struct buf *ret __diagused;
349 
350 	UDF_LOCK_ECCLINE(eccline);
351 	if (eccline->queued_on == 0) {
352 		KASSERT(eccline->flags & ECC_FLOATING);
353 		return;
354 	}
355 
356 	ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf);
357 	KASSERT(ret == eccline->buf);
358 
359 	priv->num_queued[eccline->queued_on]--;
360 	eccline->queued_on = 0;
361 
362 	eccline->flags |= ECC_FLOATING;
363 	priv->num_floating++;
364 }
365 
366 
367 static struct udf_eccline *
udf_geteccline(struct udf_mount * ump,uint32_t sector,int flags)368 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
369 {
370 	struct strat_private *priv = PRIV(ump);
371 	struct udf_eccline *eccline;
372 	uint32_t start_sector, lb_size, blobsize;
373 	uint8_t *eccline_blob;
374 	int line, line_offset;
375 	int num_busy;
376 
377 	mutex_enter(&priv->discstrat_mutex);
378 
379 	/* lookup in our line cache hashtable */
380 	line_offset  = sector % ump->packet_size;
381 	start_sector = sector - line_offset;
382 	line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
383 
384 	KASSERT(priv->thread_running);
385 
386 retry:
387 	DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
388 	LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
389 		if (eccline->start_sector == start_sector) {
390 			DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
391 				eccline->start_sector));
392 			udf_unqueue_eccline(priv, eccline);
393 
394 			mutex_exit(&priv->discstrat_mutex);
395 			return eccline;
396 		}
397 	}
398 
399 	/* not found in eccline cache */
400 	DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
401 
402 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
403 	blobsize = ump->packet_size * lb_size;
404 
405 	/* dont allow too many pending requests */
406 	DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
407 	num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
408 	if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
409 		cv_timedwait(&priv->discstrat_cv,
410 			&priv->discstrat_mutex, hz/8);
411 		goto retry;
412 	}
413 
414 	eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
415 	eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
416 	if ((eccline_blob == NULL) || (eccline == NULL)) {
417 		if (eccline_blob)
418 			pool_put(&priv->ecclineblob_pool, eccline_blob);
419 		if (eccline)
420 			pool_put(&priv->eccline_pool, eccline);
421 
422 		/* out of memory for now; canibalise freelist */
423 		eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
424 		if (eccline == NULL) {
425 			/* serious trouble; wait and retry */
426 			cv_timedwait(&priv->discstrat_cv,
427 				&priv->discstrat_mutex, hz/8);
428 			goto retry;
429 		}
430 
431 		/* push back line if we're waiting for it or its locked */
432 		if (eccline->flags & ECC_WANTED) {
433 			/* we won a race, but someone else needed it */
434 			udf_push_eccline(eccline, UDF_SHED_FREE);
435 			goto retry;
436 		}
437 
438 		/* unlink this entry */
439 		LIST_REMOVE(eccline, hashchain);
440 		KASSERT(eccline->flags & ECC_FLOATING);
441 		KASSERT(eccline->queued_on == 0);
442 
443 		eccline_blob = eccline->blob;
444 		eccline->flags = ECC_FLOATING | ECC_LOCKED;
445 	} else {
446 		eccline->flags = ECC_FLOATING | ECC_LOCKED;
447 		priv->num_floating++;
448 	}
449 
450 	eccline->queued_on = 0;
451 	eccline->blob = eccline_blob;
452 	eccline->buf  = getiobuf(NULL, true);
453 	eccline->buf->b_private = eccline;	/* IMPORTANT */
454 
455 	/* initialise eccline blob */
456 	/* XXX memset expensive and strictly not needed XXX */
457 	memset(eccline->blob, 0, blobsize);
458 
459 	eccline->ump = ump;
460 	eccline->present = eccline->readin = eccline->dirty = 0;
461 	eccline->error = 0;
462 	eccline->refcnt = 0;
463 	memset(eccline->bufs, 0, UDF_MAX_PACKET_SIZE * sizeof(struct buf *));
464 
465 	eccline->start_sector    = start_sector;
466 	eccline->buf->b_lblkno   = start_sector;
467 	eccline->buf->b_blkno    = start_sector;
468 	eccline->buf->b_rawblkno = start_sector;
469 
470 	LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
471 
472 	/*
473 	 * TODO possible optimalisation for checking overlap with partitions
474 	 * to get a clue on future eccline usage
475 	 */
476 
477 	KASSERT(eccline->refcnt == 0);
478 	KASSERT(eccline->flags & ECC_FLOATING);
479 	KASSERT(eccline->flags & ECC_LOCKED);
480 	mutex_exit(&priv->discstrat_mutex);
481 
482 	return eccline;
483 }
484 
485 
486 static void
udf_puteccline(struct udf_eccline * eccline)487 udf_puteccline(struct udf_eccline *eccline)
488 {
489 	struct strat_private *priv = PRIV(eccline->ump);
490 	struct udf_mount *ump = eccline->ump;
491 	uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
492 	int new_queue;
493 
494 	mutex_enter(&priv->discstrat_mutex);
495 
496 	DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
497 		eccline->start_sector, eccline->refcnt));
498 
499 	KASSERT(eccline->flags & ECC_LOCKED);
500 	KASSERT(eccline->flags & ECC_FLOATING);
501 
502 	/* clear all read bits that are already read in */
503 	if (eccline->readin & eccline->present)
504 		eccline->readin &= (~eccline->present) & allbits;
505 
506 	/* if we have active nodes we dont set it on seqwriting */
507 	if (eccline->refcnt > 1)
508 		eccline->flags &= ~ECC_SEQWRITING;
509 
510 	/* select state */
511 	new_queue = UDF_SHED_FREE;
512 	if (eccline->refcnt > 0)
513 		new_queue = UDF_SHED_IDLE;
514 	if (eccline->flags & ECC_WANTED)
515 		new_queue = UDF_SHED_IDLE;
516 	if (eccline->readin)
517 		new_queue = UDF_SHED_READING;
518 	if (eccline->dirty) {
519 		new_queue = UDF_SHED_WAITING;
520 		vfs_timestamp(&eccline->wait_time);
521 		eccline->wait_time.tv_sec += ECC_WAITTIME;
522 
523 		if (eccline->present == allbits) {
524 			new_queue = UDF_SHED_WRITING;
525 			if (eccline->flags & ECC_SEQWRITING)
526 				new_queue = UDF_SHED_SEQWRITING;
527 		}
528 	}
529 	udf_push_eccline(eccline, new_queue);
530 
531 	mutex_exit(&priv->discstrat_mutex);
532 }
533 
534 /* --------------------------------------------------------------------- */
535 
536 static int
udf_create_nodedscr_rmw(struct udf_strat_args * args)537 udf_create_nodedscr_rmw(struct udf_strat_args *args)
538 {
539 	union dscrptr   **dscrptr  = &args->dscr;
540 	struct udf_mount *ump      = args->ump;
541 	struct long_ad   *icb      = args->icb;
542 	struct udf_eccline *eccline;
543 	uint64_t bit;
544 	uint32_t sectornr, lb_size, dummy;
545 	uint8_t *mem;
546 	int error, eccsect;
547 
548 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
549 	if (error)
550 		return error;
551 
552 	lb_size  = udf_rw32(ump->logical_vol->lb_size);
553 
554 	/* get our eccline */
555 	eccline = udf_geteccline(ump, sectornr, 0);
556 	eccsect = sectornr - eccline->start_sector;
557 
558 	bit = (uint64_t) 1 << eccsect;
559 	eccline->readin  &= ~bit;	/* just in case */
560 	eccline->present |=  bit;
561 	eccline->dirty   &= ~bit;	/* Err... euhm... clean? */
562 
563 	eccline->refcnt++;
564 
565 	/* clear space */
566 	mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
567 	memset(mem, 0, lb_size);
568 
569 	udf_puteccline(eccline);
570 
571 	*dscrptr = (union dscrptr *) mem;
572 	return 0;
573 }
574 
575 
576 static void
udf_free_nodedscr_rmw(struct udf_strat_args * args)577 udf_free_nodedscr_rmw(struct udf_strat_args *args)
578 {
579 	struct udf_mount *ump  = args->ump;
580 	struct long_ad   *icb  = args->icb;
581 	struct udf_eccline *eccline;
582 	uint64_t bit;
583 	uint32_t sectornr, dummy;
584 	int error, eccsect;
585 
586 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
587 	if (error)
588 		return;
589 
590 	/* get our eccline */
591 	eccline = udf_geteccline(ump, sectornr, 0);
592 	eccsect = sectornr - eccline->start_sector;
593 
594 	bit = (uint64_t) 1 << eccsect;
595 	KASSERT(eccline->present & bit);
596 
597 	eccline->readin &= ~bit;	/* just in case */
598 	/* XXX eccline->dirty? */
599 
600 	KASSERT(eccline->refcnt >= 1);
601 	eccline->refcnt--;
602 
603 	udf_puteccline(eccline);
604 }
605 
606 
607 static int
udf_read_nodedscr_rmw(struct udf_strat_args * args)608 udf_read_nodedscr_rmw(struct udf_strat_args *args)
609 {
610 	union dscrptr   **dscrptr = &args->dscr;
611 	struct udf_mount *ump = args->ump;
612 	struct long_ad   *icb = args->icb;
613 	struct strat_private *priv;
614 	struct udf_eccline *eccline;
615 	uint64_t bit;
616 	uint32_t sectornr, dummy;
617 	uint8_t *pos;
618 	int sector_size = ump->discinfo.sector_size;
619 	int lb_size __diagused = udf_rw32(ump->logical_vol->lb_size);
620 	int i, error, dscrlen, eccsect;
621 
622 	KASSERT(sector_size == lb_size);
623 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
624 	if (error)
625 		return error;
626 
627 	/* get our eccline */
628 	eccline = udf_geteccline(ump, sectornr, 0);
629 	eccsect = sectornr - eccline->start_sector;
630 
631 	bit = (uint64_t) 1 << eccsect;
632 	if ((eccline->present & bit) == 0) {
633 		/* mark bit for readin */
634 		eccline->readin |= bit;
635 		eccline->refcnt++;	/* prevent recycling */
636 		KASSERT(eccline->bufs[eccsect] == NULL);
637 		udf_puteccline(eccline);
638 
639 		/* wait for completion */
640 		priv = PRIV(eccline->ump);
641 		mutex_enter(&priv->discstrat_mutex);
642 		while (((eccline->present | eccline->error) & bit) == 0) {
643 			error = cv_timedwait(&priv->discstrat_cv,
644 				&priv->discstrat_mutex,
645 				hz/8);
646 			if (error == EWOULDBLOCK)
647 				DPRINTF(LOCKING, ("eccline waiting for read\n"));
648 		}
649 		mutex_exit(&priv->discstrat_mutex);
650 
651 		/* reget our line */
652 		eccline = udf_geteccline(ump, sectornr, 0);
653 		KASSERT(eccline->refcnt >= 1);
654 		eccline->refcnt--;	/* undo refcnt */
655 
656 		if (eccline->error & bit) {
657 			*dscrptr = NULL;
658 			udf_puteccline(eccline);
659 			return EIO;		/* XXX error code */
660 		}
661 	}
662 
663 	*dscrptr = (union dscrptr *)
664 		(((uint8_t *) eccline->blob) + eccsect * sector_size);
665 
666 	/* code from read_phys_descr */
667 	/* check if its a valid tag */
668 	error = udf_check_tag(*dscrptr);
669 	if (error) {
670 		/* check if its an empty block */
671 		pos = (uint8_t *) *dscrptr;
672 		for (i = 0; i < sector_size; i++, pos++) {
673 			if (*pos) break;
674 		}
675 		if (i == sector_size) {
676 			/* return no error but with no dscrptr */
677 			error = 0;
678 		}
679 		*dscrptr = NULL;
680 		udf_puteccline(eccline);
681 		return error;
682 	}
683 
684 	/* calculate descriptor size */
685 	dscrlen = udf_tagsize(*dscrptr, sector_size);
686 	error = udf_check_tag_payload(*dscrptr, dscrlen);
687 	if (error) {
688 		*dscrptr = NULL;
689 		udf_puteccline(eccline);
690 		return error;
691 	}
692 
693 	/* we have a hold since it has a node descriptor */
694 	eccline->refcnt++;
695 	udf_puteccline(eccline);
696 
697 	return 0;
698 }
699 
700 
701 static int
udf_write_nodedscr_rmw(struct udf_strat_args * args)702 udf_write_nodedscr_rmw(struct udf_strat_args *args)
703 {
704 	union dscrptr    *dscrptr = args->dscr;
705 	struct udf_mount *ump = args->ump;
706 	struct long_ad   *icb = args->icb;
707 	struct udf_node *udf_node = args->udf_node;
708 	struct udf_eccline *eccline;
709 	uint64_t bit;
710 	uint32_t sectornr, logsectornr, dummy;
711 	// int waitfor  = args->waitfor;
712 	int sector_size = ump->discinfo.sector_size;
713 	int lb_size __diagused = udf_rw32(ump->logical_vol->lb_size);
714 	int error, eccsect;
715 
716 	KASSERT(sector_size == lb_size);
717 	sectornr    = 0;
718 	error = udf_translate_vtop(ump, icb, &sectornr, &dummy);
719 	if (error)
720 		return error;
721 
722 	/* get our eccline */
723 	eccline = udf_geteccline(ump, sectornr, 0);
724 	eccsect = sectornr - eccline->start_sector;
725 
726 	bit = (uint64_t) 1 << eccsect;
727 
728 	/* old callback still pending? */
729 	if (eccline->bufs[eccsect]) {
730 		DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
731 					" over buffer?\n"));
732 		nestiobuf_done(eccline->bufs[eccsect],
733 				eccline->bufs_len[eccsect],
734 				0);
735 		eccline->bufs[eccsect] = NULL;
736 	}
737 
738 	/* set sector number in the descriptor and validate */
739 	dscrptr = (union dscrptr *)
740 		(((uint8_t *) eccline->blob) + eccsect * sector_size);
741 	KASSERT(dscrptr == args->dscr);
742 
743 	logsectornr = udf_rw32(icb->loc.lb_num);
744 	dscrptr->tag.tag_loc = udf_rw32(logsectornr);
745 	udf_validate_tag_and_crc_sums(dscrptr);
746 
747 	udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
748 
749 	/* set our flags */
750 	KASSERT(eccline->present & bit);
751 	eccline->dirty |= bit;
752 
753 	KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
754 
755 	udf_node->outstanding_nodedscr--;
756 	if (udf_node->outstanding_nodedscr == 0) {
757 		/* XXX still using wakeup! */
758 		UDF_UNLOCK_NODE(udf_node, 0);
759 		cv_broadcast(&udf_node->node_lock);
760 	}
761 	udf_puteccline(eccline);
762 
763 	/* XXX waitfor not used */
764 	return 0;
765 }
766 
767 
/*
 * Strategy entry point for a nested buf (args->nestbuf): move its data to
 * or from the eccline cache, one sector at a time.
 *
 * Reads are served from the cached blob where a sector is present;
 * otherwise the sector is marked for read-in and the buf is parked in the
 * eccline until the data has arrived.  Writes are copied into the blob,
 * marked present and dirty, and completed right away.  Writes of type
 * UDF_C_ABSOLUTE, UDF_C_DSCR and UDF_C_NODE go to the address already in
 * b_blkno; all other writes get their disc space allocated here first
 * (sequential writing), serialised by seqwrite_mutex.
 */
static void
udf_queuebuf_rmw(struct udf_strat_args *args)
{
	struct udf_mount *ump = args->ump;
	struct buf *buf = args->nestbuf;
	struct desc_tag *tag;
	struct strat_private *priv = PRIV(ump);
	struct udf_eccline *eccline;
	struct long_ad *node_ad_cpy;
	uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
	uint32_t buf_len, len, sectors, sectornr, our_sectornr;
	uint32_t bpos;
	uint16_t vpart_num;
	uint8_t *fidblk, *src, *dst;
	int sector_size = ump->discinfo.sector_size;
	int blks = sector_size / DEV_BSIZE;
	int eccsect, what, queue, error;

	KASSERT(ump);
	KASSERT(buf);
	KASSERT(buf->b_iodone == nestiobuf_iodone);

	/* b_blkno counts DEV_BSIZE units; convert it to a sector number */
	blknr        = buf->b_blkno;
	our_sectornr = blknr / blks;

	/* classify the request: read, write in place or sequential write */
	what = buf->b_udf_c_type;
	queue = UDF_SHED_READING;
	if ((buf->b_flags & B_READ) == 0) {
		/* writing */
		queue = UDF_SHED_SEQWRITING;
		if (what == UDF_C_ABSOLUTE)
			queue = UDF_SHED_WRITING;
		if (what == UDF_C_DSCR)
			queue = UDF_SHED_WRITING;
		if (what == UDF_C_NODE)
			queue = UDF_SHED_WRITING;
	}

	if (queue == UDF_SHED_READING) {
		DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
			"b_resid %d, b_bcount %d, b_bufsize %d\n",
			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
			buf->b_resid, buf->b_bcount, buf->b_bufsize));

		/* mark bits for reading */
		buf_len = buf->b_bcount;
		sectornr = our_sectornr;
		eccline = udf_geteccline(ump, sectornr, 0);
		eccsect = sectornr - eccline->start_sector;
		bpos = 0;
		while (buf_len) {
			len = MIN(buf_len, sector_size);
			/* ran off this line? release it and get the next */
			if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
				udf_puteccline(eccline);
				eccline = udf_geteccline(ump, sectornr, 0);
				eccsect = sectornr - eccline->start_sector;
			}
			bit = (uint64_t) 1 << eccsect;
			error = eccline->error & bit ? EIO : 0;
			if (eccline->present & bit) {
				/* cached: copy out and complete this piece */
				src = (uint8_t *) eccline->blob +
					eccsect * sector_size;
				dst = (uint8_t *) buf->b_data + bpos;
				if (!error)
					memcpy(dst, src, len);
				nestiobuf_done(buf, len, error);
			} else {
				/*
				 * not cached: request the sector and note
				 * where its data has to go once it is read
				 */
				eccline->readin |= bit;
				KASSERT(eccline->bufs[eccsect] == NULL);
				eccline->bufs[eccsect] = buf;
				eccline->bufs_bpos[eccsect] = bpos;
				eccline->bufs_len[eccsect] = len;
			}
			bpos += sector_size;
			eccsect++;
			sectornr++;
			buf_len -= len;
		}
		udf_puteccline(eccline);
		return;
	}

	if (queue == UDF_SHED_WRITING) {
		DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
			"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
			buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
			buf->b_resid, buf->b_bcount, buf->b_bufsize));

		/* if we have FIDs fixup using buffer's sector number(s) */
		if (buf->b_udf_c_type == UDF_C_FIDS)
			panic("UDF_C_FIDS in SHED_WRITING!\n");

		udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);

		/* copy parts into the bufs and set for writing */
		buf_len = buf->b_bcount;
		sectornr = our_sectornr;
		eccline = udf_geteccline(ump, sectornr, 0);
		eccsect = sectornr - eccline->start_sector;
		bpos = 0;
		while (buf_len) {
			len = MIN(buf_len, sector_size);
			/* ran off this line? release it and get the next */
			if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
				udf_puteccline(eccline);
				eccline = udf_geteccline(ump, sectornr, 0);
				eccsect = sectornr - eccline->start_sector;
			}
			bit = (uint64_t) 1 << eccsect;
			KASSERT((eccline->readin & bit) == 0);
			eccline->present |= bit;
			eccline->dirty   |= bit;
			if (eccline->bufs[eccsect]) {
				/* old callback still pending */
				nestiobuf_done(eccline->bufs[eccsect],
						eccline->bufs_len[eccsect],
						0);
				eccline->bufs[eccsect] = NULL;
			}

			/* a short last piece is padded with zeroes */
			src = (uint8_t *) buf->b_data + bpos;
			dst = (uint8_t *) eccline->blob + eccsect * sector_size;
			if (len != sector_size)
				memset(dst, 0, sector_size);
			memcpy(dst, src, len);

			/* note that its finished for this extent */
			eccline->bufs[eccsect] = NULL;
			nestiobuf_done(buf, len, 0);

			bpos += sector_size;
			eccsect++;
			sectornr++;
			buf_len -= len;
		}
		udf_puteccline(eccline);
		return;

	}

	/* sequential writing */
	KASSERT(queue == UDF_SHED_SEQWRITING);
	DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
		"type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
		buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
		buf->b_bufsize));
	/*
	 * Buffers should not have been allocated to disc addresses yet on
	 * this queue. Note that a buffer can get multiple extents allocated.
	 * Note that it *looks* like the normal writing but its different in
	 * the details.
	 *
	 * lmapping contains lb_num relative to base partition.
	 *
	 * XXX should we try to claim/organize the allocated memory to
	 * block-aligned pieces?
	 */
	mutex_enter(&priv->seqwrite_mutex);

	/* scratch arrays kept in the mount structure */
	lmapping    = ump->la_lmapping;
	node_ad_cpy = ump->la_node_ad_cpy;

	/* logically allocate buf and map it in the file */
	udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);

	/* if we have FIDs, fixup using the new allocation table */
	if (buf->b_udf_c_type == UDF_C_FIDS) {
		buf_len = buf->b_bcount;
		bpos = 0;
		lmappos = lmapping;
		while (buf_len) {
			sectornr = *lmappos++;
			len = MIN(buf_len, sector_size);
			fidblk = (uint8_t *) buf->b_data + bpos;
			udf_fixup_fid_block(fidblk, sector_size,
				0, len, sectornr);
			bpos += len;
			buf_len -= len;
		}
	}
	if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
		if (buf->b_lblkno == 0) {
			/* update the tag location inside */
			tag = (struct desc_tag *) buf->b_data;
			tag->tag_loc = udf_rw32(*lmapping);
			udf_validate_tag_and_crc_sums(buf->b_data);
		}
	}
	udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);

	/*
	 * Translate new mappings in lmapping to pmappings.
	 * pmapping to contain lb_nums as used for disc addressing.
	 */
	pmapping = ump->la_pmapping;
	sectors  = (buf->b_bcount + sector_size -1) / sector_size;
	udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);

	/* copy parts into the bufs and set for writing */
	pmappos = pmapping;
	buf_len = buf->b_bcount;
	sectornr = *pmappos++;
	eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
	eccsect = sectornr - eccline->start_sector;
	bpos = 0;
	while (buf_len) {
		len = MIN(buf_len, sector_size);
		/*
		 * sectors come from the mapping and need not be consecutive,
		 * so the position in the line is recomputed for each one
		 */
		eccsect = sectornr - eccline->start_sector;
		if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
			eccline->flags |= ECC_SEQWRITING;
			udf_puteccline(eccline);
			eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
			eccsect = sectornr - eccline->start_sector;
		}
		bit = (uint64_t) 1 << eccsect;
		KASSERT((eccline->readin & bit) == 0);
		eccline->present |= bit;
		eccline->dirty   |= bit;
		eccline->bufs[eccsect] = NULL;

		/* a short last piece is padded with zeroes */
		src = (uint8_t *) buf->b_data + bpos;
		dst = (uint8_t *)
			eccline->blob + eccsect * sector_size;
		if (len != sector_size)
			memset(dst, 0, sector_size);
		memcpy(dst, src, len);

		/* note that its finished for this extent */
		nestiobuf_done(buf, len, 0);

		bpos += sector_size;
		/*
		 * NOTE(review): on the last pass this fetches the entry one
		 * past the `sectors' translated mappings; the value is not
		 * used, but confirm la_pmapping has room for the access.
		 */
		sectornr = *pmappos++;
		buf_len -= len;
	}
	eccline->flags |= ECC_SEQWRITING;
	udf_puteccline(eccline);
	mutex_exit(&priv->seqwrite_mutex);
}
1005 
1006 /* --------------------------------------------------------------------- */
1007 
1008 static void
udf_sync_caches_rmw(struct udf_strat_args * args)1009 udf_sync_caches_rmw(struct udf_strat_args *args)
1010 {
1011 	struct udf_mount *ump = args->ump;
1012 
1013 	udf_mmc_synchronise_caches(ump);
1014 }
1015 
1016 /* --------------------------------------------------------------------- */
1017 
1018 static void
udf_shedule_read_callback(struct buf * buf)1019 udf_shedule_read_callback(struct buf *buf)
1020 {
1021 	struct udf_eccline *eccline = BTOE(buf);
1022 	struct udf_mount *ump = eccline->ump;
1023 	uint64_t bit;
1024 	uint8_t *src, *dst;
1025 	int sector_size = ump->discinfo.sector_size;
1026 	int error, i, len;
1027 
1028 	DPRINTF(ECCLINE, ("read callback called on buf %p\n", buf));
1029 
1030 	/* post process read action */
1031 	KASSERT(eccline->flags & ECC_LOCKED);
1032 	error = buf->b_error;
1033 	for (i = 0; i < ump->packet_size; i++) {
1034 		bit = (uint64_t) 1 << i;
1035 		src = (uint8_t *) buf->b_data +   i * sector_size;
1036 		dst = (uint8_t *) eccline->blob + i * sector_size;
1037 		if (eccline->present & bit)
1038 			continue;
1039 		eccline->present |= bit;
1040 		if (error)
1041 			eccline->error |= bit;
1042 		if (eccline->bufs[i]) {
1043 			dst = (uint8_t *) eccline->bufs[i]->b_data +
1044 				eccline->bufs_bpos[i];
1045 			len = eccline->bufs_len[i];
1046 			if (!error)
1047 				memcpy(dst, src, len);
1048 			nestiobuf_done(eccline->bufs[i], len, error);
1049 			eccline->bufs[i] = NULL;
1050 		}
1051 
1052 	}
1053 	KASSERT(buf->b_data == eccline->blob);
1054 	KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
1055 
1056 	/*
1057 	 * XXX TODO what to do on read errors? read in all sectors
1058 	 * synchronously and allocate a sparable entry?
1059 	 */
1060 
1061 	udf_puteccline(eccline);
1062 	DPRINTF(ECCLINE, ("read callback finished\n"));
1063 }
1064 
1065 
1066 static void
udf_shedule_write_callback(struct buf * buf)1067 udf_shedule_write_callback(struct buf *buf)
1068 {
1069 	struct udf_eccline *eccline = BTOE(buf);
1070 	struct udf_mount *ump = eccline->ump;
1071 	uint64_t bit;
1072 	int error, i;
1073 
1074 	DPRINTF(ECCLINE, ("write callback called on buf %p\n", buf));
1075 
1076 	/* post process write action */
1077 	KASSERT(eccline->flags & ECC_LOCKED);
1078 	error = buf->b_error;
1079 	for (i = 0; i < ump->packet_size; i++) {
1080 		bit = (uint64_t) 1 << i;
1081 		if ((eccline->dirty & bit) == 0)
1082 			continue;
1083 		if (error) {
1084 			eccline->error |= bit;
1085 		} else {
1086 			eccline->dirty &= ~bit;
1087 		}
1088 
1089 		KASSERT(eccline->bufs[i] == 0);
1090 	}
1091 	KASSERT(eccline->dirty == 0);
1092 	KASSERT(error == 0);
1093 
1094 	/*
1095 	 * XXX TODO on write errors allocate a sparable entry and reissue
1096 	 */
1097 
1098 	udf_puteccline(eccline);
1099 	DPRINTF(ECCLINE, ("write callback finished\n"));
1100 }
1101 
1102 
1103 static void
udf_issue_eccline(struct udf_eccline * eccline,int queued_on)1104 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1105 {
1106 	struct udf_mount *ump = eccline->ump;
1107 	struct strat_private *priv = PRIV(ump);
1108 	struct buf *buf, *nestbuf;
1109 	uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1110 	uint32_t start;
1111 	int sector_size = ump->discinfo.sector_size;
1112 	int blks = sector_size / DEV_BSIZE;
1113 	int i;
1114 
1115 	KASSERT(eccline->flags & ECC_LOCKED);
1116 
1117 	if (queued_on == UDF_SHED_READING) {
1118 		DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1119 		/* read all bits that are not yet present */
1120 		eccline->readin = (~eccline->present) & allbits;
1121 		KASSERT(eccline->readin);
1122 		start = eccline->start_sector;
1123 		buf = eccline->buf;
1124 		buf->b_flags    = B_READ | B_ASYNC;
1125 		SET(buf->b_cflags, BC_BUSY);	/* mark buffer busy */
1126 		buf->b_oflags   = 0;
1127 		buf->b_iodone   = udf_shedule_read_callback;
1128 		buf->b_data     = eccline->blob;
1129 		buf->b_bcount   = ump->packet_size * sector_size;
1130 		buf->b_resid    = buf->b_bcount;
1131 		buf->b_bufsize  = buf->b_bcount;
1132 		buf->b_private  = eccline;
1133 		BIO_SETPRIO(buf, BPRIO_DEFAULT);
1134 		buf->b_lblkno   = buf->b_blkno = buf->b_rawblkno = start * blks;
1135 		buf->b_proc     = NULL;
1136 
1137 		if (eccline->present != 0) {
1138 			for (i = 0; i < ump->packet_size; i++) {
1139 				bit = (uint64_t) 1 << i;
1140 				if (eccline->present & bit) {
1141 					nestiobuf_done(buf, sector_size, 0);
1142 					continue;
1143 				}
1144 				nestbuf = getiobuf(NULL, true);
1145 				nestiobuf_setup(buf, nestbuf, i * sector_size,
1146 					sector_size);
1147 				/* adjust blocknumber to read */
1148 				nestbuf->b_blkno = buf->b_blkno + i*blks;
1149 				nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1150 
1151 				DPRINTF(SHEDULE, ("sector %d ", start + i));
1152 
1153 				/* mutex dance since it could lock */
1154 				mutex_exit(&priv->discstrat_mutex);
1155 					/* call asynchronous */
1156 					VOP_STRATEGY(ump->devvp, nestbuf);
1157 				mutex_enter(&priv->discstrat_mutex);
1158 			}
1159 			DPRINTF(SHEDULE, ("\n"));
1160 			return;
1161 		}
1162 	} else {
1163 		/* write or seqwrite */
1164 		DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1165 		DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", "
1166 			"dirty %"PRIx64"\n\t", eccline->present, eccline->readin,
1167 			eccline->dirty));
1168 		KASSERT(eccline->present == allbits);
1169 
1170 		start = eccline->start_sector;
1171 		buf = eccline->buf;
1172 		buf->b_flags    = B_WRITE | B_ASYNC;
1173 		SET(buf->b_cflags, BC_BUSY);	/* mark buffer busy */
1174 		buf->b_oflags   = 0;
1175 		buf->b_iodone   = udf_shedule_write_callback;
1176 		buf->b_data     = eccline->blob;
1177 		buf->b_bcount   = ump->packet_size * sector_size;
1178 		buf->b_resid    = buf->b_bcount;
1179 		buf->b_bufsize  = buf->b_bcount;
1180 		buf->b_private  = eccline;
1181 		BIO_SETPRIO(buf, BPRIO_DEFAULT);
1182 		buf->b_lblkno   = buf->b_blkno = buf->b_rawblkno = start * blks;
1183 		buf->b_proc     = NULL;
1184 	}
1185 
1186 	/* mutex dance since it could lock */
1187 	mutex_exit(&priv->discstrat_mutex);
1188 		/* call asynchronous */
1189 		DPRINTF(SHEDULE, ("sector %d for %d\n",
1190 			start, ump->packet_size));
1191 		VOP_STRATEGY(ump->devvp, buf);
1192 	mutex_enter(&priv->discstrat_mutex);
1193 }
1194 
1195 
1196 static void
udf_discstrat_thread(void * arg)1197 udf_discstrat_thread(void *arg)
1198 {
1199 	struct udf_mount *ump = (struct udf_mount *) arg;
1200 	struct strat_private *priv = PRIV(ump);
1201 	struct udf_eccline *eccline;
1202 	struct timespec now, *last;
1203 	uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
1204 	int new_queue, wait, work;
1205 
1206 	work = 1;
1207 	priv->thread_running = 1;
1208 	cv_broadcast(&priv->discstrat_cv);
1209 
1210 	mutex_enter(&priv->discstrat_mutex);
1211 	priv->num_floating = 0;
1212 	while (priv->run_thread || work || priv->num_floating) {
1213 		/* get our time */
1214 		vfs_timestamp(&now);
1215 
1216 		/* maintenance: handle eccline state machine */
1217 		for(;;) {
1218 			/* only peek at it */
1219 			eccline = udf_peek_eccline(priv, UDF_SHED_WAITING);
1220 			if (eccline == NULL)
1221 				break;
1222 
1223 			/* if not reading, wait until the time has come */
1224 			if ((priv->cur_queue != UDF_SHED_READING) &&
1225 				(eccline->wait_time.tv_sec - now.tv_sec > 0)) {
1226 					UDF_UNLOCK_ECCLINE(eccline);
1227 					/* all others are later, so break off */
1228 					break;
1229 			}
1230 
1231 			/* release */
1232 			UDF_UNLOCK_ECCLINE(eccline);
1233 
1234 			/* do get it */
1235 			eccline = udf_pop_eccline(priv, UDF_SHED_WAITING);
1236 
1237 			/* requeue according to state */
1238 			new_queue = UDF_SHED_FREE;	/* unlikely */
1239 			if (eccline->refcnt > 0)
1240 				new_queue = UDF_SHED_IDLE;
1241 			if (eccline->flags & ECC_WANTED)
1242 				new_queue = UDF_SHED_IDLE;
1243 			if (eccline->readin)
1244 				new_queue = UDF_SHED_READING;
1245 			if (eccline->dirty) {
1246 				new_queue = UDF_SHED_READING;
1247 				if (eccline->present == allbits) {
1248 					new_queue = UDF_SHED_WRITING;
1249 					if (eccline->flags & ECC_SEQWRITING)
1250 						new_queue = UDF_SHED_SEQWRITING;
1251 				}
1252 			}
1253 			udf_push_eccline(eccline, new_queue);
1254 		}
1255 
1256 		/* maintenance: free excess ecclines */
1257 		while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1258 			eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1259 			KASSERT(eccline);
1260 			KASSERT(eccline->refcnt == 0);
1261 			if (eccline->flags & ECC_WANTED) {
1262 				/* we won the race, but we dont want to win */
1263 				DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
1264 				udf_push_eccline(eccline, UDF_SHED_IDLE);
1265 			} else {
1266 				DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1267 				udf_dispose_eccline(eccline);
1268 			}
1269 		}
1270 
1271 		/* process the current selected queue */
1272 		/* get our time */
1273 		vfs_timestamp(&now);
1274 		last = &priv->last_queued[priv->cur_queue];
1275 
1276 		/* get our line */
1277 		eccline = udf_pop_eccline(priv, priv->cur_queue);
1278 		if (eccline) {
1279 			wait = 0;
1280 			new_queue = priv->cur_queue;
1281 			DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1282 
1283 			udf_issue_eccline(eccline, priv->cur_queue);
1284 		} else {
1285 			/* don't switch too quickly */
1286 			if (now.tv_sec - last->tv_sec < 2) {
1287 				/* wait some time */
1288 				cv_timedwait(&priv->discstrat_cv,
1289 					&priv->discstrat_mutex, hz);
1290 				/* we assume there is work to be done */
1291 				work = 1;
1292 				continue;
1293 			}
1294 
1295 			/* XXX select on queue lengths ? */
1296 			wait = 1;
1297 			/* check if we can/should switch */
1298 			new_queue = priv->cur_queue;
1299 			if (bufq_peek(priv->queues[UDF_SHED_READING]))
1300 				new_queue = UDF_SHED_READING;
1301 			if (bufq_peek(priv->queues[UDF_SHED_WRITING]))
1302 				new_queue = UDF_SHED_WRITING;
1303 			if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
1304 				new_queue = UDF_SHED_SEQWRITING;
1305 		}
1306 
1307 		/* give room */
1308 		mutex_exit(&priv->discstrat_mutex);
1309 
1310 		if (new_queue != priv->cur_queue) {
1311 			wait = 0;
1312 			DPRINTF(SHEDULE, ("switching from %d to %d\n",
1313 				priv->cur_queue, new_queue));
1314 			priv->cur_queue = new_queue;
1315 		}
1316 		mutex_enter(&priv->discstrat_mutex);
1317 
1318 		/* wait for more if needed */
1319 		if (wait)
1320 			cv_timedwait(&priv->discstrat_cv,
1321 				&priv->discstrat_mutex, hz/4);	/* /8 */
1322 
1323 		work  = (bufq_peek(priv->queues[UDF_SHED_WAITING]) != NULL);
1324 		work |= (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL);
1325 		work |= (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL);
1326 		work |= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1327 
1328 		DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1329 			(bufq_peek(priv->queues[UDF_SHED_READING]) != NULL),
1330 			(bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL),
1331 			(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1332 			work, priv->num_floating));
1333 	}
1334 
1335 	mutex_exit(&priv->discstrat_mutex);
1336 
1337 	/* tear down remaining ecclines */
1338 	mutex_enter(&priv->discstrat_mutex);
1339 	KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL);
1340 	KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL);
1341 	KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
1342 	KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
1343 	KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1344 
1345 	KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0);
1346 	KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1347 	KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1348 	KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1349 	KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1350 
1351 	eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1352 	while (eccline) {
1353 		udf_dispose_eccline(eccline);
1354 		eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1355 	}
1356 	KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1357 	mutex_exit(&priv->discstrat_mutex);
1358 
1359 	priv->thread_running  = 0;
1360 	priv->thread_finished = 1;
1361 	cv_broadcast(&priv->discstrat_cv);
1362 
1363 	kthread_exit(0);
1364 	/* not reached */
1365 }
1366 
1367 /* --------------------------------------------------------------------- */
1368 
1369 /*
1370  * Buffer memory pool allocator.
1371  */
1372 
1373 static void *
ecclinepool_page_alloc(struct pool * pp,int flags)1374 ecclinepool_page_alloc(struct pool *pp, int flags)
1375 {
1376         return (void *)uvm_km_alloc(kernel_map,
1377             MAXBSIZE, MAXBSIZE,
1378             ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1379 	    	| UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1380 }
1381 
1382 static void
ecclinepool_page_free(struct pool * pp,void * v)1383 ecclinepool_page_free(struct pool *pp, void *v)
1384 {
1385         uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1386 }
1387 
1388 static struct pool_allocator ecclinepool_allocator = {
1389         .pa_alloc = ecclinepool_page_alloc,
1390         .pa_free  = ecclinepool_page_free,
1391         .pa_pagesz = MAXBSIZE,
1392 };
1393 
1394 
1395 static void
udf_discstrat_init_rmw(struct udf_strat_args * args)1396 udf_discstrat_init_rmw(struct udf_strat_args *args)
1397 {
1398 	struct udf_mount *ump = args->ump;
1399 	struct strat_private *priv = PRIV(ump);
1400 	uint32_t lb_size, blobsize, hashline;
1401 	int i;
1402 
1403 	KASSERT(ump);
1404 	KASSERT(ump->logical_vol);
1405 	KASSERT(priv == NULL);
1406 
1407 	lb_size = udf_rw32(ump->logical_vol->lb_size);
1408 	blobsize = ump->packet_size * lb_size;
1409 	KASSERT(lb_size > 0);
1410 	KASSERT(ump->packet_size <= 64);
1411 
1412 	/* initialise our memory space */
1413 	ump->strategy_private = malloc(sizeof(struct strat_private),
1414 		M_UDFTEMP, M_WAITOK);
1415 	priv = ump->strategy_private;
1416 	memset(priv, 0 , sizeof(struct strat_private));
1417 
1418 	/* initialise locks */
1419 	cv_init(&priv->discstrat_cv, "udfstrat");
1420 	mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
1421 	mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1422 
1423 	/* initialise struct eccline pool */
1424 	pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1425 		0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1426 
1427 	/* initialise eccline blob pool */
1428         ecclinepool_allocator.pa_pagesz = blobsize;
1429 	pool_init(&priv->ecclineblob_pool, blobsize,
1430 		0, 0, 0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1431 
1432 	/* initialise main queues */
1433 	for (i = 0; i < UDF_SHED_MAX; i++) {
1434 		priv->num_queued[i] = 0;
1435 		vfs_timestamp(&priv->last_queued[i]);
1436 	}
1437 	bufq_alloc(&priv->queues[UDF_SHED_WAITING], "fcfs",
1438 		BUFQ_SORT_RAWBLOCK);
1439 	bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1440 		BUFQ_SORT_RAWBLOCK);
1441 	bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1442 		BUFQ_SORT_RAWBLOCK);
1443 	bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1444 
1445 	/* initialise administrative queues */
1446 	bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1447 	bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1448 
1449 	for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1450 		LIST_INIT(&priv->eccline_hash[hashline]);
1451 	}
1452 
1453 	/* create our disk strategy thread */
1454 	priv->cur_queue = UDF_SHED_READING;
1455 	priv->thread_finished = 0;
1456 	priv->thread_running  = 0;
1457 	priv->run_thread      = 1;
1458 	if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1459 		udf_discstrat_thread, ump, &priv->queue_lwp,
1460 		"%s", "udf_rw")) {
1461 		panic("fork udf_rw");
1462 	}
1463 
1464 	/* wait for thread to spin up */
1465 	mutex_enter(&priv->discstrat_mutex);
1466 	while (!priv->thread_running) {
1467 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
1468 	}
1469 	mutex_exit(&priv->discstrat_mutex);
1470 }
1471 
1472 
1473 static void
udf_discstrat_finish_rmw(struct udf_strat_args * args)1474 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1475 {
1476 	struct udf_mount *ump = args->ump;
1477 	struct strat_private *priv = PRIV(ump);
1478 
1479 	if (ump == NULL)
1480 		return;
1481 
1482 	/* stop our sheduling thread */
1483 	KASSERT(priv->run_thread == 1);
1484 	priv->run_thread = 0;
1485 
1486 	mutex_enter(&priv->discstrat_mutex);
1487 	while (!priv->thread_finished) {
1488 		cv_broadcast(&priv->discstrat_cv);
1489 		cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
1490 	}
1491 	mutex_exit(&priv->discstrat_mutex);
1492 
1493 	/* kthread should be finished now */
1494 	cv_destroy(&priv->discstrat_cv);
1495 	mutex_destroy(&priv->discstrat_mutex);
1496 	mutex_destroy(&priv->seqwrite_mutex);
1497 
1498 	/* cleanup our pools */
1499 	pool_destroy(&priv->eccline_pool);
1500 	pool_destroy(&priv->ecclineblob_pool);
1501 
1502 	/* free our private space */
1503 	free(ump->strategy_private, M_UDFTEMP);
1504 	ump->strategy_private = NULL;
1505 }
1506 
1507 /* --------------------------------------------------------------------- */
1508 
1509 struct udf_strategy udf_strat_rmw =
1510 {
1511 	udf_create_nodedscr_rmw,
1512 	udf_free_nodedscr_rmw,
1513 	udf_read_nodedscr_rmw,
1514 	udf_write_nodedscr_rmw,
1515 	udf_queuebuf_rmw,
1516 	udf_sync_caches_rmw,
1517 	udf_discstrat_init_rmw,
1518 	udf_discstrat_finish_rmw
1519 };
1520 
1521