1 /* $NetBSD: udf_strat_rmw.c,v 1.31 2023/06/27 09:58:50 reinoud Exp $ */
2
3 /*
4 * Copyright (c) 2006, 2008 Reinoud Zandijk
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __KERNEL_RCSID(0, "$NetBSD: udf_strat_rmw.c,v 1.31 2023/06/27 09:58:50 reinoud Exp $");
32 #endif /* not lint */
33
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_compat_netbsd.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/vnode.h>
46 #include <miscfs/genfs/genfs_node.h>
47 #include <sys/mount.h>
48 #include <sys/buf.h>
49 #include <sys/file.h>
50 #include <sys/device.h>
51 #include <sys/disklabel.h>
52 #include <sys/ioctl.h>
53 #include <sys/malloc.h>
54 #include <sys/dirent.h>
55 #include <sys/stat.h>
56 #include <sys/conf.h>
57 #include <sys/kauth.h>
58 #include <sys/kthread.h>
59 #include <dev/clock_subr.h>
60
61 #include <fs/udf/ecma167-udf.h>
62 #include <fs/udf/udf_mount.h>
63
64 #include "udf.h"
65 #include "udf_subr.h"
66 #include "udf_bswap.h"
67
68
/* accessors for the private data hanging off vnodes, mounts and bufs */
#define VTOI(vnode) ((struct udf_node *) (vnode)->v_data)
#define PRIV(ump)   ((struct strat_private *) (ump)->strategy_private)
#define BTOE(buf)   ((struct udf_eccline *) ((buf)->b_private))

/* --------------------------------------------------------------------- */

/*
 * Upper bound on the number of sectors in one eccline; the per-sector
 * bitmaps kept for each line are uint64_t with one bit per sector.
 */
#define UDF_MAX_PACKET_SIZE	64	/* DONT change this */

/*
 * scheduler states; they double as the index of the queue a line sits on,
 * a `queued_on' of 0 means the line is on no queue at all.
 */
#define UDF_SHED_WAITING	1	/* waiting on timeout */
#define UDF_SHED_READING	2
#define UDF_SHED_WRITING	3
#define UDF_SHED_SEQWRITING	4
#define UDF_SHED_IDLE		5	/* refcnt'd */
#define UDF_SHED_FREE		6	/* recycleable */
/* array size for the per-queue tables; parenthesised to be expression safe */
#define UDF_SHED_MAX		(6+1)

/* flags */
#define ECC_LOCKED		0x01	/* prevent access */
#define ECC_WANTED		0x02	/* trying access */
#define ECC_SEQWRITING		0x04	/* sequential queue */
#define ECC_FLOATING		0x08	/* not queued yet */

/* seconds added to the wait_time of a dirty line put on the waiting queue */
#define ECC_WAITTIME		10
93
94
95 TAILQ_HEAD(ecclineq, udf_eccline);
96 struct udf_eccline {
97 struct udf_mount *ump;
98 uint64_t present; /* preserve these */
99 uint64_t readin; /* bitmap */
100 uint64_t dirty; /* bitmap */
101 uint64_t error; /* bitmap */
102 uint32_t refcnt;
103
104 struct timespec wait_time;
105 uint32_t flags;
106 uint32_t start_sector; /* physical */
107
108 const char *fname;
109 int sline;
110
111 struct buf *buf;
112 void *blob;
113
114 struct buf *bufs[UDF_MAX_PACKET_SIZE];
115 uint32_t bufs_bpos[UDF_MAX_PACKET_SIZE];
116 int bufs_len[UDF_MAX_PACKET_SIZE];
117
118 int queued_on; /* on which BUFQ list */
119 LIST_ENTRY(udf_eccline) hashchain; /* on sector lookup */
120 };
121
122
123 struct strat_private {
124 lwp_t *queue_lwp;
125 kcondvar_t discstrat_cv; /* to wait on */
126 kmutex_t discstrat_mutex; /* disc strategy */
127 kmutex_t seqwrite_mutex; /* protect mappings */
128
129 int thread_running; /* thread control */
130 int run_thread; /* thread control */
131 int thread_finished; /* thread control */
132 int cur_queue;
133
134 int num_floating;
135 int num_queued[UDF_SHED_MAX];
136 struct bufq_state *queues[UDF_SHED_MAX];
137 struct timespec last_queued[UDF_SHED_MAX];
138 struct disk_strategy old_strategy_setting;
139
140 struct pool eccline_pool;
141 struct pool ecclineblob_pool;
142 LIST_HEAD(, udf_eccline) eccline_hash[UDF_ECCBUF_HASHSIZE];
143 };
144
145 /* --------------------------------------------------------------------- */
146
/* lock/unlock wrappers that pass the call site along for debugging */
#define UDF_LOCK_ECCLINE(eccline) \
	udf_lock_eccline((eccline), __FILE__, __LINE__)
#define UDF_UNLOCK_ECCLINE(eccline) \
	udf_unlock_eccline((eccline), __FILE__, __LINE__)
149
150 /* can be called with or without discstrat lock */
151 static void
udf_lock_eccline(struct udf_eccline * eccline,const char * fname,int sline)152 udf_lock_eccline(struct udf_eccline *eccline, const char *fname, int sline)
153 {
154 struct strat_private *priv = PRIV(eccline->ump);
155 int waslocked, ret;
156
157 KASSERT(mutex_owned(&priv->discstrat_mutex));
158
159 waslocked = mutex_owned(&priv->discstrat_mutex);
160 if (!waslocked)
161 mutex_enter(&priv->discstrat_mutex);
162
163 /* wait until its unlocked first */
164 eccline->refcnt++;
165 while (eccline->flags & ECC_LOCKED) {
166 DPRINTF(ECCLINE, ("waiting for lock at %s:%d\n",
167 fname, sline));
168 DPRINTF(ECCLINE, ("was locked at %s:%d\n",
169 eccline->fname, eccline->sline));
170 eccline->flags |= ECC_WANTED;
171 ret = cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex,
172 hz/8);
173 if (ret == EWOULDBLOCK)
174 DPRINTF(LOCKING, ("eccline lock held, waiting for "
175 "release"));
176 }
177 eccline->flags |= ECC_LOCKED;
178 eccline->flags &= ~ECC_WANTED;
179 eccline->refcnt--;
180
181 eccline->fname = fname;
182 eccline->sline = sline;
183
184 if (!waslocked)
185 mutex_exit(&priv->discstrat_mutex);
186 }
187
188
189 /* can be called with or without discstrat lock */
190 static void
udf_unlock_eccline(struct udf_eccline * eccline,const char * fname,int sline)191 udf_unlock_eccline(struct udf_eccline *eccline, const char *fname, int sline)
192 {
193 struct strat_private *priv = PRIV(eccline->ump);
194 int waslocked;
195
196 KASSERT(mutex_owned(&priv->discstrat_mutex));
197
198 waslocked = mutex_owned(&priv->discstrat_mutex);
199 if (!waslocked)
200 mutex_enter(&priv->discstrat_mutex);
201
202 eccline->flags &= ~ECC_LOCKED;
203 cv_broadcast(&priv->discstrat_cv);
204
205 if (!waslocked)
206 mutex_exit(&priv->discstrat_mutex);
207 }
208
209
210 /* NOTE discstrat_mutex should be held! */
211 static void
udf_dispose_eccline(struct udf_eccline * eccline)212 udf_dispose_eccline(struct udf_eccline *eccline)
213 {
214 struct strat_private *priv = PRIV(eccline->ump);
215
216 KASSERT(mutex_owned(&priv->discstrat_mutex));
217
218 DPRINTF(ECCLINE, ("dispose eccline with start sector %d, "
219 "present %0"PRIx64"\n", eccline->start_sector,
220 eccline->present));
221
222 KASSERT(eccline->refcnt == 0);
223 KASSERT(eccline->dirty == 0);
224 KASSERT(eccline->queued_on == 0);
225 KASSERT(eccline->flags & ECC_FLOATING);
226 KASSERT(eccline->flags & ECC_LOCKED);
227
228 LIST_REMOVE(eccline, hashchain);
229 priv->num_floating--;
230
231 putiobuf(eccline->buf);
232 pool_put(&priv->ecclineblob_pool, eccline->blob);
233 pool_put(&priv->eccline_pool, eccline);
234 }
235
236
237 /* NOTE discstrat_mutex should be held! */
238 static void
udf_push_eccline(struct udf_eccline * eccline,int newqueue)239 udf_push_eccline(struct udf_eccline *eccline, int newqueue)
240 {
241 struct strat_private *priv = PRIV(eccline->ump);
242
243 KASSERT(mutex_owned(&priv->discstrat_mutex));
244
245 DPRINTF(PARANOIA, ("DEBUG: buf %p pushed on queue %d\n", eccline->buf, newqueue));
246
247 KASSERT(eccline->queued_on == 0);
248 KASSERT(eccline->flags & ECC_FLOATING);
249
250 /* set buffer block numbers to make sure its queued correctly */
251 eccline->buf->b_lblkno = eccline->start_sector;
252 eccline->buf->b_blkno = eccline->start_sector;
253 eccline->buf->b_rawblkno = eccline->start_sector;
254
255 vfs_timestamp(&priv->last_queued[newqueue]);
256 eccline->flags &= ~ECC_FLOATING;
257 priv->num_floating--;
258 eccline->queued_on = newqueue;
259 priv->num_queued[newqueue]++;
260 bufq_put(priv->queues[newqueue], eccline->buf);
261
262 UDF_UNLOCK_ECCLINE(eccline);
263
264 /* XXX tickle disc strategy statemachine */
265 if (newqueue != UDF_SHED_IDLE)
266 cv_signal(&priv->discstrat_cv);
267 }
268
269
270 static struct udf_eccline *
udf_peek_eccline(struct strat_private * priv,int queued_on)271 udf_peek_eccline(struct strat_private *priv, int queued_on)
272 {
273 struct udf_eccline *eccline;
274 struct buf *buf;
275
276 KASSERT(mutex_owned(&priv->discstrat_mutex));
277
278 for(;;) {
279 buf = bufq_peek(priv->queues[queued_on]);
280 /* could have been a race, but we'll revisit later */
281 if (buf == NULL)
282 return NULL;
283
284 eccline = BTOE(buf);
285 UDF_LOCK_ECCLINE(eccline);
286
287 /* might have changed before we obtained the lock */
288 if (eccline->queued_on == queued_on)
289 break;
290
291 UDF_UNLOCK_ECCLINE(eccline);
292 }
293
294 KASSERT(eccline->queued_on == queued_on);
295 KASSERT((eccline->flags & ECC_FLOATING) == 0);
296
297 DPRINTF(PARANOIA, ("DEBUG: buf %p peeked at queue %d\n",
298 eccline->buf, queued_on));
299
300 return eccline;
301 }
302
303
304 static struct udf_eccline *
udf_pop_eccline(struct strat_private * priv,int queued_on)305 udf_pop_eccline(struct strat_private *priv, int queued_on)
306 {
307 struct udf_eccline *eccline;
308 struct buf *buf;
309
310 KASSERT(mutex_owned(&priv->discstrat_mutex));
311
312 for(;;) {
313 buf = bufq_get(priv->queues[queued_on]);
314 if (buf == NULL) {
315 // KASSERT(priv->num_queued[queued_on] == 0);
316 return NULL;
317 }
318
319 eccline = BTOE(buf);
320 UDF_LOCK_ECCLINE(eccline);
321
322 /* might have changed before we obtained the lock */
323 if (eccline->queued_on == queued_on)
324 break;
325
326 UDF_UNLOCK_ECCLINE(eccline);
327 }
328
329 KASSERT(eccline->queued_on == queued_on);
330 KASSERT((eccline->flags & ECC_FLOATING) == 0);
331
332 priv->num_queued[queued_on]--;
333 eccline->queued_on = 0;
334
335 eccline->flags |= ECC_FLOATING;
336 priv->num_floating++;
337
338 DPRINTF(PARANOIA, ("DEBUG: buf %p popped from queue %d\n",
339 eccline->buf, queued_on));
340
341 return eccline;
342 }
343
344
345 static void
udf_unqueue_eccline(struct strat_private * priv,struct udf_eccline * eccline)346 udf_unqueue_eccline(struct strat_private *priv, struct udf_eccline *eccline)
347 {
348 struct buf *ret __diagused;
349
350 UDF_LOCK_ECCLINE(eccline);
351 if (eccline->queued_on == 0) {
352 KASSERT(eccline->flags & ECC_FLOATING);
353 return;
354 }
355
356 ret = bufq_cancel(priv->queues[eccline->queued_on], eccline->buf);
357 KASSERT(ret == eccline->buf);
358
359 priv->num_queued[eccline->queued_on]--;
360 eccline->queued_on = 0;
361
362 eccline->flags |= ECC_FLOATING;
363 priv->num_floating++;
364 }
365
366
367 static struct udf_eccline *
udf_geteccline(struct udf_mount * ump,uint32_t sector,int flags)368 udf_geteccline(struct udf_mount *ump, uint32_t sector, int flags)
369 {
370 struct strat_private *priv = PRIV(ump);
371 struct udf_eccline *eccline;
372 uint32_t start_sector, lb_size, blobsize;
373 uint8_t *eccline_blob;
374 int line, line_offset;
375 int num_busy;
376
377 mutex_enter(&priv->discstrat_mutex);
378
379 /* lookup in our line cache hashtable */
380 line_offset = sector % ump->packet_size;
381 start_sector = sector - line_offset;
382 line = (start_sector/ump->packet_size) & UDF_ECCBUF_HASHMASK;
383
384 KASSERT(priv->thread_running);
385
386 retry:
387 DPRINTF(ECCLINE, ("get line sector %d, line %d\n", sector, line));
388 LIST_FOREACH(eccline, &priv->eccline_hash[line], hashchain) {
389 if (eccline->start_sector == start_sector) {
390 DPRINTF(ECCLINE, ("\tfound eccline, start_sector %d\n",
391 eccline->start_sector));
392 udf_unqueue_eccline(priv, eccline);
393
394 mutex_exit(&priv->discstrat_mutex);
395 return eccline;
396 }
397 }
398
399 /* not found in eccline cache */
400 DPRINTF(ECCLINE, ("\tnot found in eccline cache\n"));
401
402 lb_size = udf_rw32(ump->logical_vol->lb_size);
403 blobsize = ump->packet_size * lb_size;
404
405 /* dont allow too many pending requests */
406 DPRINTF(ECCLINE, ("\tallocating new eccline\n"));
407 num_busy = (priv->num_queued[UDF_SHED_SEQWRITING] + priv->num_floating);
408 if ((flags & ECC_SEQWRITING) && (num_busy > UDF_ECCLINE_MAXBUSY)) {
409 cv_timedwait(&priv->discstrat_cv,
410 &priv->discstrat_mutex, hz/8);
411 goto retry;
412 }
413
414 eccline_blob = pool_get(&priv->ecclineblob_pool, PR_NOWAIT);
415 eccline = pool_get(&priv->eccline_pool, PR_NOWAIT);
416 if ((eccline_blob == NULL) || (eccline == NULL)) {
417 if (eccline_blob)
418 pool_put(&priv->ecclineblob_pool, eccline_blob);
419 if (eccline)
420 pool_put(&priv->eccline_pool, eccline);
421
422 /* out of memory for now; canibalise freelist */
423 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
424 if (eccline == NULL) {
425 /* serious trouble; wait and retry */
426 cv_timedwait(&priv->discstrat_cv,
427 &priv->discstrat_mutex, hz/8);
428 goto retry;
429 }
430
431 /* push back line if we're waiting for it or its locked */
432 if (eccline->flags & ECC_WANTED) {
433 /* we won a race, but someone else needed it */
434 udf_push_eccline(eccline, UDF_SHED_FREE);
435 goto retry;
436 }
437
438 /* unlink this entry */
439 LIST_REMOVE(eccline, hashchain);
440 KASSERT(eccline->flags & ECC_FLOATING);
441 KASSERT(eccline->queued_on == 0);
442
443 eccline_blob = eccline->blob;
444 eccline->flags = ECC_FLOATING | ECC_LOCKED;
445 } else {
446 eccline->flags = ECC_FLOATING | ECC_LOCKED;
447 priv->num_floating++;
448 }
449
450 eccline->queued_on = 0;
451 eccline->blob = eccline_blob;
452 eccline->buf = getiobuf(NULL, true);
453 eccline->buf->b_private = eccline; /* IMPORTANT */
454
455 /* initialise eccline blob */
456 /* XXX memset expensive and strictly not needed XXX */
457 memset(eccline->blob, 0, blobsize);
458
459 eccline->ump = ump;
460 eccline->present = eccline->readin = eccline->dirty = 0;
461 eccline->error = 0;
462 eccline->refcnt = 0;
463 memset(eccline->bufs, 0, UDF_MAX_PACKET_SIZE * sizeof(struct buf *));
464
465 eccline->start_sector = start_sector;
466 eccline->buf->b_lblkno = start_sector;
467 eccline->buf->b_blkno = start_sector;
468 eccline->buf->b_rawblkno = start_sector;
469
470 LIST_INSERT_HEAD(&priv->eccline_hash[line], eccline, hashchain);
471
472 /*
473 * TODO possible optimalisation for checking overlap with partitions
474 * to get a clue on future eccline usage
475 */
476
477 KASSERT(eccline->refcnt == 0);
478 KASSERT(eccline->flags & ECC_FLOATING);
479 KASSERT(eccline->flags & ECC_LOCKED);
480 mutex_exit(&priv->discstrat_mutex);
481
482 return eccline;
483 }
484
485
486 static void
udf_puteccline(struct udf_eccline * eccline)487 udf_puteccline(struct udf_eccline *eccline)
488 {
489 struct strat_private *priv = PRIV(eccline->ump);
490 struct udf_mount *ump = eccline->ump;
491 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
492 int new_queue;
493
494 mutex_enter(&priv->discstrat_mutex);
495
496 DPRINTF(ECCLINE, ("put eccline start sector %d, refcnt %d\n",
497 eccline->start_sector, eccline->refcnt));
498
499 KASSERT(eccline->flags & ECC_LOCKED);
500 KASSERT(eccline->flags & ECC_FLOATING);
501
502 /* clear all read bits that are already read in */
503 if (eccline->readin & eccline->present)
504 eccline->readin &= (~eccline->present) & allbits;
505
506 /* if we have active nodes we dont set it on seqwriting */
507 if (eccline->refcnt > 1)
508 eccline->flags &= ~ECC_SEQWRITING;
509
510 /* select state */
511 new_queue = UDF_SHED_FREE;
512 if (eccline->refcnt > 0)
513 new_queue = UDF_SHED_IDLE;
514 if (eccline->flags & ECC_WANTED)
515 new_queue = UDF_SHED_IDLE;
516 if (eccline->readin)
517 new_queue = UDF_SHED_READING;
518 if (eccline->dirty) {
519 new_queue = UDF_SHED_WAITING;
520 vfs_timestamp(&eccline->wait_time);
521 eccline->wait_time.tv_sec += ECC_WAITTIME;
522
523 if (eccline->present == allbits) {
524 new_queue = UDF_SHED_WRITING;
525 if (eccline->flags & ECC_SEQWRITING)
526 new_queue = UDF_SHED_SEQWRITING;
527 }
528 }
529 udf_push_eccline(eccline, new_queue);
530
531 mutex_exit(&priv->discstrat_mutex);
532 }
533
534 /* --------------------------------------------------------------------- */
535
536 static int
udf_create_nodedscr_rmw(struct udf_strat_args * args)537 udf_create_nodedscr_rmw(struct udf_strat_args *args)
538 {
539 union dscrptr **dscrptr = &args->dscr;
540 struct udf_mount *ump = args->ump;
541 struct long_ad *icb = args->icb;
542 struct udf_eccline *eccline;
543 uint64_t bit;
544 uint32_t sectornr, lb_size, dummy;
545 uint8_t *mem;
546 int error, eccsect;
547
548 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
549 if (error)
550 return error;
551
552 lb_size = udf_rw32(ump->logical_vol->lb_size);
553
554 /* get our eccline */
555 eccline = udf_geteccline(ump, sectornr, 0);
556 eccsect = sectornr - eccline->start_sector;
557
558 bit = (uint64_t) 1 << eccsect;
559 eccline->readin &= ~bit; /* just in case */
560 eccline->present |= bit;
561 eccline->dirty &= ~bit; /* Err... euhm... clean? */
562
563 eccline->refcnt++;
564
565 /* clear space */
566 mem = ((uint8_t *) eccline->blob) + eccsect * lb_size;
567 memset(mem, 0, lb_size);
568
569 udf_puteccline(eccline);
570
571 *dscrptr = (union dscrptr *) mem;
572 return 0;
573 }
574
575
576 static void
udf_free_nodedscr_rmw(struct udf_strat_args * args)577 udf_free_nodedscr_rmw(struct udf_strat_args *args)
578 {
579 struct udf_mount *ump = args->ump;
580 struct long_ad *icb = args->icb;
581 struct udf_eccline *eccline;
582 uint64_t bit;
583 uint32_t sectornr, dummy;
584 int error, eccsect;
585
586 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
587 if (error)
588 return;
589
590 /* get our eccline */
591 eccline = udf_geteccline(ump, sectornr, 0);
592 eccsect = sectornr - eccline->start_sector;
593
594 bit = (uint64_t) 1 << eccsect;
595 KASSERT(eccline->present & bit);
596
597 eccline->readin &= ~bit; /* just in case */
598 /* XXX eccline->dirty? */
599
600 KASSERT(eccline->refcnt >= 1);
601 eccline->refcnt--;
602
603 udf_puteccline(eccline);
604 }
605
606
607 static int
udf_read_nodedscr_rmw(struct udf_strat_args * args)608 udf_read_nodedscr_rmw(struct udf_strat_args *args)
609 {
610 union dscrptr **dscrptr = &args->dscr;
611 struct udf_mount *ump = args->ump;
612 struct long_ad *icb = args->icb;
613 struct strat_private *priv;
614 struct udf_eccline *eccline;
615 uint64_t bit;
616 uint32_t sectornr, dummy;
617 uint8_t *pos;
618 int sector_size = ump->discinfo.sector_size;
619 int lb_size __diagused = udf_rw32(ump->logical_vol->lb_size);
620 int i, error, dscrlen, eccsect;
621
622 KASSERT(sector_size == lb_size);
623 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
624 if (error)
625 return error;
626
627 /* get our eccline */
628 eccline = udf_geteccline(ump, sectornr, 0);
629 eccsect = sectornr - eccline->start_sector;
630
631 bit = (uint64_t) 1 << eccsect;
632 if ((eccline->present & bit) == 0) {
633 /* mark bit for readin */
634 eccline->readin |= bit;
635 eccline->refcnt++; /* prevent recycling */
636 KASSERT(eccline->bufs[eccsect] == NULL);
637 udf_puteccline(eccline);
638
639 /* wait for completion */
640 priv = PRIV(eccline->ump);
641 mutex_enter(&priv->discstrat_mutex);
642 while (((eccline->present | eccline->error) & bit) == 0) {
643 error = cv_timedwait(&priv->discstrat_cv,
644 &priv->discstrat_mutex,
645 hz/8);
646 if (error == EWOULDBLOCK)
647 DPRINTF(LOCKING, ("eccline waiting for read\n"));
648 }
649 mutex_exit(&priv->discstrat_mutex);
650
651 /* reget our line */
652 eccline = udf_geteccline(ump, sectornr, 0);
653 KASSERT(eccline->refcnt >= 1);
654 eccline->refcnt--; /* undo refcnt */
655
656 if (eccline->error & bit) {
657 *dscrptr = NULL;
658 udf_puteccline(eccline);
659 return EIO; /* XXX error code */
660 }
661 }
662
663 *dscrptr = (union dscrptr *)
664 (((uint8_t *) eccline->blob) + eccsect * sector_size);
665
666 /* code from read_phys_descr */
667 /* check if its a valid tag */
668 error = udf_check_tag(*dscrptr);
669 if (error) {
670 /* check if its an empty block */
671 pos = (uint8_t *) *dscrptr;
672 for (i = 0; i < sector_size; i++, pos++) {
673 if (*pos) break;
674 }
675 if (i == sector_size) {
676 /* return no error but with no dscrptr */
677 error = 0;
678 }
679 *dscrptr = NULL;
680 udf_puteccline(eccline);
681 return error;
682 }
683
684 /* calculate descriptor size */
685 dscrlen = udf_tagsize(*dscrptr, sector_size);
686 error = udf_check_tag_payload(*dscrptr, dscrlen);
687 if (error) {
688 *dscrptr = NULL;
689 udf_puteccline(eccline);
690 return error;
691 }
692
693 /* we have a hold since it has a node descriptor */
694 eccline->refcnt++;
695 udf_puteccline(eccline);
696
697 return 0;
698 }
699
700
701 static int
udf_write_nodedscr_rmw(struct udf_strat_args * args)702 udf_write_nodedscr_rmw(struct udf_strat_args *args)
703 {
704 union dscrptr *dscrptr = args->dscr;
705 struct udf_mount *ump = args->ump;
706 struct long_ad *icb = args->icb;
707 struct udf_node *udf_node = args->udf_node;
708 struct udf_eccline *eccline;
709 uint64_t bit;
710 uint32_t sectornr, logsectornr, dummy;
711 // int waitfor = args->waitfor;
712 int sector_size = ump->discinfo.sector_size;
713 int lb_size __diagused = udf_rw32(ump->logical_vol->lb_size);
714 int error, eccsect;
715
716 KASSERT(sector_size == lb_size);
717 sectornr = 0;
718 error = udf_translate_vtop(ump, icb, §ornr, &dummy);
719 if (error)
720 return error;
721
722 /* get our eccline */
723 eccline = udf_geteccline(ump, sectornr, 0);
724 eccsect = sectornr - eccline->start_sector;
725
726 bit = (uint64_t) 1 << eccsect;
727
728 /* old callback still pending? */
729 if (eccline->bufs[eccsect]) {
730 DPRINTF(WRITE, ("udf_write_nodedscr_rmw: writing descriptor"
731 " over buffer?\n"));
732 nestiobuf_done(eccline->bufs[eccsect],
733 eccline->bufs_len[eccsect],
734 0);
735 eccline->bufs[eccsect] = NULL;
736 }
737
738 /* set sector number in the descriptor and validate */
739 dscrptr = (union dscrptr *)
740 (((uint8_t *) eccline->blob) + eccsect * sector_size);
741 KASSERT(dscrptr == args->dscr);
742
743 logsectornr = udf_rw32(icb->loc.lb_num);
744 dscrptr->tag.tag_loc = udf_rw32(logsectornr);
745 udf_validate_tag_and_crc_sums(dscrptr);
746
747 udf_fixup_node_internals(ump, (uint8_t *) dscrptr, UDF_C_NODE);
748
749 /* set our flags */
750 KASSERT(eccline->present & bit);
751 eccline->dirty |= bit;
752
753 KASSERT(udf_tagsize(dscrptr, sector_size) <= sector_size);
754
755 udf_node->outstanding_nodedscr--;
756 if (udf_node->outstanding_nodedscr == 0) {
757 /* XXX still using wakeup! */
758 UDF_UNLOCK_NODE(udf_node, 0);
759 cv_broadcast(&udf_node->node_lock);
760 }
761 udf_puteccline(eccline);
762
763 /* XXX waitfor not used */
764 return 0;
765 }
766
767
768 static void
udf_queuebuf_rmw(struct udf_strat_args * args)769 udf_queuebuf_rmw(struct udf_strat_args *args)
770 {
771 struct udf_mount *ump = args->ump;
772 struct buf *buf = args->nestbuf;
773 struct desc_tag *tag;
774 struct strat_private *priv = PRIV(ump);
775 struct udf_eccline *eccline;
776 struct long_ad *node_ad_cpy;
777 uint64_t bit, *lmapping, *pmapping, *lmappos, *pmappos, blknr;
778 uint32_t buf_len, len, sectors, sectornr, our_sectornr;
779 uint32_t bpos;
780 uint16_t vpart_num;
781 uint8_t *fidblk, *src, *dst;
782 int sector_size = ump->discinfo.sector_size;
783 int blks = sector_size / DEV_BSIZE;
784 int eccsect, what, queue, error;
785
786 KASSERT(ump);
787 KASSERT(buf);
788 KASSERT(buf->b_iodone == nestiobuf_iodone);
789
790 blknr = buf->b_blkno;
791 our_sectornr = blknr / blks;
792
793 what = buf->b_udf_c_type;
794 queue = UDF_SHED_READING;
795 if ((buf->b_flags & B_READ) == 0) {
796 /* writing */
797 queue = UDF_SHED_SEQWRITING;
798 if (what == UDF_C_ABSOLUTE)
799 queue = UDF_SHED_WRITING;
800 if (what == UDF_C_DSCR)
801 queue = UDF_SHED_WRITING;
802 if (what == UDF_C_NODE)
803 queue = UDF_SHED_WRITING;
804 }
805
806 if (queue == UDF_SHED_READING) {
807 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw READ %p : sector %d type %d,"
808 "b_resid %d, b_bcount %d, b_bufsize %d\n",
809 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
810 buf->b_resid, buf->b_bcount, buf->b_bufsize));
811
812 /* mark bits for reading */
813 buf_len = buf->b_bcount;
814 sectornr = our_sectornr;
815 eccline = udf_geteccline(ump, sectornr, 0);
816 eccsect = sectornr - eccline->start_sector;
817 bpos = 0;
818 while (buf_len) {
819 len = MIN(buf_len, sector_size);
820 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
821 udf_puteccline(eccline);
822 eccline = udf_geteccline(ump, sectornr, 0);
823 eccsect = sectornr - eccline->start_sector;
824 }
825 bit = (uint64_t) 1 << eccsect;
826 error = eccline->error & bit ? EIO : 0;
827 if (eccline->present & bit) {
828 src = (uint8_t *) eccline->blob +
829 eccsect * sector_size;
830 dst = (uint8_t *) buf->b_data + bpos;
831 if (!error)
832 memcpy(dst, src, len);
833 nestiobuf_done(buf, len, error);
834 } else {
835 eccline->readin |= bit;
836 KASSERT(eccline->bufs[eccsect] == NULL);
837 eccline->bufs[eccsect] = buf;
838 eccline->bufs_bpos[eccsect] = bpos;
839 eccline->bufs_len[eccsect] = len;
840 }
841 bpos += sector_size;
842 eccsect++;
843 sectornr++;
844 buf_len -= len;
845 }
846 udf_puteccline(eccline);
847 return;
848 }
849
850 if (queue == UDF_SHED_WRITING) {
851 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw WRITE %p : sector %d "
852 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
853 buf, (uint32_t) buf->b_blkno / blks, buf->b_udf_c_type,
854 buf->b_resid, buf->b_bcount, buf->b_bufsize));
855
856 /* if we have FIDs fixup using buffer's sector number(s) */
857 if (buf->b_udf_c_type == UDF_C_FIDS)
858 panic("UDF_C_FIDS in SHED_WRITING!\n");
859
860 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
861
862 /* copy parts into the bufs and set for writing */
863 buf_len = buf->b_bcount;
864 sectornr = our_sectornr;
865 eccline = udf_geteccline(ump, sectornr, 0);
866 eccsect = sectornr - eccline->start_sector;
867 bpos = 0;
868 while (buf_len) {
869 len = MIN(buf_len, sector_size);
870 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
871 udf_puteccline(eccline);
872 eccline = udf_geteccline(ump, sectornr, 0);
873 eccsect = sectornr - eccline->start_sector;
874 }
875 bit = (uint64_t) 1 << eccsect;
876 KASSERT((eccline->readin & bit) == 0);
877 eccline->present |= bit;
878 eccline->dirty |= bit;
879 if (eccline->bufs[eccsect]) {
880 /* old callback still pending */
881 nestiobuf_done(eccline->bufs[eccsect],
882 eccline->bufs_len[eccsect],
883 0);
884 eccline->bufs[eccsect] = NULL;
885 }
886
887 src = (uint8_t *) buf->b_data + bpos;
888 dst = (uint8_t *) eccline->blob + eccsect * sector_size;
889 if (len != sector_size)
890 memset(dst, 0, sector_size);
891 memcpy(dst, src, len);
892
893 /* note that its finished for this extent */
894 eccline->bufs[eccsect] = NULL;
895 nestiobuf_done(buf, len, 0);
896
897 bpos += sector_size;
898 eccsect++;
899 sectornr++;
900 buf_len -= len;
901 }
902 udf_puteccline(eccline);
903 return;
904
905 }
906
907 /* sequential writing */
908 KASSERT(queue == UDF_SHED_SEQWRITING);
909 DPRINTF(SHEDULE, ("\nudf_queuebuf_rmw SEQWRITE %p : sector XXXX "
910 "type %d, b_resid %d, b_bcount %d, b_bufsize %d\n",
911 buf, buf->b_udf_c_type, buf->b_resid, buf->b_bcount,
912 buf->b_bufsize));
913 /*
914 * Buffers should not have been allocated to disc addresses yet on
915 * this queue. Note that a buffer can get multiple extents allocated.
916 * Note that it *looks* like the normal writing but its different in
917 * the details.
918 *
919 * lmapping contains lb_num relative to base partition.
920 *
921 * XXX should we try to claim/organize the allocated memory to
922 * block-aligned pieces?
923 */
924 mutex_enter(&priv->seqwrite_mutex);
925
926 lmapping = ump->la_lmapping;
927 node_ad_cpy = ump->la_node_ad_cpy;
928
929 /* logically allocate buf and map it in the file */
930 udf_late_allocate_buf(ump, buf, lmapping, node_ad_cpy, &vpart_num);
931
932 /* if we have FIDs, fixup using the new allocation table */
933 if (buf->b_udf_c_type == UDF_C_FIDS) {
934 buf_len = buf->b_bcount;
935 bpos = 0;
936 lmappos = lmapping;
937 while (buf_len) {
938 sectornr = *lmappos++;
939 len = MIN(buf_len, sector_size);
940 fidblk = (uint8_t *) buf->b_data + bpos;
941 udf_fixup_fid_block(fidblk, sector_size,
942 0, len, sectornr);
943 bpos += len;
944 buf_len -= len;
945 }
946 }
947 if (buf->b_udf_c_type == UDF_C_METADATA_SBM) {
948 if (buf->b_lblkno == 0) {
949 /* update the tag location inside */
950 tag = (struct desc_tag *) buf->b_data;
951 tag->tag_loc = udf_rw32(*lmapping);
952 udf_validate_tag_and_crc_sums(buf->b_data);
953 }
954 }
955 udf_fixup_node_internals(ump, buf->b_data, buf->b_udf_c_type);
956
957 /*
958 * Translate new mappings in lmapping to pmappings.
959 * pmapping to contain lb_nums as used for disc addressing.
960 */
961 pmapping = ump->la_pmapping;
962 sectors = (buf->b_bcount + sector_size -1) / sector_size;
963 udf_translate_vtop_list(ump, sectors, vpart_num, lmapping, pmapping);
964
965 /* copy parts into the bufs and set for writing */
966 pmappos = pmapping;
967 buf_len = buf->b_bcount;
968 sectornr = *pmappos++;
969 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
970 eccsect = sectornr - eccline->start_sector;
971 bpos = 0;
972 while (buf_len) {
973 len = MIN(buf_len, sector_size);
974 eccsect = sectornr - eccline->start_sector;
975 if ((eccsect < 0) || (eccsect >= ump->packet_size)) {
976 eccline->flags |= ECC_SEQWRITING;
977 udf_puteccline(eccline);
978 eccline = udf_geteccline(ump, sectornr, ECC_SEQWRITING);
979 eccsect = sectornr - eccline->start_sector;
980 }
981 bit = (uint64_t) 1 << eccsect;
982 KASSERT((eccline->readin & bit) == 0);
983 eccline->present |= bit;
984 eccline->dirty |= bit;
985 eccline->bufs[eccsect] = NULL;
986
987 src = (uint8_t *) buf->b_data + bpos;
988 dst = (uint8_t *)
989 eccline->blob + eccsect * sector_size;
990 if (len != sector_size)
991 memset(dst, 0, sector_size);
992 memcpy(dst, src, len);
993
994 /* note that its finished for this extent */
995 nestiobuf_done(buf, len, 0);
996
997 bpos += sector_size;
998 sectornr = *pmappos++;
999 buf_len -= len;
1000 }
1001 eccline->flags |= ECC_SEQWRITING;
1002 udf_puteccline(eccline);
1003 mutex_exit(&priv->seqwrite_mutex);
1004 }
1005
1006 /* --------------------------------------------------------------------- */
1007
1008 static void
udf_sync_caches_rmw(struct udf_strat_args * args)1009 udf_sync_caches_rmw(struct udf_strat_args *args)
1010 {
1011 struct udf_mount *ump = args->ump;
1012
1013 udf_mmc_synchronise_caches(ump);
1014 }
1015
1016 /* --------------------------------------------------------------------- */
1017
1018 static void
udf_shedule_read_callback(struct buf * buf)1019 udf_shedule_read_callback(struct buf *buf)
1020 {
1021 struct udf_eccline *eccline = BTOE(buf);
1022 struct udf_mount *ump = eccline->ump;
1023 uint64_t bit;
1024 uint8_t *src, *dst;
1025 int sector_size = ump->discinfo.sector_size;
1026 int error, i, len;
1027
1028 DPRINTF(ECCLINE, ("read callback called on buf %p\n", buf));
1029
1030 /* post process read action */
1031 KASSERT(eccline->flags & ECC_LOCKED);
1032 error = buf->b_error;
1033 for (i = 0; i < ump->packet_size; i++) {
1034 bit = (uint64_t) 1 << i;
1035 src = (uint8_t *) buf->b_data + i * sector_size;
1036 dst = (uint8_t *) eccline->blob + i * sector_size;
1037 if (eccline->present & bit)
1038 continue;
1039 eccline->present |= bit;
1040 if (error)
1041 eccline->error |= bit;
1042 if (eccline->bufs[i]) {
1043 dst = (uint8_t *) eccline->bufs[i]->b_data +
1044 eccline->bufs_bpos[i];
1045 len = eccline->bufs_len[i];
1046 if (!error)
1047 memcpy(dst, src, len);
1048 nestiobuf_done(eccline->bufs[i], len, error);
1049 eccline->bufs[i] = NULL;
1050 }
1051
1052 }
1053 KASSERT(buf->b_data == eccline->blob);
1054 KASSERT(eccline->present == ((uint64_t) 1 << ump->packet_size)-1);
1055
1056 /*
1057 * XXX TODO what to do on read errors? read in all sectors
1058 * synchronously and allocate a sparable entry?
1059 */
1060
1061 udf_puteccline(eccline);
1062 DPRINTF(ECCLINE, ("read callback finished\n"));
1063 }
1064
1065
1066 static void
udf_shedule_write_callback(struct buf * buf)1067 udf_shedule_write_callback(struct buf *buf)
1068 {
1069 struct udf_eccline *eccline = BTOE(buf);
1070 struct udf_mount *ump = eccline->ump;
1071 uint64_t bit;
1072 int error, i;
1073
1074 DPRINTF(ECCLINE, ("write callback called on buf %p\n", buf));
1075
1076 /* post process write action */
1077 KASSERT(eccline->flags & ECC_LOCKED);
1078 error = buf->b_error;
1079 for (i = 0; i < ump->packet_size; i++) {
1080 bit = (uint64_t) 1 << i;
1081 if ((eccline->dirty & bit) == 0)
1082 continue;
1083 if (error) {
1084 eccline->error |= bit;
1085 } else {
1086 eccline->dirty &= ~bit;
1087 }
1088
1089 KASSERT(eccline->bufs[i] == 0);
1090 }
1091 KASSERT(eccline->dirty == 0);
1092 KASSERT(error == 0);
1093
1094 /*
1095 * XXX TODO on write errors allocate a sparable entry and reissue
1096 */
1097
1098 udf_puteccline(eccline);
1099 DPRINTF(ECCLINE, ("write callback finished\n"));
1100 }
1101
1102
1103 static void
udf_issue_eccline(struct udf_eccline * eccline,int queued_on)1104 udf_issue_eccline(struct udf_eccline *eccline, int queued_on)
1105 {
1106 struct udf_mount *ump = eccline->ump;
1107 struct strat_private *priv = PRIV(ump);
1108 struct buf *buf, *nestbuf;
1109 uint64_t bit, allbits = ((uint64_t) 1 << ump->packet_size)-1;
1110 uint32_t start;
1111 int sector_size = ump->discinfo.sector_size;
1112 int blks = sector_size / DEV_BSIZE;
1113 int i;
1114
1115 KASSERT(eccline->flags & ECC_LOCKED);
1116
1117 if (queued_on == UDF_SHED_READING) {
1118 DPRINTF(SHEDULE, ("udf_issue_eccline reading : "));
1119 /* read all bits that are not yet present */
1120 eccline->readin = (~eccline->present) & allbits;
1121 KASSERT(eccline->readin);
1122 start = eccline->start_sector;
1123 buf = eccline->buf;
1124 buf->b_flags = B_READ | B_ASYNC;
1125 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1126 buf->b_oflags = 0;
1127 buf->b_iodone = udf_shedule_read_callback;
1128 buf->b_data = eccline->blob;
1129 buf->b_bcount = ump->packet_size * sector_size;
1130 buf->b_resid = buf->b_bcount;
1131 buf->b_bufsize = buf->b_bcount;
1132 buf->b_private = eccline;
1133 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1134 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1135 buf->b_proc = NULL;
1136
1137 if (eccline->present != 0) {
1138 for (i = 0; i < ump->packet_size; i++) {
1139 bit = (uint64_t) 1 << i;
1140 if (eccline->present & bit) {
1141 nestiobuf_done(buf, sector_size, 0);
1142 continue;
1143 }
1144 nestbuf = getiobuf(NULL, true);
1145 nestiobuf_setup(buf, nestbuf, i * sector_size,
1146 sector_size);
1147 /* adjust blocknumber to read */
1148 nestbuf->b_blkno = buf->b_blkno + i*blks;
1149 nestbuf->b_rawblkno = buf->b_rawblkno + i*blks;
1150
1151 DPRINTF(SHEDULE, ("sector %d ", start + i));
1152
1153 /* mutex dance since it could lock */
1154 mutex_exit(&priv->discstrat_mutex);
1155 /* call asynchronous */
1156 VOP_STRATEGY(ump->devvp, nestbuf);
1157 mutex_enter(&priv->discstrat_mutex);
1158 }
1159 DPRINTF(SHEDULE, ("\n"));
1160 return;
1161 }
1162 } else {
1163 /* write or seqwrite */
1164 DPRINTF(SHEDULE, ("udf_issue_eccline writing or seqwriting : "));
1165 DPRINTF(SHEDULE, ("\n\tpresent %"PRIx64", readin %"PRIx64", "
1166 "dirty %"PRIx64"\n\t", eccline->present, eccline->readin,
1167 eccline->dirty));
1168 KASSERT(eccline->present == allbits);
1169
1170 start = eccline->start_sector;
1171 buf = eccline->buf;
1172 buf->b_flags = B_WRITE | B_ASYNC;
1173 SET(buf->b_cflags, BC_BUSY); /* mark buffer busy */
1174 buf->b_oflags = 0;
1175 buf->b_iodone = udf_shedule_write_callback;
1176 buf->b_data = eccline->blob;
1177 buf->b_bcount = ump->packet_size * sector_size;
1178 buf->b_resid = buf->b_bcount;
1179 buf->b_bufsize = buf->b_bcount;
1180 buf->b_private = eccline;
1181 BIO_SETPRIO(buf, BPRIO_DEFAULT);
1182 buf->b_lblkno = buf->b_blkno = buf->b_rawblkno = start * blks;
1183 buf->b_proc = NULL;
1184 }
1185
1186 /* mutex dance since it could lock */
1187 mutex_exit(&priv->discstrat_mutex);
1188 /* call asynchronous */
1189 DPRINTF(SHEDULE, ("sector %d for %d\n",
1190 start, ump->packet_size));
1191 VOP_STRATEGY(ump->devvp, buf);
1192 mutex_enter(&priv->discstrat_mutex);
1193 }
1194
1195
1196 static void
udf_discstrat_thread(void * arg)1197 udf_discstrat_thread(void *arg)
1198 {
1199 struct udf_mount *ump = (struct udf_mount *) arg;
1200 struct strat_private *priv = PRIV(ump);
1201 struct udf_eccline *eccline;
1202 struct timespec now, *last;
1203 uint64_t allbits = ((uint64_t) 1 << ump->packet_size)-1;
1204 int new_queue, wait, work;
1205
1206 work = 1;
1207 priv->thread_running = 1;
1208 cv_broadcast(&priv->discstrat_cv);
1209
1210 mutex_enter(&priv->discstrat_mutex);
1211 priv->num_floating = 0;
1212 while (priv->run_thread || work || priv->num_floating) {
1213 /* get our time */
1214 vfs_timestamp(&now);
1215
1216 /* maintenance: handle eccline state machine */
1217 for(;;) {
1218 /* only peek at it */
1219 eccline = udf_peek_eccline(priv, UDF_SHED_WAITING);
1220 if (eccline == NULL)
1221 break;
1222
1223 /* if not reading, wait until the time has come */
1224 if ((priv->cur_queue != UDF_SHED_READING) &&
1225 (eccline->wait_time.tv_sec - now.tv_sec > 0)) {
1226 UDF_UNLOCK_ECCLINE(eccline);
1227 /* all others are later, so break off */
1228 break;
1229 }
1230
1231 /* release */
1232 UDF_UNLOCK_ECCLINE(eccline);
1233
1234 /* do get it */
1235 eccline = udf_pop_eccline(priv, UDF_SHED_WAITING);
1236
1237 /* requeue according to state */
1238 new_queue = UDF_SHED_FREE; /* unlikely */
1239 if (eccline->refcnt > 0)
1240 new_queue = UDF_SHED_IDLE;
1241 if (eccline->flags & ECC_WANTED)
1242 new_queue = UDF_SHED_IDLE;
1243 if (eccline->readin)
1244 new_queue = UDF_SHED_READING;
1245 if (eccline->dirty) {
1246 new_queue = UDF_SHED_READING;
1247 if (eccline->present == allbits) {
1248 new_queue = UDF_SHED_WRITING;
1249 if (eccline->flags & ECC_SEQWRITING)
1250 new_queue = UDF_SHED_SEQWRITING;
1251 }
1252 }
1253 udf_push_eccline(eccline, new_queue);
1254 }
1255
1256 /* maintenance: free excess ecclines */
1257 while (priv->num_queued[UDF_SHED_FREE] > UDF_ECCLINE_MAXFREE) {
1258 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1259 KASSERT(eccline);
1260 KASSERT(eccline->refcnt == 0);
1261 if (eccline->flags & ECC_WANTED) {
1262 /* we won the race, but we dont want to win */
1263 DPRINTF(ECCLINE, ("Tried removing, pushed back to free list\n"));
1264 udf_push_eccline(eccline, UDF_SHED_IDLE);
1265 } else {
1266 DPRINTF(ECCLINE, ("Removing entry from free list\n"));
1267 udf_dispose_eccline(eccline);
1268 }
1269 }
1270
1271 /* process the current selected queue */
1272 /* get our time */
1273 vfs_timestamp(&now);
1274 last = &priv->last_queued[priv->cur_queue];
1275
1276 /* get our line */
1277 eccline = udf_pop_eccline(priv, priv->cur_queue);
1278 if (eccline) {
1279 wait = 0;
1280 new_queue = priv->cur_queue;
1281 DPRINTF(ECCLINE, ("UDF_ISSUE_ECCLINE\n"));
1282
1283 udf_issue_eccline(eccline, priv->cur_queue);
1284 } else {
1285 /* don't switch too quickly */
1286 if (now.tv_sec - last->tv_sec < 2) {
1287 /* wait some time */
1288 cv_timedwait(&priv->discstrat_cv,
1289 &priv->discstrat_mutex, hz);
1290 /* we assume there is work to be done */
1291 work = 1;
1292 continue;
1293 }
1294
1295 /* XXX select on queue lengths ? */
1296 wait = 1;
1297 /* check if we can/should switch */
1298 new_queue = priv->cur_queue;
1299 if (bufq_peek(priv->queues[UDF_SHED_READING]))
1300 new_queue = UDF_SHED_READING;
1301 if (bufq_peek(priv->queues[UDF_SHED_WRITING]))
1302 new_queue = UDF_SHED_WRITING;
1303 if (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]))
1304 new_queue = UDF_SHED_SEQWRITING;
1305 }
1306
1307 /* give room */
1308 mutex_exit(&priv->discstrat_mutex);
1309
1310 if (new_queue != priv->cur_queue) {
1311 wait = 0;
1312 DPRINTF(SHEDULE, ("switching from %d to %d\n",
1313 priv->cur_queue, new_queue));
1314 priv->cur_queue = new_queue;
1315 }
1316 mutex_enter(&priv->discstrat_mutex);
1317
1318 /* wait for more if needed */
1319 if (wait)
1320 cv_timedwait(&priv->discstrat_cv,
1321 &priv->discstrat_mutex, hz/4); /* /8 */
1322
1323 work = (bufq_peek(priv->queues[UDF_SHED_WAITING]) != NULL);
1324 work |= (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL);
1325 work |= (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL);
1326 work |= (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL);
1327
1328 DPRINTF(PARANOIA, ("work : (%d, %d, %d) -> work %d, float %d\n",
1329 (bufq_peek(priv->queues[UDF_SHED_READING]) != NULL),
1330 (bufq_peek(priv->queues[UDF_SHED_WRITING]) != NULL),
1331 (bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) != NULL),
1332 work, priv->num_floating));
1333 }
1334
1335 mutex_exit(&priv->discstrat_mutex);
1336
1337 /* tear down remaining ecclines */
1338 mutex_enter(&priv->discstrat_mutex);
1339 KASSERT(bufq_peek(priv->queues[UDF_SHED_WAITING]) == NULL);
1340 KASSERT(bufq_peek(priv->queues[UDF_SHED_IDLE]) == NULL);
1341 KASSERT(bufq_peek(priv->queues[UDF_SHED_READING]) == NULL);
1342 KASSERT(bufq_peek(priv->queues[UDF_SHED_WRITING]) == NULL);
1343 KASSERT(bufq_peek(priv->queues[UDF_SHED_SEQWRITING]) == NULL);
1344
1345 KASSERT(priv->num_queued[UDF_SHED_WAITING] == 0);
1346 KASSERT(priv->num_queued[UDF_SHED_IDLE] == 0);
1347 KASSERT(priv->num_queued[UDF_SHED_READING] == 0);
1348 KASSERT(priv->num_queued[UDF_SHED_WRITING] == 0);
1349 KASSERT(priv->num_queued[UDF_SHED_SEQWRITING] == 0);
1350
1351 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1352 while (eccline) {
1353 udf_dispose_eccline(eccline);
1354 eccline = udf_pop_eccline(priv, UDF_SHED_FREE);
1355 }
1356 KASSERT(priv->num_queued[UDF_SHED_FREE] == 0);
1357 mutex_exit(&priv->discstrat_mutex);
1358
1359 priv->thread_running = 0;
1360 priv->thread_finished = 1;
1361 cv_broadcast(&priv->discstrat_cv);
1362
1363 kthread_exit(0);
1364 /* not reached */
1365 }
1366
1367 /* --------------------------------------------------------------------- */
1368
1369 /*
1370 * Buffer memory pool allocator.
1371 */
1372
1373 static void *
ecclinepool_page_alloc(struct pool * pp,int flags)1374 ecclinepool_page_alloc(struct pool *pp, int flags)
1375 {
1376 return (void *)uvm_km_alloc(kernel_map,
1377 MAXBSIZE, MAXBSIZE,
1378 ((flags & PR_WAITOK) ? 0 : UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)
1379 | UVM_KMF_WIRED /* UVM_KMF_PAGABLE? */);
1380 }
1381
1382 static void
ecclinepool_page_free(struct pool * pp,void * v)1383 ecclinepool_page_free(struct pool *pp, void *v)
1384 {
1385 uvm_km_free(kernel_map, (vaddr_t)v, MAXBSIZE, UVM_KMF_WIRED);
1386 }
1387
1388 static struct pool_allocator ecclinepool_allocator = {
1389 .pa_alloc = ecclinepool_page_alloc,
1390 .pa_free = ecclinepool_page_free,
1391 .pa_pagesz = MAXBSIZE,
1392 };
1393
1394
1395 static void
udf_discstrat_init_rmw(struct udf_strat_args * args)1396 udf_discstrat_init_rmw(struct udf_strat_args *args)
1397 {
1398 struct udf_mount *ump = args->ump;
1399 struct strat_private *priv = PRIV(ump);
1400 uint32_t lb_size, blobsize, hashline;
1401 int i;
1402
1403 KASSERT(ump);
1404 KASSERT(ump->logical_vol);
1405 KASSERT(priv == NULL);
1406
1407 lb_size = udf_rw32(ump->logical_vol->lb_size);
1408 blobsize = ump->packet_size * lb_size;
1409 KASSERT(lb_size > 0);
1410 KASSERT(ump->packet_size <= 64);
1411
1412 /* initialise our memory space */
1413 ump->strategy_private = malloc(sizeof(struct strat_private),
1414 M_UDFTEMP, M_WAITOK);
1415 priv = ump->strategy_private;
1416 memset(priv, 0 , sizeof(struct strat_private));
1417
1418 /* initialise locks */
1419 cv_init(&priv->discstrat_cv, "udfstrat");
1420 mutex_init(&priv->discstrat_mutex, MUTEX_DEFAULT, IPL_NONE);
1421 mutex_init(&priv->seqwrite_mutex, MUTEX_DEFAULT, IPL_NONE);
1422
1423 /* initialise struct eccline pool */
1424 pool_init(&priv->eccline_pool, sizeof(struct udf_eccline),
1425 0, 0, 0, "udf_eccline_pool", NULL, IPL_NONE);
1426
1427 /* initialise eccline blob pool */
1428 ecclinepool_allocator.pa_pagesz = blobsize;
1429 pool_init(&priv->ecclineblob_pool, blobsize,
1430 0, 0, 0, "udf_eccline_blob", &ecclinepool_allocator, IPL_NONE);
1431
1432 /* initialise main queues */
1433 for (i = 0; i < UDF_SHED_MAX; i++) {
1434 priv->num_queued[i] = 0;
1435 vfs_timestamp(&priv->last_queued[i]);
1436 }
1437 bufq_alloc(&priv->queues[UDF_SHED_WAITING], "fcfs",
1438 BUFQ_SORT_RAWBLOCK);
1439 bufq_alloc(&priv->queues[UDF_SHED_READING], "disksort",
1440 BUFQ_SORT_RAWBLOCK);
1441 bufq_alloc(&priv->queues[UDF_SHED_WRITING], "disksort",
1442 BUFQ_SORT_RAWBLOCK);
1443 bufq_alloc(&priv->queues[UDF_SHED_SEQWRITING], "disksort", 0);
1444
1445 /* initialise administrative queues */
1446 bufq_alloc(&priv->queues[UDF_SHED_IDLE], "fcfs", 0);
1447 bufq_alloc(&priv->queues[UDF_SHED_FREE], "fcfs", 0);
1448
1449 for (hashline = 0; hashline < UDF_ECCBUF_HASHSIZE; hashline++) {
1450 LIST_INIT(&priv->eccline_hash[hashline]);
1451 }
1452
1453 /* create our disk strategy thread */
1454 priv->cur_queue = UDF_SHED_READING;
1455 priv->thread_finished = 0;
1456 priv->thread_running = 0;
1457 priv->run_thread = 1;
1458 if (kthread_create(PRI_NONE, 0 /* KTHREAD_MPSAFE*/, NULL /* cpu_info*/,
1459 udf_discstrat_thread, ump, &priv->queue_lwp,
1460 "%s", "udf_rw")) {
1461 panic("fork udf_rw");
1462 }
1463
1464 /* wait for thread to spin up */
1465 mutex_enter(&priv->discstrat_mutex);
1466 while (!priv->thread_running) {
1467 cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
1468 }
1469 mutex_exit(&priv->discstrat_mutex);
1470 }
1471
1472
1473 static void
udf_discstrat_finish_rmw(struct udf_strat_args * args)1474 udf_discstrat_finish_rmw(struct udf_strat_args *args)
1475 {
1476 struct udf_mount *ump = args->ump;
1477 struct strat_private *priv = PRIV(ump);
1478
1479 if (ump == NULL)
1480 return;
1481
1482 /* stop our sheduling thread */
1483 KASSERT(priv->run_thread == 1);
1484 priv->run_thread = 0;
1485
1486 mutex_enter(&priv->discstrat_mutex);
1487 while (!priv->thread_finished) {
1488 cv_broadcast(&priv->discstrat_cv);
1489 cv_timedwait(&priv->discstrat_cv, &priv->discstrat_mutex, hz);
1490 }
1491 mutex_exit(&priv->discstrat_mutex);
1492
1493 /* kthread should be finished now */
1494 cv_destroy(&priv->discstrat_cv);
1495 mutex_destroy(&priv->discstrat_mutex);
1496 mutex_destroy(&priv->seqwrite_mutex);
1497
1498 /* cleanup our pools */
1499 pool_destroy(&priv->eccline_pool);
1500 pool_destroy(&priv->ecclineblob_pool);
1501
1502 /* free our private space */
1503 free(ump->strategy_private, M_UDFTEMP);
1504 ump->strategy_private = NULL;
1505 }
1506
1507 /* --------------------------------------------------------------------- */
1508
1509 struct udf_strategy udf_strat_rmw =
1510 {
1511 udf_create_nodedscr_rmw,
1512 udf_free_nodedscr_rmw,
1513 udf_read_nodedscr_rmw,
1514 udf_write_nodedscr_rmw,
1515 udf_queuebuf_rmw,
1516 udf_sync_caches_rmw,
1517 udf_discstrat_init_rmw,
1518 udf_discstrat_finish_rmw
1519 };
1520
1521