1 /* 2 * This file provides an implementation for block I/O functions as expected by 3 * libfsdriver for root file systems. In particular, the lmfs_driver function 4 * can be used to implement fdr_driver, the lmfs_bio function can be used to 5 * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the the 6 * lmfs_bflush function can be used to implement the fdr_bflush hook. At the 7 * very least, a file system that makes use of the provided functionality 8 * must adhere to the following rules: 9 * 10 * o it must initialize this library in order to set up a buffer pool for 11 * use by these functions, using the lmfs_buf_pool function; the 12 * recommended number of blocks for *non*-disk-backed file systems is 13 * NR_IOREQS buffers (disk-backed file systems typically use many more); 14 * o it must enable VM caching in order to support memory mapping of block 15 * devices, using the lmfs_may_use_vmcache function; 16 * o it must either use lmfs_flushall as implementation for the fdr_sync 17 * hook, or call lmfs_flushall as part of its own fdr_sync implementation. 18 * 19 * In addition, a disk-backed file system (as opposed to e.g. a networked file 20 * system that intends to be able to serve as a root file system) should 21 * consider the following points: 22 * 23 * o it may restrict calls to fdr_bwrite on the mounted partition, for 24 * example to the partition's first 1024 bytes; it should generally not 25 * prevent that area from being written even if the file system is mounted 26 * read-only; 27 * o it is free to set its own block size, although the default block size 28 * works fine for raw block I/O as well. 29 */ 30 31 #include <minix/drivers.h> 32 #include <minix/libminixfs.h> 33 #include <minix/fsdriver.h> 34 #include <minix/bdev.h> 35 #include <assert.h> 36 37 /* 38 * Set the driver label of the device identified by 'dev' to 'label'. While 39 * 'dev' is a full device number, only its major device number is to be used. 40 * This is a very thin wrapper right now, but eventually we will want to hide 41 * all of libbdev from file systems that use this library, so it is a start. 42 */ 43 void 44 lmfs_driver(dev_t dev, char *label) 45 { 46 47 bdev_driver(dev, label); 48 } 49 50 /* 51 * Prefetch up to "nblocks" blocks on "dev" starting from block number "block". 52 * Stop early when either the I/O request fills up or when a block is already 53 * found to be in the cache. The latter is likely to happen often, since this 54 * function is called before getting each block for reading. Prefetching is a 55 * strictly best-effort operation, and may fail silently. 56 * TODO: limit according to the number of available buffers. 57 */ 58 static void 59 block_prefetch(dev_t dev, block_t block, block_t nblocks) 60 { 61 struct buf *bp, *bufs[NR_IOREQS]; 62 unsigned int count; 63 64 for (count = 0; count < nblocks; count++) { 65 bp = lmfs_get_block(dev, block + count, PREFETCH); 66 assert(bp != NULL); 67 68 if (lmfs_dev(bp) != NO_DEV) { 69 lmfs_put_block(bp, FULL_DATA_BLOCK); 70 71 break; 72 } 73 74 bufs[count] = bp; 75 } 76 77 if (count > 0) 78 lmfs_rw_scattered(dev, bufs, count, READING); 79 } 80 81 /* 82 * Perform block I/O, on "dev", starting from offset "pos", for a total of 83 * "bytes" bytes. Reading, writing, and peeking are highly similar, and thus, 84 * this function implements all of them. The "call" parameter indicates the 85 * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK). For read and write calls, 86 * "data" will identify the user buffer to use; for peek calls, "data" is set 87 * to NULL. In all cases, this function returns the number of bytes 88 * successfully transferred, 0 on end-of-file conditions, and a negative error 89 * code if no bytes could be transferred due to an error. Dirty data is not 90 * flushed immediately, and thus, a successful write only indicates that the 91 * data have been taken in by the cache (for immediate I/O, a character device 92 * would have to be used, but MINIX3 no longer supports this), which may be 93 * follwed later by silent failures, including undetected end-of-file cases. 94 * In particular, write requests may or may not return 0 (EOF) immediately when 95 * writing at or beyond the block device's size. i Since block I/O takes place 96 * at block granularity, block-unaligned writes have to read a block from disk 97 * before updating it, and that is the only possible source of actual I/O 98 * errors for write calls. 99 * TODO: reconsider the buffering-only approach, or see if we can at least 100 * somehow throw accurate EOF errors without reading in each block first. 101 */ 102 ssize_t 103 lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos, 104 int call) 105 { 106 block_t block, blocks_left; 107 size_t block_size, off, block_off, chunk; 108 struct buf *bp; 109 int r, write, how; 110 111 if (dev == NO_DEV) 112 return EINVAL; 113 114 block_size = lmfs_fs_block_size(); 115 write = (call == FSC_WRITE); 116 117 assert(block_size > 0); 118 119 /* FIXME: block_t is 32-bit, so we have to impose a limit here. */ 120 if (pos < 0 || pos / block_size > UINT32_MAX || bytes > SSIZE_MAX) 121 return EINVAL; 122 123 off = 0; 124 block = pos / block_size; 125 block_off = (size_t)(pos % block_size); 126 blocks_left = howmany(block_off + bytes, block_size); 127 128 lmfs_reset_rdwt_err(); 129 r = OK; 130 131 for (off = 0; off < bytes; off += chunk) { 132 chunk = block_size - block_off; 133 if (chunk > bytes - off) 134 chunk = bytes - off; 135 136 /* 137 * For read requests, help the block driver form larger I/O 138 * requests. 139 */ 140 if (!write) 141 block_prefetch(dev, block, blocks_left); 142 143 /* 144 * Do not read the block from disk if we will end up 145 * overwriting all of its contents. 146 */ 147 how = (write && chunk == block_size) ? NO_READ : NORMAL; 148 149 bp = lmfs_get_block(dev, block, how); 150 assert(bp); 151 152 r = lmfs_rdwt_err(); 153 154 if (r == OK && data != NULL) { 155 assert(lmfs_dev(bp) != NO_DEV); 156 157 if (write) { 158 r = fsdriver_copyin(data, off, 159 (char *)bp->data + block_off, chunk); 160 161 /* 162 * Mark the block as dirty even if the copy 163 * failed, since the copy may in fact have 164 * succeeded partially. This is an interface 165 * issue that should be resolved at some point, 166 * but for now we do not want the cache to be 167 * desynchronized from the disk contents. 168 */ 169 lmfs_markdirty(bp); 170 } else 171 r = fsdriver_copyout(data, off, 172 (char *)bp->data + block_off, chunk); 173 } 174 175 lmfs_put_block(bp, FULL_DATA_BLOCK); 176 177 if (r != OK) 178 break; 179 180 block++; 181 block_off = 0; 182 blocks_left--; 183 } 184 185 /* 186 * If we were not able to do any I/O, return the error (or EOF, even 187 * for writes). Otherwise, return how many bytes we did manage to 188 * transfer. 189 */ 190 if (r != OK && off == 0) 191 return (r == END_OF_FILE) ? 0 : r; 192 193 return off; 194 } 195 196 /* 197 * Perform a flush request on a block device, flushing and invalidating all 198 * blocks associated with this device, both in the local cache and in VM. 199 * This operation is called after a block device is closed and must prevent 200 * that stale copies of blocks remain in any cache. 201 */ 202 void 203 lmfs_bflush(dev_t dev) 204 { 205 206 /* First flush any dirty blocks on this device to disk. */ 207 lmfs_flushdev(dev); 208 209 /* Then purge any blocks associated with the device. */ 210 lmfs_invalidate(dev); 211 } 212