1 /* 2 * This file provides an implementation for block I/O functions as expected by 3 * libfsdriver for root file systems. In particular, the lmfs_driver function 4 * can be used to implement fdr_driver, the lmfs_bio function can be used to 5 * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the the 6 * lmfs_bflush function can be used to implement the fdr_bflush hook. At the 7 * very least, a file system that makes use of the provided functionality 8 * must adhere to the following rules: 9 * 10 * o it must initialize this library in order to set up a buffer pool for 11 * use by these functions, using the lmfs_buf_pool function; the 12 * recommended number of blocks for *non*-disk-backed file systems is 13 * LMFS_MAX_PREFETCH buffers (disk-backed file systems typically use many 14 * more); 15 * o it must enable VM caching in order to support memory mapping of block 16 * devices, using the lmfs_may_use_vmcache function; 17 * o it must either use lmfs_flushall as implementation for the fdr_sync 18 * hook, or call lmfs_flushall as part of its own fdr_sync implementation. 19 * 20 * In addition, a disk-backed file system (as opposed to e.g. a networked file 21 * system that intends to be able to serve as a root file system) should 22 * consider the following points: 23 * 24 * o it may restrict calls to fdr_bwrite on the mounted partition, for 25 * example to the partition's first 1024 bytes; it should generally not 26 * prevent that area from being written even if the file system is mounted 27 * read-only; 28 * o it is free to set its own block size, although the default block size 29 * works fine for raw block I/O as well. 30 */ 31 32 #include <minix/drivers.h> 33 #include <minix/libminixfs.h> 34 #include <minix/fsdriver.h> 35 #include <minix/bdev.h> 36 #include <minix/partition.h> 37 #include <sys/ioctl.h> 38 #include <assert.h> 39 40 #include "inc.h" 41 42 /* 43 * Set the driver label of the device identified by 'dev' to 'label'. While 44 * 'dev' is a full device number, only its major device number is to be used. 45 * This is a very thin wrapper right now, but eventually we will want to hide 46 * all of libbdev from file systems that use this library, so it is a start. 47 */ 48 void 49 lmfs_driver(dev_t dev, char *label) 50 { 51 52 bdev_driver(dev, label); 53 } 54 55 /* 56 * Prefetch up to "nblocks" blocks on "dev" starting from block number "block". 57 * The size to be used for the last block in the range is given as "last_size". 58 * Stop early when either the I/O request fills up or when a block is already 59 * found to be in the cache. The latter is likely to happen often, since this 60 * function is called before getting each block for reading. Prefetching is a 61 * strictly best-effort operation, and may fail silently. 62 * TODO: limit according to the number of available buffers. 63 */ 64 static void 65 block_prefetch(dev_t dev, block64_t block, unsigned int nblocks, 66 size_t block_size, size_t last_size) 67 { 68 struct buf *bp; 69 unsigned int count, limit; 70 int r; 71 72 limit = lmfs_readahead_limit(); 73 assert(limit >= 1 && limit <= LMFS_MAX_PREFETCH); 74 75 if (nblocks > limit) { 76 nblocks = limit; 77 78 last_size = block_size; 79 } 80 81 for (count = 0; count < nblocks; count++) { 82 if (count == nblocks - 1 && last_size < block_size) 83 r = lmfs_get_partial_block(&bp, dev, block + count, 84 PEEK, last_size); 85 else 86 r = lmfs_get_block(&bp, dev, block + count, PEEK); 87 88 if (r == OK) { 89 lmfs_put_block(bp); 90 91 last_size = block_size; 92 93 break; 94 } 95 } 96 97 if (count > 0) 98 lmfs_readahead(dev, block, count, last_size); 99 } 100 101 /* 102 * Perform block I/O, on "dev", starting from offset "pos", for a total of 103 * "bytes" bytes. Reading, writing, and peeking are highly similar, and thus, 104 * this function implements all of them. The "call" parameter indicates the 105 * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK). For read and write calls, 106 * "data" will identify the user buffer to use; for peek calls, "data" is set 107 * to NULL. In all cases, this function returns the number of bytes 108 * successfully transferred, 0 on end-of-file conditions, and a negative error 109 * code if no bytes could be transferred due to an error. Dirty data is not 110 * flushed immediately, and thus, a successful write only indicates that the 111 * data have been taken in by the cache (for immediate I/O, a character device 112 * would have to be used, but MINIX3 no longer supports this), which may be 113 * follwed later by silent failures. End-of-file conditions are always 114 * reported immediately, though. 115 */ 116 ssize_t 117 lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos, 118 int call) 119 { 120 block64_t block; 121 struct part_geom part; 122 size_t block_size, off, block_off, last_size, size, chunk; 123 unsigned int blocks_left; 124 struct buf *bp; 125 int r, do_write, how; 126 127 if (dev == NO_DEV) 128 return EINVAL; 129 130 block_size = lmfs_fs_block_size(); 131 do_write = (call == FSC_WRITE); 132 133 assert(block_size > 0); 134 135 if (bytes == 0) 136 return 0; /* just in case */ 137 138 if (pos < 0 || bytes > SSIZE_MAX || pos > INT64_MAX - bytes + 1) 139 return EINVAL; 140 141 /* 142 * Get the partition size, so that we can handle EOF ourselves. 143 * Unfortunately, we cannot cache the results between calls, since we 144 * do not get to see DIOCSETP ioctls--see also repartition(8). 145 */ 146 if ((r = bdev_ioctl(dev, DIOCGETP, &part, NONE /*user_endpt*/)) != OK) 147 return r; 148 149 if ((uint64_t)pos >= part.size) 150 return 0; /* EOF */ 151 152 if ((uint64_t)pos > part.size - bytes) 153 bytes = part.size - pos; 154 155 off = 0; 156 block = pos / block_size; 157 block_off = (size_t)(pos % block_size); 158 blocks_left = howmany(block_off + bytes, block_size); 159 160 assert(blocks_left > 0); 161 162 /* 163 * If the last block we need is also the last block of the device, 164 * see how many bytes we should actually transfer for that block. 165 */ 166 if (block + blocks_left - 1 == part.size / block_size) 167 last_size = part.size % block_size; 168 else 169 last_size = block_size; 170 171 r = OK; 172 173 for (off = 0; off < bytes && blocks_left > 0; off += chunk) { 174 size = (blocks_left == 1) ? last_size : block_size; 175 176 chunk = size - block_off; 177 if (chunk > bytes - off) 178 chunk = bytes - off; 179 180 assert(chunk > 0 && chunk <= size); 181 182 /* 183 * For read requests, help the block driver form larger I/O 184 * requests. 185 */ 186 if (!do_write) 187 block_prefetch(dev, block, blocks_left, block_size, 188 last_size); 189 190 /* 191 * Do not read the block from disk if we will end up 192 * overwriting all of its contents. 193 */ 194 how = (do_write && chunk == size) ? NO_READ : NORMAL; 195 196 if (size < block_size) 197 r = lmfs_get_partial_block(&bp, dev, block, how, size); 198 else 199 r = lmfs_get_block(&bp, dev, block, how); 200 201 if (r != OK) { 202 printf("libminixfs: error getting block <%"PRIx64"," 203 "%"PRIu64"> for device I/O (%d)\n", dev, block, r); 204 205 break; 206 } 207 208 /* Perform the actual copy. */ 209 if (r == OK && data != NULL) { 210 if (do_write) { 211 r = fsdriver_copyin(data, off, 212 (char *)bp->data + block_off, chunk); 213 214 /* 215 * Mark the block as dirty even if the copy 216 * failed, since the copy may in fact have 217 * succeeded partially. This is an interface 218 * issue that should be resolved at some point, 219 * but for now we do not want the cache to be 220 * desynchronized from the disk contents. 221 */ 222 lmfs_markdirty(bp); 223 } else 224 r = fsdriver_copyout(data, off, 225 (char *)bp->data + block_off, chunk); 226 } 227 228 lmfs_put_block(bp); 229 230 if (r != OK) 231 break; 232 233 block++; 234 block_off = 0; 235 blocks_left--; 236 } 237 238 /* 239 * If we were not able to do any I/O, return the error. Otherwise, 240 * return how many bytes we did manage to transfer. 241 */ 242 if (r != OK && off == 0) 243 return r; 244 245 return off; 246 } 247 248 /* 249 * Perform a flush request on a block device, flushing and invalidating all 250 * blocks associated with this device, both in the local cache and in VM. 251 * This operation is called after a block device is closed and must prevent 252 * that stale copies of blocks remain in any cache. 253 */ 254 void 255 lmfs_bflush(dev_t dev) 256 { 257 258 /* First flush any dirty blocks on this device to disk. */ 259 lmfs_flushdev(dev); 260 261 /* Then purge any blocks associated with the device. */ 262 lmfs_invalidate(dev); 263 } 264