xref: /minix/minix/lib/libminixfs/bio.c (revision 0a6a1f1d)
1 /*
2  * This file provides an implementation for block I/O functions as expected by
3  * libfsdriver for root file systems.  In particular, the lmfs_driver function
4  * can be used to implement fdr_driver, the lmfs_bio function can be used to
5  * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the
6  * lmfs_bflush function can be used to implement the fdr_bflush hook.  At the
7  * very least, a file system that makes use of the provided functionality
8  * must adhere to the following rules:
9  *
10  *   o  it must initialize this library in order to set up a buffer pool for
11  *      use by these functions, using the lmfs_buf_pool function; the
12  *      recommended number of blocks for *non*-disk-backed file systems is
13  *      LMFS_MAX_PREFETCH buffers (disk-backed file systems typically use many
14  *      more);
15  *   o  it must enable VM caching in order to support memory mapping of block
16  *      devices, using the lmfs_may_use_vmcache function;
17  *   o  it must either use lmfs_flushall as implementation for the fdr_sync
18  *      hook, or call lmfs_flushall as part of its own fdr_sync implementation.
19  *
20  * In addition, a disk-backed file system (as opposed to e.g. a networked file
21  * system that intends to be able to serve as a root file system) should
22  * consider the following points:
23  *
24  *   o  it may restrict calls to fdr_bwrite on the mounted partition, for
25  *      example to the partition's first 1024 bytes; it should generally not
26  *      prevent that area from being written even if the file system is mounted
27  *      read-only;
28  *   o  it is free to set its own block size, although the default block size
29  *      works fine for raw block I/O as well.
30  */
31 
32 #include <minix/drivers.h>
33 #include <minix/libminixfs.h>
34 #include <minix/fsdriver.h>
35 #include <minix/bdev.h>
36 #include <minix/partition.h>
37 #include <sys/ioctl.h>
38 #include <assert.h>
39 
40 #include "inc.h"
41 
/*
 * Associate the driver label 'label' with the device 'dev'.  The caller passes
 * a full device number, but only the major number of 'dev' is meant to be
 * used.  For now this simply forwards to bdev_driver(); it exists so that file
 * systems using this library need not call into libbdev directly.
 */
void
lmfs_driver(dev_t dev, char *label)
{
	bdev_driver(dev, label);
}
54 
55 /*
56  * Prefetch up to "nblocks" blocks on "dev" starting from block number "block".
57  * The size to be used for the last block in the range is given as "last_size".
58  * Stop early when either the I/O request fills up or when a block is already
59  * found to be in the cache.  The latter is likely to happen often, since this
60  * function is called before getting each block for reading.  Prefetching is a
61  * strictly best-effort operation, and may fail silently.
62  * TODO: limit according to the number of available buffers.
63  */
64 static void
65 block_prefetch(dev_t dev, block64_t block, unsigned int nblocks,
66 	size_t block_size, size_t last_size)
67 {
68 	struct buf *bp;
69 	unsigned int count, limit;
70 	int r;
71 
72 	limit = lmfs_readahead_limit();
73 	assert(limit >= 1 && limit <= LMFS_MAX_PREFETCH);
74 
75 	if (nblocks > limit) {
76 		nblocks = limit;
77 
78 		last_size = block_size;
79 	}
80 
81 	for (count = 0; count < nblocks; count++) {
82 		if (count == nblocks - 1 && last_size < block_size)
83 			r = lmfs_get_partial_block(&bp, dev, block + count,
84 			    PEEK, last_size);
85 		else
86 			r = lmfs_get_block(&bp, dev, block + count, PEEK);
87 
88 		if (r == OK) {
89 			lmfs_put_block(bp);
90 
91 			last_size = block_size;
92 
93 			break;
94 		}
95 	}
96 
97 	if (count > 0)
98 		lmfs_readahead(dev, block, count, last_size);
99 }
100 
101 /*
102  * Perform block I/O, on "dev", starting from offset "pos", for a total of
103  * "bytes" bytes.  Reading, writing, and peeking are highly similar, and thus,
104  * this function implements all of them.  The "call" parameter indicates the
105  * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK).  For read and write calls,
106  * "data" will identify the user buffer to use; for peek calls, "data" is set
107  * to NULL.  In all cases, this function returns the number of bytes
108  * successfully transferred, 0 on end-of-file conditions, and a negative error
109  * code if no bytes could be transferred due to an error.  Dirty data is not
110  * flushed immediately, and thus, a successful write only indicates that the
111  * data have been taken in by the cache (for immediate I/O, a character device
112  * would have to be used, but MINIX3 no longer supports this), which may be
113  * follwed later by silent failures.  End-of-file conditions are always
114  * reported immediately, though.
115  */
116 ssize_t
117 lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos,
118 	int call)
119 {
120 	block64_t block;
121 	struct part_geom part;
122 	size_t block_size, off, block_off, last_size, size, chunk;
123 	unsigned int blocks_left;
124 	struct buf *bp;
125 	int r, do_write, how;
126 
127 	if (dev == NO_DEV)
128 		return EINVAL;
129 
130 	block_size = lmfs_fs_block_size();
131 	do_write = (call == FSC_WRITE);
132 
133 	assert(block_size > 0);
134 
135 	if (bytes == 0)
136 		return 0; /* just in case */
137 
138 	if (pos < 0 || bytes > SSIZE_MAX || pos > INT64_MAX - bytes + 1)
139 		return EINVAL;
140 
141 	/*
142 	 * Get the partition size, so that we can handle EOF ourselves.
143 	 * Unfortunately, we cannot cache the results between calls, since we
144 	 * do not get to see DIOCSETP ioctls--see also repartition(8).
145 	 */
146 	if ((r = bdev_ioctl(dev, DIOCGETP, &part, NONE /*user_endpt*/)) != OK)
147 		return r;
148 
149 	if ((uint64_t)pos >= part.size)
150 		return 0; /* EOF */
151 
152 	if ((uint64_t)pos > part.size - bytes)
153 		bytes = part.size - pos;
154 
155 	off = 0;
156 	block = pos / block_size;
157 	block_off = (size_t)(pos % block_size);
158 	blocks_left = howmany(block_off + bytes, block_size);
159 
160 	assert(blocks_left > 0);
161 
162 	/*
163 	 * If the last block we need is also the last block of the device,
164 	 * see how many bytes we should actually transfer for that block.
165 	 */
166 	if (block + blocks_left - 1 == part.size / block_size)
167 		last_size = part.size % block_size;
168 	else
169 		last_size = block_size;
170 
171 	r = OK;
172 
173 	for (off = 0; off < bytes && blocks_left > 0; off += chunk) {
174 		size = (blocks_left == 1) ? last_size : block_size;
175 
176 		chunk = size - block_off;
177 		if (chunk > bytes - off)
178 			chunk = bytes - off;
179 
180 		assert(chunk > 0 && chunk <= size);
181 
182 		/*
183 		 * For read requests, help the block driver form larger I/O
184 		 * requests.
185 		 */
186 		if (!do_write)
187 			block_prefetch(dev, block, blocks_left, block_size,
188 			    last_size);
189 
190 		/*
191 		 * Do not read the block from disk if we will end up
192 		 * overwriting all of its contents.
193 		 */
194 		how = (do_write && chunk == size) ? NO_READ : NORMAL;
195 
196 		if (size < block_size)
197 			r = lmfs_get_partial_block(&bp, dev, block, how, size);
198 		else
199 			r = lmfs_get_block(&bp, dev, block, how);
200 
201 		if (r != OK) {
202 			printf("libminixfs: error getting block <%"PRIx64","
203 			    "%"PRIu64"> for device I/O (%d)\n", dev, block, r);
204 
205 			break;
206 		}
207 
208 		/* Perform the actual copy. */
209 		if (r == OK && data != NULL) {
210 			if (do_write) {
211 				r = fsdriver_copyin(data, off,
212 				    (char *)bp->data + block_off, chunk);
213 
214 				/*
215 				 * Mark the block as dirty even if the copy
216 				 * failed, since the copy may in fact have
217 				 * succeeded partially.  This is an interface
218 				 * issue that should be resolved at some point,
219 				 * but for now we do not want the cache to be
220 				 * desynchronized from the disk contents.
221 				 */
222 				lmfs_markdirty(bp);
223 			} else
224 				r = fsdriver_copyout(data, off,
225 				    (char *)bp->data + block_off, chunk);
226 		}
227 
228 		lmfs_put_block(bp);
229 
230 		if (r != OK)
231 			break;
232 
233 		block++;
234 		block_off = 0;
235 		blocks_left--;
236 	}
237 
238 	/*
239 	 * If we were not able to do any I/O, return the error.  Otherwise,
240 	 * return how many bytes we did manage to transfer.
241 	 */
242 	if (r != OK && off == 0)
243 		return r;
244 
245 	return off;
246 }
247 
/*
 * Handle a flush request for block device "dev".  This is called after the
 * block device has been closed, at which point no stale copies of its blocks
 * may remain in any cache, be it the local cache or VM.  The order of the two
 * steps matters: dirty blocks must be written out before they are discarded.
 */
void
lmfs_bflush(dev_t dev)
{
	/* Step 1: write back all dirty blocks of this device. */
	lmfs_flushdev(dev);

	/* Step 2: drop every block that belongs to this device. */
	lmfs_invalidate(dev);
}
264