xref: /minix/minix/lib/libminixfs/bio.c (revision 83133719)
/*
 * This file provides an implementation for block I/O functions as expected by
 * libfsdriver for root file systems.  In particular, the lmfs_driver function
 * can be used to implement fdr_driver, the lmfs_bio function can be used to
 * implement the fdr_bread, fdr_bwrite, and fdr_bpeek hooks, and the
 * lmfs_bflush function can be used to implement the fdr_bflush hook.  At the
 * very least, a file system that makes use of the provided functionality
 * must adhere to the following rules:
 *
 *   o  it must initialize this library in order to set up a buffer pool for
 *      use by these functions, using the lmfs_buf_pool function; the
 *      recommended number of blocks for *non*-disk-backed file systems is
 *      NR_IOREQS buffers (disk-backed file systems typically use many more);
 *   o  it must enable VM caching in order to support memory mapping of block
 *      devices, using the lmfs_may_use_vmcache function;
 *   o  it must either use lmfs_flushall as implementation for the fdr_sync
 *      hook, or call lmfs_flushall as part of its own fdr_sync implementation.
 *
 * In addition, a disk-backed file system (as opposed to e.g. a networked file
 * system that intends to be able to serve as a root file system) should
 * consider the following points:
 *
 *   o  it may restrict calls to fdr_bwrite on the mounted partition, for
 *      example to the partition's first 1024 bytes; it should generally not
 *      prevent that area from being written even if the file system is mounted
 *      read-only;
 *   o  it is free to set its own block size, although the default block size
 *      works fine for raw block I/O as well.
 */
30 
31 #include <minix/drivers.h>
32 #include <minix/libminixfs.h>
33 #include <minix/fsdriver.h>
34 #include <minix/bdev.h>
35 #include <assert.h>
36 
/*
 * Associate the driver label 'label' with the device identified by 'dev'.
 * The caller passes a full device number, but only its major device number is
 * meant to be used.  For now this simply forwards to libbdev; the wrapper
 * exists so that, eventually, file systems built on this library need not
 * call libbdev directly.
 */
void
lmfs_driver(dev_t dev, char *label)
{
	/* Plain pass-through to libbdev. */
	bdev_driver(dev, label);
}
49 
50 /*
51  * Prefetch up to "nblocks" blocks on "dev" starting from block number "block".
52  * Stop early when either the I/O request fills up or when a block is already
53  * found to be in the cache.  The latter is likely to happen often, since this
54  * function is called before getting each block for reading.  Prefetching is a
55  * strictly best-effort operation, and may fail silently.
56  * TODO: limit according to the number of available buffers.
57  */
58 static void
59 block_prefetch(dev_t dev, block_t block, block_t nblocks)
60 {
61 	struct buf *bp, *bufs[NR_IOREQS];
62 	unsigned int count;
63 
64 	for (count = 0; count < nblocks; count++) {
65 		bp = lmfs_get_block(dev, block + count, PREFETCH);
66 		assert(bp != NULL);
67 
68 		if (lmfs_dev(bp) != NO_DEV) {
69 			lmfs_put_block(bp, FULL_DATA_BLOCK);
70 
71 			break;
72 		}
73 
74 		bufs[count] = bp;
75 	}
76 
77 	if (count > 0)
78 		lmfs_rw_scattered(dev, bufs, count, READING);
79 }
80 
81 /*
82  * Perform block I/O, on "dev", starting from offset "pos", for a total of
83  * "bytes" bytes.  Reading, writing, and peeking are highly similar, and thus,
84  * this function implements all of them.  The "call" parameter indicates the
85  * call type (one of FSC_READ, FSC_WRITE, FSC_PEEK).  For read and write calls,
86  * "data" will identify the user buffer to use; for peek calls, "data" is set
87  * to NULL.  In all cases, this function returns the number of bytes
88  * successfully transferred, 0 on end-of-file conditions, and a negative error
89  * code if no bytes could be transferred due to an error.  Dirty data is not
90  * flushed immediately, and thus, a successful write only indicates that the
91  * data have been taken in by the cache (for immediate I/O, a character device
92  * would have to be used, but MINIX3 no longer supports this), which may be
93  * follwed later by silent failures, including undetected end-of-file cases.
94  * In particular, write requests may or may not return 0 (EOF) immediately when
95  * writing at or beyond the block device's size. i Since block I/O takes place
96  * at block granularity, block-unaligned writes have to read a block from disk
97  * before updating it, and that is the only possible source of actual I/O
98  * errors for write calls.
99  * TODO: reconsider the buffering-only approach, or see if we can at least
100  * somehow throw accurate EOF errors without reading in each block first.
101  */
102 ssize_t
103 lmfs_bio(dev_t dev, struct fsdriver_data * data, size_t bytes, off_t pos,
104 	int call)
105 {
106 	block_t block, blocks_left;
107 	size_t block_size, off, block_off, chunk;
108 	struct buf *bp;
109 	int r, write, how;
110 
111 	if (dev == NO_DEV)
112 		return EINVAL;
113 
114 	block_size = lmfs_fs_block_size();
115 	write = (call == FSC_WRITE);
116 
117 	assert(block_size > 0);
118 
119 	/* FIXME: block_t is 32-bit, so we have to impose a limit here. */
120 	if (pos < 0 || pos / block_size > UINT32_MAX || bytes > SSIZE_MAX)
121 		return EINVAL;
122 
123 	off = 0;
124 	block = pos / block_size;
125 	block_off = (size_t)(pos % block_size);
126 	blocks_left = howmany(block_off + bytes, block_size);
127 
128 	lmfs_reset_rdwt_err();
129 	r = OK;
130 
131 	for (off = 0; off < bytes; off += chunk) {
132 		chunk = block_size - block_off;
133 		if (chunk > bytes - off)
134 			chunk = bytes - off;
135 
136 		/*
137 		 * For read requests, help the block driver form larger I/O
138 		 * requests.
139 		 */
140 		if (!write)
141 			block_prefetch(dev, block, blocks_left);
142 
143 		/*
144 		 * Do not read the block from disk if we will end up
145 		 * overwriting all of its contents.
146 		 */
147 		how = (write && chunk == block_size) ? NO_READ : NORMAL;
148 
149 		bp = lmfs_get_block(dev, block, how);
150 		assert(bp);
151 
152 		r = lmfs_rdwt_err();
153 
154 		if (r == OK && data != NULL) {
155 			assert(lmfs_dev(bp) != NO_DEV);
156 
157 			if (write) {
158 				r = fsdriver_copyin(data, off,
159 				    (char *)bp->data + block_off, chunk);
160 
161 				/*
162 				 * Mark the block as dirty even if the copy
163 				 * failed, since the copy may in fact have
164 				 * succeeded partially.  This is an interface
165 				 * issue that should be resolved at some point,
166 				 * but for now we do not want the cache to be
167 				 * desynchronized from the disk contents.
168 				 */
169 				lmfs_markdirty(bp);
170 			} else
171 				r = fsdriver_copyout(data, off,
172 				    (char *)bp->data + block_off, chunk);
173 		}
174 
175 		lmfs_put_block(bp, FULL_DATA_BLOCK);
176 
177 		if (r != OK)
178 			break;
179 
180 		block++;
181 		block_off = 0;
182 		blocks_left--;
183 	}
184 
185 	/*
186 	 * If we were not able to do any I/O, return the error (or EOF, even
187 	 * for writes).  Otherwise, return how many bytes we did manage to
188 	 * transfer.
189 	 */
190 	if (r != OK && off == 0)
191 		return (r == END_OF_FILE) ? 0 : r;
192 
193 	return off;
194 }
195 
/*
 * Handle a flush request for block device "dev".  All blocks associated with
 * the device are flushed and then invalidated, both in the local cache and in
 * VM.  This is called after a block device is closed, and must make sure that
 * no stale copies of its blocks remain in any cache.
 */
void
lmfs_bflush(dev_t dev)
{
	/* Write out the device's dirty blocks before anything is dropped. */
	lmfs_flushdev(dev);

	/* Now that nothing is dirty, discard every block of the device. */
	lmfs_invalidate(dev);
}
212