1 /*
2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Alex Hornung <ahornung@gmail.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
/*
 * This file implements an initial version of a mirror target.
 */
38 #include <sys/bio.h>
39 #include <sys/malloc.h>
40 #include <sys/uuid.h>
41
42 #include <dev/disk/dm/dm.h>
/*
 * Defines the M_DMDMIRROR malloc type (short name "dm_dmirror").
 * No allocation in the visible code uses it yet.
 */
MALLOC_DEFINE(M_DMDMIRROR, "dm_dmirror", "Device Mapper Target DMIRROR");
44
/*
 * segdesc flags: bits kept in segdesc.flags.  Each value is a distinct
 * single bit so that several of them can be OR-ed together.
 */
#define MEDIA_UNSTABLE		0x0001
#define MEDIA_READ_DEGRADED	0x0002
#define MEDIA_WRITE_DEGRADED	0x0004
#define MEDIA_MASTER		0x0008
#define UNINITIALIZED		0x0010
#define OLD_UNSTABLE		0x0020
#define OLD_MASTER		0x0040
/* Misspelled original name, kept so that existing references still build. */
#define OLD_MSATER		OLD_MASTER

/* dmirror disk flags: bits kept in dmirror_disk.flags. */
#define DISK_ONLINE		0x0001
56
57
/*
 * Per-bio bookkeeping kept in the bio_caller_info slots:
 *
 *   bio_caller_info1.ptr     disk the bio was issued to
 *   bio_caller_info2.offset  segment number of the request
 *   bio_caller_info3.value   retry counter
 *   bio_caller_info3.ptr     originating bio of a cloned write
 *
 * The getters accept a NULL bio and then yield NULL / 0.  All macros
 * evaluate `bio` more than once, so do not pass expressions with side
 * effects.
 *
 * NOTE(review): the retry counter and the mbuf pointer both live in
 * bio_caller_info3.  In this file only the read path uses the retry
 * counter and only the write path uses the mbuf pointer; confirm the two
 * never need to coexist on one bio.
 */
#define dmirror_set_bio_disk(bio, x)	((bio)->bio_caller_info1.ptr = (x))
#define dmirror_get_bio_disk(bio)	((bio)?((bio)->bio_caller_info1.ptr):NULL)

/* The issue routines call the setter as ..._segno, matching the getter. */
#define dmirror_set_bio_segno(bio, x)	((bio)->bio_caller_info2.offset = (x))
#define dmirror_get_bio_segno(bio)	((bio)?((bio)->bio_caller_info2.offset):0)
/* Older spelling of the setter, kept for compatibility. */
#define dmirror_set_bio_seg(bio, x)	dmirror_set_bio_segno(bio, x)

#define dmirror_set_bio_retries(bio, x)	((bio)->bio_caller_info3.value = (x))
#define dmirror_get_bio_retries(bio)	((bio)?((bio)->bio_caller_info3.value):0)

#define dmirror_set_bio_mbuf(bio, x)	((bio)->bio_caller_info3.ptr = (x))
#define dmirror_get_bio_mbuf(bio)	((bio)?((bio)->bio_caller_info3.ptr):NULL)
68
69
70
/*
 * Segment descriptor; there is one for each logical segment.
 * The spare fields are unused placeholders.
 */
struct segdesc {
	uint32_t	flags;		/* flag bits, including state */
	uint32_t	zf_bitmap;	/* zero-fill bitmap */
	uint8_t		disk_no;
	uint8_t		spare1;
	uint16_t	spare2;
	uint32_t	spare3;
	/* XXX: some timestamp/serial */
};

typedef struct segdesc segdesc_t;
81
82 typedef struct dmirror_disk {
83 uint32_t flags;
84 dm_pdev_t *pdev;
85 } dmirror_disk_t;
86
87 typedef struct target_dmirror_config {
88 size_t params_len;
89 dmirror_disk_t disks[4];
90 uint8_t ndisks;
91 /* XXX: uuid stuff */
92
93 } dm_target_dmirror_config_t;
94
95 static
96 struct bio*
dmirror_clone_bio(struct bio * obio)97 dmirror_clone_bio(struct bio *obio)
98 {
99 struct bio *bio;
100 struct buf *mbp;
101 struct buf *bp;
102
103 mbp = obio->bio_buf;
104 bp = getpbuf(NULL);
105
106 BUF_KERNPROC(bp);
107 bp->b_vp = mbp->b_vp;
108 bp->b_cmd = mbp->b_cmd;
109 bp->b_data = (char *)mbp->b_data;
110 bp->b_resid = bp->b_bcount = mbp->b_bcount;
111 bp->b_bufsize = bp->b_bcount;
112
113 bio = &bp->b_bio1;
114 bio->bio_offset = obio->bio_offset;
115
116 return (bio);
117 }
118
119 static void
dmirror_write_done(struct bio * bio)120 dmirror_write_done(struct bio *bio)
121 {
122 dmirror_disk_t disk;
123 off_t segno;
124 struct bio *obio, *mbio;
125 int retries;
126
127 disk = dmirror_get_bio_disk(bio);
128 segno = dmirror_get_bio_segno(bio);
129 mbio = dmirror_get_bio_mbuf(bio);
130
131 if (bio->bio_buf->b_flags & B_ERROR) {
132 /* write failed */
133 }
134
135 obio = pop_bio(bio);
136 biodone(obio);
137 }
138
139 void
dmirror_issue_write(dmirror_disk_t disk,struct bio * bio)140 dmirror_issue_write(dmirror_disk_t disk, struct bio *bio)
141 {
142 dmirror_set_bio_disk(bio, disk);
143 dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset));
144
145 bio->bio_done = dmirror_write_done;
146 vn_strategy(disk->pdev, bio);
147 }
148
149 void
dmirror_write(dm_target_crypt_config_t config,struct bio * bio)150 dmirror_write(dm_target_crypt_config_t config, struct bio *bio)
151 {
152 dmirror_disk_t disk, m_disk;
153 struct bio *wbio1, *wbio2;
154 segdesc_t segdesc;
155 int i, masters = 0;
156
157 for(i = 0; i < XXX config->ndisks; i++) {
158 disk = &config->disks[i];
159 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
160 if (segdesc->flags & MEDIA_MASTER) {
161 if (++masters == 1)
162 m_disk = disk;
163 }
164 }
165
166 if (masters == 1) {
167 dmirror_set_bio_mbuf(bio, NULL);
168 dmirror_issue_write(m_disk, bio);
169 } else {
170 wbio1 = dmirror_clone_bio(bio);
171 wbio2 = dmirror_clone_bio(bio);
172 dmirror_set_bio_mbuf(wbio1, bio);
173 dmirror_set_bio_mbuf(wbio2, bio);
174 dmirror_issue_write(XXX disk1, wbio1);
175 dmirror_issue_write(XXX disk2, wbio2);
176 }
177
178 }
179
180 static void
segdesc_set_flag(dmirror_disk_t disk,off_t segno,int flag)181 segdesc_set_flag(dmirror_disk_t disk, off_t segno, int flag)
182 {
183 /*
184 * XXX: set the flag on the in-memory descriptor and write back to disks.
185 */
186 foo |= flag;
187 }
188
189
190 static void
segdesc_clear_flag(dmirror_disk_t disk,off_t segno,int flag)191 segdesc_clear_flag(dmirror_disk_t disk, off_t segno, int flag)
192 {
193 /*
194 * XXX: set the flag on the in-memory descriptor and write back to disks.
195 */
196 foo &= ~flag;
197 }
198
199 static void
dmirror_read_done(struct bio * bio)200 dmirror_read_done(struct bio *bio)
201 {
202 dmirror_disk_t disk;
203 off_t segno;
204 struct bio *obio;
205 int retries;
206
207 disk = dmirror_get_bio_disk(bio);
208 segno = dmirror_get_bio_segno(bio);
209 retries = dmirror_get_bio_retries(bio);
210
211 if (bio->bio_buf->b_flags & B_ERROR) {
212 /* read failed, so redispatch to a different disk */
213 segdesc_set_flag(disk, segno, MEDIA_READ_DEGRADED);
214 /* XXX: set other disk to master, if possible */
215 if (retries < disk->config->max_retries) {
216 dmirror_set_bio_retries(bio, retries + 1);
217 /*
218 * XXX: how do we restore the bio to health? Like this?
219 */
220 bio->bio_buf->b_flags &= ~(B_ERROR | B_INVAL);
221 /*
222 * XXX: something tells me that dispatching stuff from a
223 * biodone routine is not the greatest idea
224 */
225 dmirror_issue_read(next_disk, bio);
226 return;
227 }
228 }
229
230 obio = pop_bio(bio);
231 biodone(obio);
232 }
233
234 void
dmirror_issue_read(dmirror_disk_t disk,struct bio * bio)235 dmirror_issue_read(dmirror_disk_t disk, struct bio *bio)
236 {
237 dmirror_set_bio_disk(bio, disk);
238 dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset));
239
240 bio->bio_done = dmirror_read_done;
241 vn_strategy(disk->pdev, bio);
242 }
243
244 void
dmirror_read(dm_target_crypt_config_t config,struct bio * bio)245 dmirror_read(dm_target_crypt_config_t config, struct bio *bio)
246 {
247 dmirror_disk_t disk, m_disk;
248 segdesc_t segdesc;
249 int i, masters = 0;
250
251 for(i = 0; i < XXX config->ndisks; i++) {
252 disk = &config->disks[i];
253 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
254 if (segdesc->flags & MEDIA_MASTER) {
255 if (++masters == 1)
256 m_disk = disk;
257 }
258 }
259
260 if (masters > 1) {
261 /* XXX: fail. */
262 biodone(foo);
263 return;
264 }
265
266 if (masters == 1) {
267 segdesc = SEGDESC_FROM_OFFSET(m_disk, bio->bio_offset);
268 if (segdesc->flags & UNINITIALIZED) {
269 /* XXX: ... */
270 }
271 dmirror_issue_read(m_disk, bio);
272 } else {
273 /* dispatch read to any disk */
274 /* but try not to send to a READ_DEGRADED drive */
275 m_disk = NULL;
276 for (i = 0; i < config->ndisks; i++) {
277 disk = &config->disks[i];
278 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset);
279 if (!(segdesc->flags & MEDIA_READ_DEGRADED)) {
280 m_disk = disk;
281 break;
282 }
283 }
284 /* XXX: do the uninitialized magic here, too */
285 if (m_disk) {
286 /*
287 * XXX: we found some non-degraded disk. We might want to
288 * optimize performance by sending reads to different disks,
289 * not just the first one.
290 */
291 dmirror_set_bio_retries(bio, 0);
292 dmirror_issue_read(m_disk, bio);
293 } else {
294 /* XXX: all disks are read degraded, just sent to any */
295 m_disk = &config->disks[i];
296 dmirror_set_bio_retries(bio, 0);
297 dmirror_issue_read(m_disk, bio);
298 }
299 }
300 }
301
302 /* Strategy routine called from dm_strategy. */
303 /*
304 * Do IO operation, called from dmstrategy routine.
305 */
306 int
dm_target_dmirror_strategy(dm_table_entry_t * table_en,struct buf * bp)307 dm_target_dmirror_strategy(dm_table_entry_t *table_en, struct buf *bp)
308 {
309 struct bio *bio, *split_bio1, *split_bio2;
310 struct buf *bp;
311 off_t bseg, eseg, seg_end;
312 size_t fsb;
313 int split_transaction = 0;
314
315 dm_target_crypt_config_t *priv;
316 priv = table_en->target_config;
317
318 if ((bp->b_cmd == BUF_CMD_READ) || (bp->b_cmd == BUF_CMD_WRITE)) {
319 /* Get rid of stuff we can't really handle */
320 if (((bp->b_bcount % DEV_BSIZE) != 0) || (bp->b_bcount == 0)) {
321 kprintf("dm_target_dmirror_strategy: can't really handle bp->b_bcount = %d\n", bp->b_bcount);
322 bp->b_error = EINVAL;
323 bp->b_flags |= B_ERROR | B_INVAL;
324 biodone(&bp->b_bio1);
325 return 0;
326 }
327
328 bseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset);
329 eseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset + bp->b_resid);
330 seg_end = OFFSET_FROM_SEGNO(eseg);
331
332 if (bseg != eseg) {
333 split_transaction = 1;
334 /* fsb = first segment bytes (bytes in the first segment) */
335 fsb = seg_end - bp->b_bio1.bio_offset;
336
337 nestbuf = getpbuf(NULL);
338 nestiobuf_setup(&bp->b_bio1, nestbuf, 0, fsb);
339 split_bio1 = push_bio(&nestbuf->b_bio1);
340 split_bio1->bio_offset = bp->b_bio1.bio_offset +
341 priv->block_offset*DEV_BSIZE;
342
343 nestbuf = getpbuf(NULL);
344 nestiobuf_setup(&bp->b_bio1, nestbuf, fsb, bp->b_resid - fsb);
345 split_bio2 = push_bio(&nestbuf->b_bio1);
346 split_bio2->bio_offset = bp->b_bio1.bio_offset + fsb +
347 priv->block_offset*DEV_BSIZE;
348 }
349 }
350
351 switch (bp->b_cmd) {
352 case BUF_CMD_READ:
353 if (split_transaction) {
354 dmirror_read(priv, split_bio1);
355 dmirror_read(priv, split_bio2);
356 } else {
357 bio = push_bio(&bp->b_bio1);
358 bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE;
359 dmirror_read(priv, bio);
360 }
361 break;
362
363 case BUF_CMD_WRITE:
364 if (split_transaction) {
365 dmirror_write(priv, split_bio1);
366 dmirror_write(priv, split_bio2);
367 } else {
368 bio = push_bio(&bp->b_bio1);
369 bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE;
370 dmirror_write(priv, bio);
371 }
372 break;
373
374 default:
375 /* XXX: clone... */
376 vn_strategy(priv->pdev[0]->pdev_vnode, &bp->b_bio1);
377 vn_strategy(priv->pdev[1]->pdev_vnode, &bp->b_bio1);
378 }
379
380 return 0;
381
382 }
383
384 /* XXX: add missing dm functions */
385