1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Alex Hornung <ahornung@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * This file implements initial version of a mirror target 37 */ 38 #include <sys/bio.h> 39 #include <sys/malloc.h> 40 #include <sys/uuid.h> 41 42 #include <dev/disk/dm/dm.h> 43 MALLOC_DEFINE(M_DMDMIRROR, "dm_dmirror", "Device Mapper Target DMIRROR"); 44 45 /* segdesc flags */ 46 #define MEDIA_UNSTABLE 0x0001 47 #define MEDIA_READ_DEGRADED 0x0002 48 #define MEDIA_WRITE_DEGRADED 0x0004 49 #define MEDIA_MASTER 0x0008 50 #define UNINITIALIZED 0x0010 51 #define OLD_UNSTABLE 0x0020 52 #define OLD_MSATER 0x0040 53 54 /* dmirror disk flags */ 55 #define DISK_ONLINE 0x0001 56 57 58 #define dmirror_set_bio_disk(bio, x) ((bio)->bio_caller_info1.ptr = (x)) 59 #define dmirror_get_bio_disk(bio) ((bio)?((bio)->bio_caller_info1.ptr):NULL) 60 #define dmirror_set_bio_seg(bio, x) ((bio)->bio_caller_info2.offset = (x)) 61 #define dmirror_get_bio_segno(bio) ((bio)?((bio)->bio_caller_info2.offset):0) 62 63 #define dmirror_set_bio_retries(bio, x) ((bio)->bio_caller_info3.value = (x)) 64 #define dmirror_get_bio_retries(bio) ((bio)?((bio)->bio_caller_info3.value):0) 65 66 #define dmirror_set_bio_mbuf(bio, x) ((bio)->bio_caller_info3.ptr = (x)) 67 #define dmirror_get_bio_mbuf(bio) ((bio)?((bio)->bio_caller_info3.ptr):NULL) 68 69 70 71 /* Segment descriptor for each logical segment */ 72 typedef struct segdesc { 73 uint32_t flags; /* Flags, including state */ 74 uint32_t zf_bitmap; /* Zero-fill bitmap */ 75 uint8_t disk_no; 76 uint8_t spare1; 77 uint16_t spare2; 78 uint32_t spare3; 79 /* XXX: some timestamp/serial */ 80 } segdesc_t; 81 82 typedef struct dmirror_disk { 83 uint32_t flags; 84 dm_pdev_t *pdev; 85 } dmirror_disk_t; 86 87 typedef struct target_dmirror_config { 88 size_t params_len; 89 dmirror_disk_t disks[4]; 90 uint8_t ndisks; 91 /* XXX: uuid stuff */ 92 93 } dm_target_dmirror_config_t; 94 95 static 96 struct bio* 97 dmirror_clone_bio(struct bio *obio) 98 { 99 struct bio *bio; 100 struct buf *mbp; 101 struct buf *bp; 102 103 mbp = obio->bio_buf; 104 bp = getpbuf(NULL); 105 106 BUF_KERNPROC(bp); 107 bp->b_vp = mbp->b_vp; 108 bp->b_cmd = mbp->b_cmd; 109 bp->b_data = (char *)mbp->b_data; 110 bp->b_resid = bp->b_bcount = mbp->b_bcount; 111 bp->b_bufsize = bp->b_bcount; 112 113 bio = &bp->b_bio1; 114 bio->bio_offset = obio->bio_offset; 115 116 return (bio); 117 } 118 119 static void 120 dmirror_write_done(struct bio *bio) 121 { 122 dmirror_disk_t disk; 123 off_t segno; 124 struct bio *obio, *mbio; 125 int retries; 126 127 disk = dmirror_get_bio_disk(bio); 128 segno = dmirror_get_bio_segno(bio); 129 mbio = dmirror_get_bio_mbuf(bio); 130 131 if (bio->bio_buf->b_flags & B_ERROR) { 132 /* write failed */ 133 } 134 135 obio = pop_bio(bio); 136 biodone(obio); 137 } 138 139 void 140 dmirror_issue_write(dmirror_disk_t disk, struct bio *bio) 141 { 142 dmirror_set_bio_disk(bio, disk); 143 dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset)); 144 145 bio->bio_done = dmirror_write_done; 146 vn_strategy(disk->pdev, bio); 147 } 148 149 void 150 dmirror_write(dm_target_crypt_config_t config, struct bio *bio) 151 { 152 dmirror_disk_t disk, m_disk; 153 struct bio *wbio1, *wbio2; 154 segdesc_t segdesc; 155 int i, masters = 0; 156 157 for(i = 0; i < XXX config->ndisks; i++) { 158 disk = &config->disks[i]; 159 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset); 160 if (segdesc->flags & MEDIA_MASTER) { 161 if (++masters == 1) 162 m_disk = disk; 163 } 164 } 165 166 if (masters == 1) { 167 dmirror_set_bio_mbuf(bio, NULL); 168 dmirror_issue_write(m_disk, bio); 169 } else { 170 wbio1 = dmirror_clone_bio(bio); 171 wbio2 = dmirror_clone_bio(bio); 172 dmirror_set_bio_mbuf(wbio1, bio); 173 dmirror_set_bio_mbuf(wbio2, bio); 174 dmirror_issue_write(XXX disk1, wbio1); 175 dmirror_issue_write(XXX disk2, wbio2); 176 } 177 178 } 179 180 static void 181 segdesc_set_flag(dmirror_disk_t disk, off_t segno, int flag) 182 { 183 /* 184 * XXX: set the flag on the in-memory descriptor and write back to disks. 185 */ 186 foo |= flag; 187 } 188 189 190 static void 191 segdesc_clear_flag(dmirror_disk_t disk, off_t segno, int flag) 192 { 193 /* 194 * XXX: set the flag on the in-memory descriptor and write back to disks. 195 */ 196 foo &= ~flag; 197 } 198 199 static void 200 dmirror_read_done(struct bio *bio) 201 { 202 dmirror_disk_t disk; 203 off_t segno; 204 struct bio *obio; 205 int retries; 206 207 disk = dmirror_get_bio_disk(bio); 208 segno = dmirror_get_bio_segno(bio); 209 retries = dmirror_get_bio_retries(bio); 210 211 if (bio->bio_buf->b_flags & B_ERROR) { 212 /* read failed, so redispatch to a different disk */ 213 segdesc_set_flag(disk, segno, MEDIA_READ_DEGRADED); 214 /* XXX: set other disk to master, if possible */ 215 if (retries < disk->config->max_retries) { 216 dmirror_set_bio_retries(bio, retries + 1); 217 /* 218 * XXX: how do we restore the bio to health? Like this? 219 */ 220 bio->bio_buf->b_flags &= ~(B_ERROR | B_INVAL); 221 /* 222 * XXX: something tells me that dispatching stuff from a 223 * biodone routine is not the greatest idea 224 */ 225 dmirror_issue_read(next_disk, bio); 226 return; 227 } 228 } 229 230 obio = pop_bio(bio); 231 biodone(obio); 232 } 233 234 void 235 dmirror_issue_read(dmirror_disk_t disk, struct bio *bio) 236 { 237 dmirror_set_bio_disk(bio, disk); 238 dmirror_set_bio_segno(bio, SEGNO_FROM_OFFSET(bio->bio_offset)); 239 240 bio->bio_done = dmirror_read_done; 241 vn_strategy(disk->pdev, bio); 242 } 243 244 void 245 dmirror_read(dm_target_crypt_config_t config, struct bio *bio) 246 { 247 dmirror_disk_t disk, m_disk; 248 segdesc_t segdesc; 249 int i, masters = 0; 250 251 for(i = 0; i < XXX config->ndisks; i++) { 252 disk = &config->disks[i]; 253 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset); 254 if (segdesc->flags & MEDIA_MASTER) { 255 if (++masters == 1) 256 m_disk = disk; 257 } 258 } 259 260 if (masters > 1) { 261 /* XXX: fail. */ 262 biodone(foo); 263 return; 264 } 265 266 if (masters == 1) { 267 segdesc = SEGDESC_FROM_OFFSET(m_disk, bio->bio_offset); 268 if (segdesc->flags & UNINITIALIZED) { 269 /* XXX: ... */ 270 } 271 dmirror_issue_read(m_disk, bio); 272 } else { 273 /* dispatch read to any disk */ 274 /* but try not to send to a READ_DEGRADED drive */ 275 m_disk = NULL; 276 for (i = 0; i < config->ndisks; i++) { 277 disk = &config->disks[i]; 278 segdesc = SEGDESC_FROM_OFFSET(disk, bio->bio_offset); 279 if (!(segdesc->flags & MEDIA_READ_DEGRADED)) { 280 m_disk = disk; 281 break; 282 } 283 } 284 /* XXX: do the uninitialized magic here, too */ 285 if (m_disk) { 286 /* 287 * XXX: we found some non-degraded disk. We might want to 288 * optimize performance by sending reads to different disks, 289 * not just the first one. 290 */ 291 dmirror_set_bio_retries(bio, 0); 292 dmirror_issue_read(m_disk, bio); 293 } else { 294 /* XXX: all disks are read degraded, just sent to any */ 295 m_disk = &config->disks[i]; 296 dmirror_set_bio_retries(bio, 0); 297 dmirror_issue_read(m_disk, bio); 298 } 299 } 300 } 301 302 /* Strategy routine called from dm_strategy. */ 303 /* 304 * Do IO operation, called from dmstrategy routine. 305 */ 306 int 307 dm_target_dmirror_strategy(dm_table_entry_t *table_en, struct buf *bp) 308 { 309 struct bio *bio, *split_bio1, *split_bio2; 310 struct buf *bp; 311 off_t bseg, eseg, seg_end; 312 size_t fsb; 313 int split_transaction = 0; 314 315 dm_target_crypt_config_t *priv; 316 priv = table_en->target_config; 317 318 if ((bp->b_cmd == BUF_CMD_READ) || (bp->b_cmd == BUF_CMD_WRITE)) { 319 /* Get rid of stuff we can't really handle */ 320 if (((bp->b_bcount % DEV_BSIZE) != 0) || (bp->b_bcount == 0)) { 321 kprintf("dm_target_dmirror_strategy: can't really handle bp->b_bcount = %d\n", bp->b_bcount); 322 bp->b_error = EINVAL; 323 bp->b_flags |= B_ERROR | B_INVAL; 324 biodone(&bp->b_bio1); 325 return 0; 326 } 327 328 bseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset); 329 eseg = SEGNO_FROM_OFFSET(bp->b_bio1.bio_offset + bp->b_resid); 330 seg_end = OFFSET_FROM_SEGNO(eseg); 331 332 if (bseg != eseg) { 333 split_transaction = 1; 334 /* fsb = first segment bytes (bytes in the first segment) */ 335 fsb = seg_end - bp->b_bio1.bio_offset; 336 337 nestbuf = getpbuf(NULL); 338 nestiobuf_setup(&bp->b_bio1, nestbuf, 0, fsb); 339 split_bio1 = push_bio(&nestbuf->b_bio1); 340 split_bio1->bio_offset = bp->b_bio1.bio_offset + 341 priv->block_offset*DEV_BSIZE; 342 343 nestbuf = getpbuf(NULL); 344 nestiobuf_setup(&bp->b_bio1, nestbuf, fsb, bp->b_resid - fsb); 345 split_bio2 = push_bio(&nestbuf->b_bio1); 346 split_bio2->bio_offset = bp->b_bio1.bio_offset + fsb + 347 priv->block_offset*DEV_BSIZE; 348 } 349 } 350 351 switch (bp->b_cmd) { 352 case BUF_CMD_READ: 353 if (split_transaction) { 354 dmirror_read(priv, split_bio1); 355 dmirror_read(priv, split_bio2); 356 } else { 357 bio = push_bio(&bp->b_bio1); 358 bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE; 359 dmirror_read(priv, bio); 360 } 361 break; 362 363 case BUF_CMD_WRITE: 364 if (split_transaction) { 365 dmirror_write(priv, split_bio1); 366 dmirror_write(priv, split_bio2); 367 } else { 368 bio = push_bio(&bp->b_bio1); 369 bio->bio_offset = bp->b_bio1.bio_offset + priv->block_offset*DEV_BSIZE; 370 dmirror_write(priv, bio); 371 } 372 break; 373 374 default: 375 /* XXX: clone... */ 376 vn_strategy(priv->pdev[0]->pdev_vnode, &bp->b_bio1); 377 vn_strategy(priv->pdev[1]->pdev_vnode, &bp->b_bio1); 378 } 379 380 return 0; 381 382 } 383 384 /* XXX: add missing dm functions */ 385