1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * HAMMER PFS ioctls - Manage pseudo-fs configurations 36 */ 37 38 #include "hammer.h" 39 40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, 41 hammer_inode_t ip); 42 static int hammer_pfs_rollback(hammer_transaction_t trans, 43 hammer_pseudofs_inmem_t pfsm, 44 hammer_tid_t trunc_tid); 45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, 46 hammer_tid_t trunc_tid); 47 48 /* 49 * Get mirroring/pseudo-fs information 50 * 51 * NOTE: The ip used for ioctl is not necessarily related to the PFS 52 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 53 */ 54 int 55 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 56 struct hammer_ioc_pseudofs_rw *pfs) 57 { 58 hammer_pseudofs_inmem_t pfsm; 59 uint32_t localization; 60 int error; 61 62 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 63 return(error); 64 localization = pfs_to_lo(pfs->pfs_id); 65 pfs->bytes = sizeof(struct hammer_pseudofs_data); 66 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION; 67 68 pfsm = hammer_load_pseudofs(trans, localization, &error); 69 if (error) { 70 hammer_rel_pseudofs(trans->hmp, pfsm); 71 return(error); 72 } 73 74 /* 75 * If the PFS is a master the sync tid is set by normal operation 76 * rather than the mirroring code, and will always track the 77 * real HAMMER filesystem. 78 * 79 * We use flush_tid1, which is the highest fully committed TID. 80 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't 81 * caught up to it yet so a crash will roll us back to flush_tid1. 82 */ 83 if (hammer_is_pfs_master(&pfsm->pfsd)) 84 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; 85 86 /* 87 * Copy out to userland. 88 */ 89 error = 0; 90 if (pfs->ondisk && error == 0) 91 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd)); 92 hammer_rel_pseudofs(trans->hmp, pfsm); 93 return(error); 94 } 95 96 /* 97 * Set mirroring/pseudo-fs information 98 * 99 * NOTE: The ip used for ioctl is not necessarily related to the PFS 100 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 101 */ 102 int 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs) 105 { 106 hammer_pseudofs_inmem_t pfsm; 107 uint32_t localization; 108 int error; 109 110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 111 return(error); 112 localization = pfs_to_lo(pfs->pfs_id); 113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION) 114 error = EINVAL; 115 116 if (error == 0 && pfs->ondisk) { 117 /* 118 * Load the PFS so we can modify our in-core copy. Ignore 119 * ENOENT errors. 120 */ 121 pfsm = hammer_load_pseudofs(trans, localization, &error); 122 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd)); 123 124 /* 125 * Save it back, create a root inode if we are in master 126 * mode and no root exists. 127 * 128 * We do not create root inodes for slaves, the root inode 129 * must be mirrored from the master. 130 */ 131 if (error == 0 && hammer_is_pfs_master(&pfsm->pfsd)) { 132 error = hammer_mkroot_pseudofs(trans, cred, pfsm, ip); 133 } 134 if (error == 0) 135 error = hammer_save_pseudofs(trans, pfsm); 136 137 /* 138 * Wakeup anyone waiting for a TID update for this PFS 139 */ 140 wakeup(&pfsm->pfsd.sync_end_tid); 141 hammer_rel_pseudofs(trans->hmp, pfsm); 142 } 143 return(error); 144 } 145 146 /* 147 * Upgrade a slave to a master 148 * 149 * This is fairly easy to do, but we must physically undo any partial syncs 150 * for transaction ids > sync_end_tid. Effective, we must do a partial 151 * rollback. 152 * 153 * NOTE: The ip used for ioctl is not necessarily related to the PFS 154 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 155 */ 156 int 157 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 158 struct hammer_ioc_pseudofs_rw *pfs) 159 { 160 hammer_pseudofs_inmem_t pfsm; 161 uint32_t localization; 162 int error; 163 164 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 165 return(error); 166 localization = pfs_to_lo(pfs->pfs_id); 167 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 168 return(error); 169 170 /* 171 * A master id must be set when upgrading 172 */ 173 pfsm = hammer_load_pseudofs(trans, localization, &error); 174 if (error == 0) { 175 if (hammer_is_pfs_slave(&pfsm->pfsd)) { 176 error = hammer_pfs_rollback(trans, pfsm, 177 pfsm->pfsd.sync_end_tid + 1); 178 if (error == 0) { 179 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE; 180 error = hammer_save_pseudofs(trans, pfsm); 181 } 182 } 183 } 184 hammer_rel_pseudofs(trans->hmp, pfsm); 185 if (error == EINTR) { 186 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 187 error = 0; 188 } 189 return (error); 190 } 191 192 /* 193 * Downgrade a master to a slave 194 * 195 * This is really easy to do, just set the SLAVE flag and update sync_end_tid. 196 * 197 * We previously did not update sync_end_tid in consideration for a slave 198 * upgraded to a master and then downgraded again, but this completely breaks 199 * the case where one starts with a master and then downgrades to a slave, 200 * then upgrades again. 201 * 202 * NOTE: The ip used for ioctl is not necessarily related to the PFS 203 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 204 */ 205 int 206 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 207 struct hammer_ioc_pseudofs_rw *pfs) 208 { 209 hammer_mount_t hmp = trans->hmp; 210 hammer_pseudofs_inmem_t pfsm; 211 uint32_t localization; 212 int error; 213 214 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 215 return(error); 216 localization = pfs_to_lo(pfs->pfs_id); 217 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 218 return(error); 219 220 pfsm = hammer_load_pseudofs(trans, localization, &error); 221 if (error == 0) { 222 if (hammer_is_pfs_master(&pfsm->pfsd)) { 223 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE; 224 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1) 225 pfsm->pfsd.sync_end_tid = hmp->flush_tid1; 226 error = hammer_save_pseudofs(trans, pfsm); 227 } 228 } 229 hammer_rel_pseudofs(trans->hmp, pfsm); 230 return (error); 231 } 232 233 /* 234 * Destroy a PFS 235 * 236 * We can destroy a PFS by scanning and deleting all of its records in the 237 * B-Tree. The hammer utility will delete the softlink in the primary 238 * filesystem. 239 * 240 * NOTE: The ip used for ioctl is not necessarily related to the PFS 241 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 242 */ 243 int 244 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 245 struct hammer_ioc_pseudofs_rw *pfs) 246 { 247 hammer_pseudofs_inmem_t pfsm; 248 uint32_t localization; 249 int error; 250 251 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 252 return(error); 253 localization = pfs_to_lo(pfs->pfs_id); 254 255 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 256 return(error); 257 258 pfsm = hammer_load_pseudofs(trans, localization, &error); 259 if (error == 0) { 260 error = hammer_pfs_rollback(trans, pfsm, 0); 261 if (error == 0) { 262 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED; 263 error = hammer_save_pseudofs(trans, pfsm); 264 } 265 } 266 hammer_rel_pseudofs(trans->hmp, pfsm); 267 if (error == EINTR) { 268 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 269 error = 0; 270 } 271 return(error); 272 } 273 274 /* 275 * Wait for the PFS to sync past the specified TID 276 */ 277 int 278 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 279 struct hammer_ioc_pseudofs_rw *pfs) 280 { 281 hammer_pseudofs_inmem_t pfsm; 282 struct hammer_pseudofs_data pfsd; 283 uint32_t localization; 284 hammer_tid_t tid; 285 void *waitp; 286 int error; 287 288 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 289 return(error); 290 localization = pfs_to_lo(pfs->pfs_id); 291 292 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) 293 return(error); 294 295 pfsm = hammer_load_pseudofs(trans, localization, &error); 296 if (error == 0) { 297 if (hammer_is_pfs_slave(&pfsm->pfsd)) { 298 tid = pfsm->pfsd.sync_end_tid; 299 waitp = &pfsm->pfsd.sync_end_tid; 300 } else { 301 tid = trans->hmp->flush_tid1; 302 waitp = &trans->hmp->flush_tid1; 303 } 304 if (tid <= pfsd.sync_end_tid) 305 tsleep(waitp, PCATCH, "hmrmwt", 0); 306 } 307 hammer_rel_pseudofs(trans->hmp, pfsm); 308 if (error == EINTR) { 309 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 310 error = 0; 311 } 312 return(error); 313 } 314 315 /* 316 * Iterate PFS ondisk data. 317 * This function basically does the same as hammer_load_pseudofs() 318 * except that the purpose of this function is to retrieve data. 319 * 320 * NOTE: The ip used for ioctl is not necessarily related to the PFS 321 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 322 */ 323 int 324 hammer_ioc_iterate_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 325 struct hammer_ioc_pfs_iterate *pi) 326 { 327 struct hammer_cursor cursor; 328 struct hammer_ioc_pseudofs_rw pfs; 329 hammer_inode_t dip; 330 uint32_t localization; 331 int error; 332 333 /* 334 * struct hammer_ioc_pfs_iterate was never necessary. 335 * This ioctl needs extra code only to do conversion. 336 * The name pi->pos is misleading, but it's been exposed 337 * to userspace header.. 338 */ 339 bzero(&pfs, sizeof(pfs)); 340 pfs.pfs_id = pi->pos; 341 pfs.bytes = sizeof(struct hammer_pseudofs_data); /* dummy */ 342 if ((error = hammer_pfs_autodetect(&pfs, ip)) != 0) 343 return(error); 344 pi->pos = pfs.pfs_id; 345 localization = pfs_to_lo(pi->pos); 346 347 dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID, 348 HAMMER_DEF_LOCALIZATION, 0, &error); 349 350 error = hammer_init_cursor(trans, &cursor, 351 (dip ? &dip->cache[1] : NULL), dip); 352 if (error) 353 goto out; 354 355 cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION | 356 HAMMER_LOCALIZE_MISC; 357 cursor.key_beg.obj_id = HAMMER_OBJID_ROOT; 358 cursor.key_beg.create_tid = 0; 359 cursor.key_beg.delete_tid = 0; 360 cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS; 361 cursor.key_beg.obj_type = 0; 362 cursor.key_beg.key = localization; 363 cursor.asof = HAMMER_MAX_TID; 364 cursor.flags |= HAMMER_CURSOR_ASOF; 365 366 error = hammer_ip_lookup(&cursor); 367 if (error == 0) { 368 error = hammer_ip_resolve_data(&cursor); 369 if (error == 0) { 370 if (pi->ondisk) 371 copyout(cursor.data, pi->ondisk, cursor.leaf->data_len); 372 localization = cursor.leaf->base.key; 373 pi->pos = lo_to_pfs(localization); 374 /* 375 * Caller needs to increment pi->pos each time calling 376 * this ioctl. This ioctl only restores current PFS id. 377 */ 378 } 379 } 380 out: 381 hammer_done_cursor(&cursor); 382 if (dip) 383 hammer_rel_inode(dip, 0); 384 return(error); 385 } 386 387 /* 388 * Auto-detect the pseudofs and do basic bounds checking. 389 */ 390 static 391 int 392 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip) 393 { 394 int error = 0; 395 396 if (pfs->pfs_id == -1) 397 pfs->pfs_id = lo_to_pfs(ip->obj_localization); 398 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS) 399 error = EINVAL; 400 if (pfs->bytes < sizeof(struct hammer_pseudofs_data)) 401 error = EINVAL; 402 return(error); 403 } 404 405 /* 406 * Rollback the specified PFS to (trunc_tid - 1), removing everything 407 * greater or equal to trunc_tid. The PFS must not have been in no-mirror 408 * mode or the MIRROR_FILTERED scan will not work properly. 409 * 410 * This is typically used to remove any partial syncs when upgrading a 411 * slave to a master. It can theoretically also be used to rollback 412 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN 413 * PRUNED, and to points that are older only if they are on a retained 414 * (pruning softlink) boundary. 415 * 416 * Rollbacks destroy information. If you don't mind inode numbers changing 417 * a better way would be to cpdup a snapshot back onto the master. 418 */ 419 static 420 int 421 hammer_pfs_rollback(hammer_transaction_t trans, 422 hammer_pseudofs_inmem_t pfsm, 423 hammer_tid_t trunc_tid) 424 { 425 struct hammer_cmirror cmirror; 426 struct hammer_cursor cursor; 427 struct hammer_base_elm key_cur; 428 int error; 429 int seq; 430 431 bzero(&cmirror, sizeof(cmirror)); 432 bzero(&key_cur, sizeof(key_cur)); 433 key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization; 434 key_cur.obj_id = HAMMER_MIN_OBJID; 435 key_cur.key = HAMMER_MIN_KEY; 436 key_cur.create_tid = 1; 437 key_cur.rec_type = HAMMER_MIN_RECTYPE; 438 439 seq = trans->hmp->flusher.done; 440 441 retry: 442 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 443 if (error) { 444 hammer_done_cursor(&cursor); 445 goto failed; 446 } 447 cursor.key_beg = key_cur; 448 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION | 449 pfsm->localization; 450 cursor.key_end.obj_id = HAMMER_MAX_OBJID; 451 cursor.key_end.key = HAMMER_MAX_KEY; 452 cursor.key_end.create_tid = HAMMER_MAX_TID; 453 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 454 455 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 456 cursor.flags |= HAMMER_CURSOR_BACKEND; 457 458 /* 459 * Do an optimized scan of only records created or modified 460 * >= trunc_tid, so we can fix up those records. We must 461 * still check the TIDs but this greatly reduces the size of 462 * the scan. 463 */ 464 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 465 cursor.cmirror = &cmirror; 466 cmirror.mirror_tid = trunc_tid; 467 468 error = hammer_btree_first(&cursor); 469 while (error == 0) { 470 /* 471 * Abort the rollback. 472 */ 473 if (error == 0) { 474 error = hammer_signal_check(trans->hmp); 475 if (error) 476 break; 477 } 478 479 /* 480 * We only care about leafs. Internal nodes can be returned 481 * in mirror-filtered mode (they are used to generate SKIP 482 * mrecords), but we don't need them for this code. 483 * 484 * WARNING: See warnings in hammer_unlock_cursor() function. 485 */ 486 cursor.flags |= HAMMER_CURSOR_ATEDISK; 487 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) { 488 key_cur = cursor.node->ondisk->elms[cursor.index].base; 489 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid); 490 } 491 492 while (hammer_flusher_meta_halflimit(trans->hmp) || 493 hammer_flusher_undo_exhausted(trans, 2)) { 494 hammer_unlock_cursor(&cursor); 495 hammer_flusher_wait(trans->hmp, seq); 496 hammer_lock_cursor(&cursor); 497 seq = hammer_flusher_async_one(trans->hmp); 498 } 499 500 if (error == 0) 501 error = hammer_btree_iterate(&cursor); 502 } 503 if (error == ENOENT) 504 error = 0; 505 hammer_done_cursor(&cursor); 506 if (error == EDEADLK) 507 goto retry; 508 failed: 509 return(error); 510 } 511 512 /* 513 * Helper function - perform rollback on a B-Tree element given trunc_tid. 514 * 515 * If create_tid >= trunc_tid the record is physically destroyed. 516 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record. 517 */ 518 static 519 int 520 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid) 521 { 522 hammer_btree_leaf_elm_t elm; 523 int error; 524 525 elm = &cursor->node->ondisk->elms[cursor->index].leaf; 526 if (elm->base.create_tid < trunc_tid && 527 elm->base.delete_tid < trunc_tid) { 528 return(0); 529 } 530 531 if (elm->base.create_tid >= trunc_tid) { 532 error = hammer_delete_at_cursor( 533 cursor, HAMMER_DELETE_DESTROY, 534 cursor->trans->tid, cursor->trans->time32, 535 1, NULL); 536 } else if (elm->base.delete_tid >= trunc_tid) { 537 error = hammer_delete_at_cursor( 538 cursor, HAMMER_DELETE_ADJUST, 539 0, 0, 540 1, NULL); 541 } else { 542 error = 0; 543 } 544 return(error); 545 } 546 547