1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * HAMMER PFS ioctls - Manage pseudo-fs configurations 36 */ 37 38 #include "hammer.h" 39 40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, 41 hammer_inode_t ip); 42 static int hammer_pfs_rollback(hammer_transaction_t trans, 43 hammer_pseudofs_inmem_t pfsm, 44 hammer_tid_t trunc_tid); 45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, 46 hammer_tid_t trunc_tid); 47 48 /* 49 * Get mirroring/pseudo-fs information 50 * 51 * NOTE: The ip used for ioctl is not necessarily related to the PFS 52 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 53 */ 54 int 55 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 56 struct hammer_ioc_pseudofs_rw *pfs) 57 { 58 hammer_pseudofs_inmem_t pfsm; 59 uint32_t localization; 60 int error; 61 62 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 63 return(error); 64 localization = pfs_to_lo(pfs->pfs_id); 65 pfs->bytes = sizeof(struct hammer_pseudofs_data); 66 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION; 67 68 pfsm = hammer_load_pseudofs(trans, localization, &error); 69 if (error) { 70 hammer_rel_pseudofs(trans->hmp, pfsm); 71 return(error); 72 } 73 74 /* 75 * If the PFS is a master the sync tid is set by normal operation 76 * rather than the mirroring code, and will always track the 77 * real HAMMER filesystem. 78 * 79 * We use flush_tid1, which is the highest fully committed TID. 80 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't 81 * caught up to it yet so a crash will roll us back to flush_tid1. 82 */ 83 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) 84 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; 85 86 /* 87 * Copy out to userland. 88 */ 89 error = 0; 90 if (pfs->ondisk && error == 0) 91 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd)); 92 hammer_rel_pseudofs(trans->hmp, pfsm); 93 return(error); 94 } 95 96 /* 97 * Set mirroring/pseudo-fs information 98 * 99 * NOTE: The ip used for ioctl is not necessarily related to the PFS 100 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 101 */ 102 int 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs) 105 { 106 hammer_pseudofs_inmem_t pfsm; 107 uint32_t localization; 108 int error; 109 110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 111 return(error); 112 localization = pfs_to_lo(pfs->pfs_id); 113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION) 114 error = EINVAL; 115 116 /* 117 * Make sure a caller isn't creating a PFS from non-root PFS. 118 */ 119 if (lo_to_pfs(ip->obj_localization) != HAMMER_ROOT_PFSID) { 120 hmkprintf(trans->hmp, 121 "Creating a PFS from non-root PFS is not allowed\n"); 122 return(EINVAL); 123 } 124 125 if (error == 0 && pfs->ondisk) { 126 /* 127 * Load the PFS so we can modify our in-core copy. Ignore 128 * ENOENT errors. 129 */ 130 pfsm = hammer_load_pseudofs(trans, localization, &error); 131 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd)); 132 133 /* 134 * Save it back, create a root inode if we are in master 135 * mode and no root exists. 136 * 137 * We do not create root inodes for slaves, the root inode 138 * must be mirrored from the master. 139 */ 140 if (error == 0 && 141 (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 142 error = hammer_mkroot_pseudofs(trans, cred, pfsm); 143 } 144 if (error == 0) 145 error = hammer_save_pseudofs(trans, pfsm); 146 147 /* 148 * Wakeup anyone waiting for a TID update for this PFS 149 */ 150 wakeup(&pfsm->pfsd.sync_end_tid); 151 hammer_rel_pseudofs(trans->hmp, pfsm); 152 } 153 return(error); 154 } 155 156 /* 157 * Upgrade a slave to a master 158 * 159 * This is fairly easy to do, but we must physically undo any partial syncs 160 * for transaction ids > sync_end_tid. Effective, we must do a partial 161 * rollback. 162 * 163 * NOTE: The ip used for ioctl is not necessarily related to the PFS 164 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 165 */ 166 int 167 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 168 struct hammer_ioc_pseudofs_rw *pfs) 169 { 170 hammer_pseudofs_inmem_t pfsm; 171 uint32_t localization; 172 int error; 173 174 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 175 return(error); 176 localization = pfs_to_lo(pfs->pfs_id); 177 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 178 return(error); 179 180 /* 181 * A master id must be set when upgrading 182 */ 183 pfsm = hammer_load_pseudofs(trans, localization, &error); 184 if (error == 0) { 185 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) { 186 error = hammer_pfs_rollback(trans, pfsm, 187 pfsm->pfsd.sync_end_tid + 1); 188 if (error == 0) { 189 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE; 190 error = hammer_save_pseudofs(trans, pfsm); 191 } 192 } 193 } 194 hammer_rel_pseudofs(trans->hmp, pfsm); 195 if (error == EINTR) { 196 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 197 error = 0; 198 } 199 return (error); 200 } 201 202 /* 203 * Downgrade a master to a slave 204 * 205 * This is really easy to do, just set the SLAVE flag and update sync_end_tid. 206 * 207 * We previously did not update sync_end_tid in consideration for a slave 208 * upgraded to a master and then downgraded again, but this completely breaks 209 * the case where one starts with a master and then downgrades to a slave, 210 * then upgrades again. 211 * 212 * NOTE: The ip used for ioctl is not necessarily related to the PFS 213 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 214 */ 215 int 216 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 217 struct hammer_ioc_pseudofs_rw *pfs) 218 { 219 hammer_mount_t hmp = trans->hmp; 220 hammer_pseudofs_inmem_t pfsm; 221 uint32_t localization; 222 int error; 223 224 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 225 return(error); 226 localization = pfs_to_lo(pfs->pfs_id); 227 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 228 return(error); 229 230 pfsm = hammer_load_pseudofs(trans, localization, &error); 231 if (error == 0) { 232 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 233 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE; 234 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1) 235 pfsm->pfsd.sync_end_tid = hmp->flush_tid1; 236 error = hammer_save_pseudofs(trans, pfsm); 237 } 238 } 239 hammer_rel_pseudofs(trans->hmp, pfsm); 240 return (error); 241 } 242 243 /* 244 * Destroy a PFS 245 * 246 * We can destroy a PFS by scanning and deleting all of its records in the 247 * B-Tree. The hammer utility will delete the softlink in the primary 248 * filesystem. 249 * 250 * NOTE: The ip used for ioctl is not necessarily related to the PFS 251 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 252 */ 253 int 254 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 255 struct hammer_ioc_pseudofs_rw *pfs) 256 { 257 hammer_pseudofs_inmem_t pfsm; 258 uint32_t localization; 259 int error; 260 261 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 262 return(error); 263 localization = pfs_to_lo(pfs->pfs_id); 264 265 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 266 return(error); 267 268 pfsm = hammer_load_pseudofs(trans, localization, &error); 269 if (error == 0) { 270 error = hammer_pfs_rollback(trans, pfsm, 0); 271 if (error == 0) { 272 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED; 273 error = hammer_save_pseudofs(trans, pfsm); 274 } 275 } 276 hammer_rel_pseudofs(trans->hmp, pfsm); 277 if (error == EINTR) { 278 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 279 error = 0; 280 } 281 return(error); 282 } 283 284 /* 285 * Wait for the PFS to sync past the specified TID 286 */ 287 int 288 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 289 struct hammer_ioc_pseudofs_rw *pfs) 290 { 291 hammer_pseudofs_inmem_t pfsm; 292 struct hammer_pseudofs_data pfsd; 293 uint32_t localization; 294 hammer_tid_t tid; 295 void *waitp; 296 int error; 297 298 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 299 return(error); 300 localization = pfs_to_lo(pfs->pfs_id); 301 302 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) 303 return(error); 304 305 pfsm = hammer_load_pseudofs(trans, localization, &error); 306 if (error == 0) { 307 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { 308 tid = pfsm->pfsd.sync_end_tid; 309 waitp = &pfsm->pfsd.sync_end_tid; 310 } else { 311 tid = trans->hmp->flush_tid1; 312 waitp = &trans->hmp->flush_tid1; 313 } 314 if (tid <= pfsd.sync_end_tid) 315 tsleep(waitp, PCATCH, "hmrmwt", 0); 316 } 317 hammer_rel_pseudofs(trans->hmp, pfsm); 318 if (error == EINTR) { 319 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 320 error = 0; 321 } 322 return(error); 323 } 324 325 /* 326 * Iterate PFS ondisk data. 327 * This function basically does the same as hammer_load_pseudofs() 328 * except that the purpose of this function is to retrieve data. 329 * 330 * NOTE: The ip used for ioctl is not necessarily related to the PFS 331 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 332 */ 333 int 334 hammer_ioc_iterate_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 335 struct hammer_ioc_pfs_iterate *pi) 336 { 337 struct hammer_cursor cursor; 338 struct hammer_ioc_pseudofs_rw pfs; 339 hammer_inode_t dip; 340 uint32_t localization; 341 int error; 342 343 /* 344 * struct hammer_ioc_pfs_iterate was never necessary. 345 * This ioctl needs extra code only to do conversion. 346 * The name pi->pos is misleading, but it's been exposed 347 * to userspace header.. 348 */ 349 bzero(&pfs, sizeof(pfs)); 350 pfs.pfs_id = pi->pos; 351 pfs.bytes = sizeof(struct hammer_pseudofs_data); /* dummy */ 352 if ((error = hammer_pfs_autodetect(&pfs, ip)) != 0) 353 return(error); 354 pi->pos = pfs.pfs_id; 355 localization = pfs_to_lo(pi->pos); 356 357 dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID, 358 HAMMER_DEF_LOCALIZATION, 0, &error); 359 360 error = hammer_init_cursor(trans, &cursor, 361 (dip ? &dip->cache[1] : NULL), dip); 362 if (error) 363 goto out; 364 365 cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION | 366 HAMMER_LOCALIZE_MISC; 367 cursor.key_beg.obj_id = HAMMER_OBJID_ROOT; 368 cursor.key_beg.create_tid = 0; 369 cursor.key_beg.delete_tid = 0; 370 cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS; 371 cursor.key_beg.obj_type = 0; 372 cursor.key_beg.key = localization; 373 cursor.asof = HAMMER_MAX_TID; 374 cursor.flags |= HAMMER_CURSOR_ASOF; 375 376 error = hammer_ip_lookup(&cursor); 377 if (error == 0) { 378 error = hammer_ip_resolve_data(&cursor); 379 if (error == 0) { 380 if (pi->ondisk) 381 copyout(cursor.data, pi->ondisk, cursor.leaf->data_len); 382 localization = cursor.leaf->base.key; 383 pi->pos = lo_to_pfs(localization); 384 /* 385 * Caller needs to increment pi->pos each time calling 386 * this ioctl. This ioctl only restores current PFS id. 387 */ 388 } 389 } 390 out: 391 hammer_done_cursor(&cursor); 392 if (dip) 393 hammer_rel_inode(dip, 0); 394 return(error); 395 } 396 397 /* 398 * Auto-detect the pseudofs and do basic bounds checking. 399 */ 400 static 401 int 402 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip) 403 { 404 int error = 0; 405 406 if (pfs->pfs_id == -1) 407 pfs->pfs_id = lo_to_pfs(ip->obj_localization); 408 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS) 409 error = EINVAL; 410 if (pfs->bytes < sizeof(struct hammer_pseudofs_data)) 411 error = EINVAL; 412 return(error); 413 } 414 415 /* 416 * Rollback the specified PFS to (trunc_tid - 1), removing everything 417 * greater or equal to trunc_tid. The PFS must not have been in no-mirror 418 * mode or the MIRROR_FILTERED scan will not work properly. 419 * 420 * This is typically used to remove any partial syncs when upgrading a 421 * slave to a master. It can theoretically also be used to rollback 422 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN 423 * PRUNED, and to points that are older only if they are on a retained 424 * (pruning softlink) boundary. 425 * 426 * Rollbacks destroy information. If you don't mind inode numbers changing 427 * a better way would be to cpdup a snapshot back onto the master. 428 */ 429 static 430 int 431 hammer_pfs_rollback(hammer_transaction_t trans, 432 hammer_pseudofs_inmem_t pfsm, 433 hammer_tid_t trunc_tid) 434 { 435 struct hammer_cmirror cmirror; 436 struct hammer_cursor cursor; 437 struct hammer_base_elm key_cur; 438 int error; 439 int seq; 440 441 bzero(&cmirror, sizeof(cmirror)); 442 bzero(&key_cur, sizeof(key_cur)); 443 key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization; 444 key_cur.obj_id = HAMMER_MIN_OBJID; 445 key_cur.key = HAMMER_MIN_KEY; 446 key_cur.create_tid = 1; 447 key_cur.rec_type = HAMMER_MIN_RECTYPE; 448 449 seq = trans->hmp->flusher.done; 450 451 retry: 452 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 453 if (error) { 454 hammer_done_cursor(&cursor); 455 goto failed; 456 } 457 cursor.key_beg = key_cur; 458 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION | 459 pfsm->localization; 460 cursor.key_end.obj_id = HAMMER_MAX_OBJID; 461 cursor.key_end.key = HAMMER_MAX_KEY; 462 cursor.key_end.create_tid = HAMMER_MAX_TID; 463 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 464 465 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 466 cursor.flags |= HAMMER_CURSOR_BACKEND; 467 468 /* 469 * Do an optimized scan of only records created or modified 470 * >= trunc_tid, so we can fix up those records. We must 471 * still check the TIDs but this greatly reduces the size of 472 * the scan. 473 */ 474 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 475 cursor.cmirror = &cmirror; 476 cmirror.mirror_tid = trunc_tid; 477 478 error = hammer_btree_first(&cursor); 479 while (error == 0) { 480 /* 481 * Abort the rollback. 482 */ 483 if (error == 0) { 484 error = hammer_signal_check(trans->hmp); 485 if (error) 486 break; 487 } 488 489 /* 490 * We only care about leafs. Internal nodes can be returned 491 * in mirror-filtered mode (they are used to generate SKIP 492 * mrecords), but we don't need them for this code. 493 * 494 * WARNING: See warnings in hammer_unlock_cursor() function. 495 */ 496 cursor.flags |= HAMMER_CURSOR_ATEDISK; 497 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) { 498 key_cur = cursor.node->ondisk->elms[cursor.index].base; 499 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid); 500 } 501 502 while (hammer_flusher_meta_halflimit(trans->hmp) || 503 hammer_flusher_undo_exhausted(trans, 2)) { 504 hammer_unlock_cursor(&cursor); 505 hammer_flusher_wait(trans->hmp, seq); 506 hammer_lock_cursor(&cursor); 507 seq = hammer_flusher_async_one(trans->hmp); 508 } 509 510 if (error == 0) 511 error = hammer_btree_iterate(&cursor); 512 } 513 if (error == ENOENT) 514 error = 0; 515 hammer_done_cursor(&cursor); 516 if (error == EDEADLK) 517 goto retry; 518 failed: 519 return(error); 520 } 521 522 /* 523 * Helper function - perform rollback on a B-Tree element given trunc_tid. 524 * 525 * If create_tid >= trunc_tid the record is physically destroyed. 526 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record. 527 */ 528 static 529 int 530 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid) 531 { 532 hammer_btree_leaf_elm_t elm; 533 int error; 534 535 elm = &cursor->node->ondisk->elms[cursor->index].leaf; 536 if (elm->base.create_tid < trunc_tid && 537 elm->base.delete_tid < trunc_tid) { 538 return(0); 539 } 540 541 if (elm->base.create_tid >= trunc_tid) { 542 error = hammer_delete_at_cursor( 543 cursor, HAMMER_DELETE_DESTROY, 544 cursor->trans->tid, cursor->trans->time32, 545 1, NULL); 546 } else if (elm->base.delete_tid >= trunc_tid) { 547 error = hammer_delete_at_cursor( 548 cursor, HAMMER_DELETE_ADJUST, 549 0, 0, 550 1, NULL); 551 } else { 552 error = 0; 553 } 554 return(error); 555 } 556 557