/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER PFS ioctls - Manage pseudo-fs configurations
 */

#include "hammer.h"

static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
                        hammer_inode_t ip);
static int hammer_pfs_rollback(hammer_transaction_t trans,
                        hammer_pseudofs_inmem_t pfsm,
                        hammer_tid_t trunc_tid);
static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
                        hammer_tid_t trunc_tid);

/*
 * Get mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        uint32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);
        pfs->bytes = sizeof(struct hammer_pseudofs_data);
        pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error) {
                hammer_rel_pseudofs(trans->hmp, pfsm);
                return(error);
        }

        /*
         * If the PFS is a master the sync tid is set by normal operation
         * rather than the mirroring code, and will always track the
         * real HAMMER filesystem.
         *
         * We use flush_tid1, which is the highest fully committed TID.
         * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
         * caught up to it yet so a crash will roll us back to flush_tid1.
         */
        if (hammer_is_pfs_master(&pfsm->pfsd))
                pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;

        /*
         * Copy out to userland.
         */
        if (pfs->ondisk)
                error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
        hammer_rel_pseudofs(trans->hmp, pfsm);
        return(error);
}

/*
 * Set mirroring/pseudo-fs information
 */
int
hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        uint32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);
        if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
                error = EINVAL;

        if (error == 0 && pfs->ondisk) {
                /*
                 * Load the PFS so we can modify our in-core copy.  Ignore
                 * ENOENT errors.
                 */
                pfsm = hammer_load_pseudofs(trans, localization, &error);
                error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));

                /*
                 * Save it back, creating a root inode if we are in master
                 * mode and no root exists.
                 *
                 * We do not create root inodes for slaves; the root inode
                 * must be mirrored from the master.
                 */
                if (error == 0 && hammer_is_pfs_master(&pfsm->pfsd)) {
                        error = hammer_mkroot_pseudofs(trans, cred, pfsm, ip);
                }
                if (error == 0)
                        error = hammer_save_pseudofs(trans, pfsm);

                /*
                 * Wake up anyone waiting for a TID update for this PFS.
                 */
                wakeup(&pfsm->pfsd.sync_end_tid);
                hammer_rel_pseudofs(trans->hmp, pfsm);
        }
        return(error);
}

/*
 * Upgrade a slave to a master
 *
 * This is fairly easy to do, but we must physically undo any partial syncs
 * for transaction ids > sync_end_tid.  Effectively, we must do a partial
 * rollback.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        uint32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);
        if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
                return(error);

        /*
         * A master id must be set when upgrading.
         */
        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                if (hammer_is_pfs_slave(&pfsm->pfsd)) {
                        error = hammer_pfs_rollback(trans, pfsm,
                                            pfsm->pfsd.sync_end_tid + 1);
                        if (error == 0) {
                                pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
                                error = hammer_save_pseudofs(trans, pfsm);
                        }
                }
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        if (error == EINTR) {
                pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
                error = 0;
        }
        return (error);
}

/*
 * Downgrade a master to a slave
 *
 * This is really easy to do, just set the SLAVE flag and update sync_end_tid.
 *
 * We previously did not update sync_end_tid in consideration for a slave
 * upgraded to a master and then downgraded again, but this completely breaks
 * the case where one starts with a master and then downgrades to a slave,
 * then upgrades again.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
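 *
 * sync_end_tid is only ever raised here (to flush_tid1, the highest fully
 * committed TID), so a later upgrade should not roll back transactions that
 * were already committed while the PFS was a master.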
 */
int
hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_mount_t hmp = trans->hmp;
        hammer_pseudofs_inmem_t pfsm;
        uint32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);
        if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
                return(error);

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                if (hammer_is_pfs_master(&pfsm->pfsd)) {
                        pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
                        if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
                                pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
                        error = hammer_save_pseudofs(trans, pfsm);
                }
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        return (error);
}

/*
 * Destroy a PFS
 *
 * We can destroy a PFS by scanning and deleting all of its records in the
 * B-Tree.  The hammer utility will delete the softlink in the primary
 * filesystem.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 */
int
hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        uint32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);

        if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
                return(error);

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                error = hammer_pfs_rollback(trans, pfsm, 0);
                if (error == 0) {
                        pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
                        error = hammer_save_pseudofs(trans, pfsm);
                }
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        if (error == EINTR) {
                pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
                error = 0;
        }
        return(error);
}

/*
 * Wait for the PFS to sync past the specified TID
 */
int
hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        struct hammer_pseudofs_data pfsd;
        uint32_t localization;
        hammer_tid_t tid;
        void *waitp;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);

        if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
                return(error);

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                if (hammer_is_pfs_slave(&pfsm->pfsd)) {
                        tid = pfsm->pfsd.sync_end_tid;
                        waitp = &pfsm->pfsd.sync_end_tid;
                } else {
                        tid = trans->hmp->flush_tid1;
                        waitp = &trans->hmp->flush_tid1;
                }
                if (tid <= pfsd.sync_end_tid)
                        tsleep(waitp, PCATCH, "hmrmwt", 0);
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        if (error == EINTR) {
                pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
                error = 0;
        }
        return(error);
}

/*
 * Iterate PFS ondisk data.
 * This function essentially does the same as hammer_load_pseudofs()
 * except that this function only retrieves PFS data without touching
 * hammer_pfs_rb_tree at all.
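 * Instead the PFS record is looked up directly with a B-Tree cursor on the
 * root inode's HAMMER_RECTYPE_PFS record, keyed by the PFS localization.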
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 * since this ioctl only requires PFS id (or upper 16 bits of ip localization).
 *
 * NOTE: The API was changed in DragonFly 4.7 due to design issues that
 * this ioctl and libhammer (the only caller of this ioctl within the
 * DragonFly source, and no longer maintained by anyone) had.
 */
int
hammer_ioc_scan_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        struct hammer_cursor cursor;
        hammer_inode_t dip;
        uint32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = pfs_to_lo(pfs->pfs_id);
        pfs->bytes = sizeof(struct hammer_pseudofs_data);
        pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

        dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID,
                        HAMMER_DEF_LOCALIZATION, 0, &error);

        error = hammer_init_cursor(trans, &cursor,
                        (dip ? &dip->cache[1] : NULL), dip);
        if (error)
                goto fail;

        cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION |
                                      HAMMER_LOCALIZE_MISC;
        cursor.key_beg.obj_id = HAMMER_OBJID_ROOT;
        cursor.key_beg.create_tid = 0;
        cursor.key_beg.delete_tid = 0;
        cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS;
        cursor.key_beg.obj_type = 0;
        cursor.key_beg.key = localization;
        cursor.asof = HAMMER_MAX_TID;
        cursor.flags |= HAMMER_CURSOR_ASOF;

        error = hammer_ip_lookup(&cursor);
        if (error == 0) {
                error = hammer_ip_resolve_data(&cursor);
                if (error == 0) {
                        if (pfs->ondisk)
                                copyout(cursor.data, pfs->ondisk,
                                        cursor.leaf->data_len);
                        localization = cursor.leaf->base.key;
                        pfs->pfs_id = lo_to_pfs(localization);
                }
        }
        hammer_done_cursor(&cursor);
fail:
        if (dip)
                hammer_rel_inode(dip, 0);
        return(error);
}

/*
 * Auto-detect the pseudofs and do basic bounds checking.
 */
static
int
hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
{
        int error = 0;

        if (pfs->pfs_id == -1)
                pfs->pfs_id = lo_to_pfs(ip->obj_localization);
        if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
                error = EINVAL;
        if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
                error = EINVAL;
        return(error);
}

/*
 * Roll back the specified PFS to (trunc_tid - 1), removing everything
 * greater than or equal to trunc_tid.  The PFS must not have been in
 * no-mirror mode or the MIRROR_FILTERED scan will not work properly.
 *
 * This is typically used to remove any partial syncs when upgrading a
 * slave to a master.  It can theoretically also be used to roll back
 * any PFS, including the root PFS, BUT ONLY TO POINTS THAT HAVE NOT YET
 * BEEN PRUNED, and to points that are older only if they are on a retained
 * (pruning softlink) boundary.
 *
 * Rollbacks destroy information.  If you don't mind inode numbers changing,
 * a better way is to cpdup a snapshot back onto the master.
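 *
 * The scan below relies on HAMMER_CURSOR_MIRROR_FILTERED with
 * cmirror.mirror_tid set to trunc_tid, so only B-Tree subtrees that may
 * contain records created or deleted at or after trunc_tid are visited.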
 */
static
int
hammer_pfs_rollback(hammer_transaction_t trans,
                    hammer_pseudofs_inmem_t pfsm,
                    hammer_tid_t trunc_tid)
{
        struct hammer_cmirror cmirror;
        struct hammer_cursor cursor;
        struct hammer_base_elm key_cur;
        int error;
        int seq;

        bzero(&cmirror, sizeof(cmirror));
        bzero(&key_cur, sizeof(key_cur));
        key_cur.localization = HAMMER_MIN_LOCALIZATION | pfsm->localization;
        key_cur.obj_id = HAMMER_MIN_OBJID;
        key_cur.key = HAMMER_MIN_KEY;
        key_cur.create_tid = 1;
        key_cur.rec_type = HAMMER_MIN_RECTYPE;

        seq = trans->hmp->flusher.done;

retry:
        error = hammer_init_cursor(trans, &cursor, NULL, NULL);
        if (error) {
                hammer_done_cursor(&cursor);
                goto failed;
        }
        cursor.key_beg = key_cur;
        cursor.key_end.localization = HAMMER_MAX_LOCALIZATION |
                                      pfsm->localization;
        cursor.key_end.obj_id = HAMMER_MAX_OBJID;
        cursor.key_end.key = HAMMER_MAX_KEY;
        cursor.key_end.create_tid = HAMMER_MAX_TID;
        cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;

        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
        cursor.flags |= HAMMER_CURSOR_BACKEND;

        /*
         * Do an optimized scan of only records created or modified
         * >= trunc_tid, so we can fix up those records.  We must
         * still check the TIDs but this greatly reduces the size of
         * the scan.
         */
        cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
        cursor.cmirror = &cmirror;
        cmirror.mirror_tid = trunc_tid;

        error = hammer_btree_first(&cursor);
        while (error == 0) {
                /*
                 * Abort the rollback if the caller has been signaled.
                 */
                if (error == 0) {
                        error = hammer_signal_check(trans->hmp);
                        if (error)
                                break;
                }

                /*
                 * We only care about leaf nodes.  Internal nodes can be
                 * returned in mirror-filtered mode (they are used to
                 * generate SKIP mrecords), but we don't need them for
                 * this code.
                 *
                 * WARNING: See warnings in hammer_unlock_cursor() function.
                 */
                cursor.flags |= HAMMER_CURSOR_ATEDISK;
                if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
                        key_cur = cursor.node->ondisk->elms[cursor.index].base;
                        error = hammer_pfs_delete_at_cursor(&cursor,
                                                            trunc_tid);
                }

                /*
                 * Throttle against the flusher while meta-data or UNDO
                 * space is running low, unlocking the cursor so the
                 * flusher can make progress.
                 */
                while (hammer_flusher_meta_halflimit(trans->hmp) ||
                       hammer_flusher_undo_exhausted(trans, 2)) {
                        hammer_unlock_cursor(&cursor);
                        hammer_flusher_wait(trans->hmp, seq);
                        hammer_lock_cursor(&cursor);
                        seq = hammer_flusher_async_one(trans->hmp);
                }

                if (error == 0)
                        error = hammer_btree_iterate(&cursor);
        }
        if (error == ENOENT)
                error = 0;
        hammer_done_cursor(&cursor);
        if (error == EDEADLK)
                goto retry;
failed:
        return(error);
}

/*
 * Helper function - perform rollback on a B-Tree element given trunc_tid.
 *
 * If create_tid >= trunc_tid the record is physically destroyed.
 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
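 * Records whose create_tid and delete_tid both fall below trunc_tid are
 * left untouched.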
 */
static
int
hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
{
        hammer_btree_leaf_elm_t elm;
        int error;

        elm = &cursor->node->ondisk->elms[cursor->index].leaf;
        if (elm->base.create_tid < trunc_tid &&
            elm->base.delete_tid < trunc_tid) {
                return(0);
        }

        if (elm->base.create_tid >= trunc_tid) {
                error = hammer_delete_at_cursor(
                                cursor, HAMMER_DELETE_DESTROY,
                                cursor->trans->tid, cursor->trans->time32,
                                1, NULL);
        } else if (elm->base.delete_tid >= trunc_tid) {
                error = hammer_delete_at_cursor(
                                cursor, HAMMER_DELETE_ADJUST,
                                0, 0,
                                1, NULL);
        } else {
                error = 0;
        }
        return(error);
}