1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * HAMMER PFS ioctls - Manage pseudo-fs configurations 36 */ 37 38 #include "hammer.h" 39 40 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, 41 hammer_inode_t ip); 42 static int hammer_pfs_rollback(hammer_transaction_t trans, 43 hammer_pseudofs_inmem_t pfsm, 44 hammer_tid_t trunc_tid); 45 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, 46 hammer_tid_t trunc_tid); 47 48 /* 49 * Get mirroring/pseudo-fs information 50 * 51 * NOTE: The ip used for ioctl is not necessarily related to the PFS 52 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 53 */ 54 int 55 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 56 struct hammer_ioc_pseudofs_rw *pfs) 57 { 58 hammer_pseudofs_inmem_t pfsm; 59 u_int32_t localization; 60 int error; 61 62 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 63 return(error); 64 localization = (u_int32_t)pfs->pfs_id << 16; 65 pfs->bytes = sizeof(struct hammer_pseudofs_data); 66 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION; 67 68 pfsm = hammer_load_pseudofs(trans, localization, &error); 69 if (error) { 70 hammer_rel_pseudofs(trans->hmp, pfsm); 71 return(error); 72 } 73 74 /* 75 * If the PFS is a master the sync tid is set by normal operation 76 * rather than the mirroring code, and will always track the 77 * real HAMMER filesystem. 78 * 79 * We use flush_tid1, which is the highest fully committed TID. 80 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't 81 * caught up to it yet so a crash will roll us back to flush_tid1. 82 */ 83 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) 84 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; 85 86 /* 87 * Copy out to userland. 88 */ 89 error = 0; 90 if (pfs->ondisk && error == 0) 91 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd)); 92 hammer_rel_pseudofs(trans->hmp, pfsm); 93 return(error); 94 } 95 96 /* 97 * Set mirroring/pseudo-fs information 98 * 99 * NOTE: The ip used for ioctl is not necessarily related to the PFS 100 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 101 */ 102 int 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs) 105 { 106 hammer_pseudofs_inmem_t pfsm; 107 u_int32_t localization; 108 int error; 109 110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 111 return(error); 112 localization = (u_int32_t)pfs->pfs_id << 16; 113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION) 114 error = EINVAL; 115 116 if (error == 0 && pfs->ondisk) { 117 /* 118 * Load the PFS so we can modify our in-core copy. Ignore 119 * ENOENT errors. 120 */ 121 pfsm = hammer_load_pseudofs(trans, localization, &error); 122 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd)); 123 124 /* 125 * Save it back, create a root inode if we are in master 126 * mode and no root exists. 127 * 128 * We do not create root inodes for slaves, the root inode 129 * must be mirrored from the master. 130 */ 131 if (error == 0 && 132 (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 133 error = hammer_mkroot_pseudofs(trans, cred, pfsm); 134 } 135 if (error == 0) 136 error = hammer_save_pseudofs(trans, pfsm); 137 138 /* 139 * Wakeup anyone waiting for a TID update for this PFS 140 */ 141 wakeup(&pfsm->pfsd.sync_end_tid); 142 hammer_rel_pseudofs(trans->hmp, pfsm); 143 } 144 return(error); 145 } 146 147 /* 148 * Upgrade a slave to a master 149 * 150 * This is fairly easy to do, but we must physically undo any partial syncs 151 * for transaction ids > sync_end_tid. Effective, we must do a partial 152 * rollback. 153 * 154 * NOTE: The ip used for ioctl is not necessarily related to the PFS 155 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 156 */ 157 int 158 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 159 struct hammer_ioc_pseudofs_rw *pfs) 160 { 161 hammer_pseudofs_inmem_t pfsm; 162 u_int32_t localization; 163 int error; 164 165 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 166 return(error); 167 localization = (u_int32_t)pfs->pfs_id << 16; 168 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 169 return(error); 170 171 /* 172 * A master id must be set when upgrading 173 */ 174 pfsm = hammer_load_pseudofs(trans, localization, &error); 175 if (error == 0) { 176 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) { 177 error = hammer_pfs_rollback(trans, pfsm, 178 pfsm->pfsd.sync_end_tid + 1); 179 if (error == 0) { 180 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE; 181 error = hammer_save_pseudofs(trans, pfsm); 182 } 183 } 184 } 185 hammer_rel_pseudofs(trans->hmp, pfsm); 186 if (error == EINTR) { 187 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 188 error = 0; 189 } 190 return (error); 191 } 192 193 /* 194 * Downgrade a master to a slave 195 * 196 * This is really easy to do, just set the SLAVE flag and update sync_end_tid. 197 * 198 * We previously did not update sync_end_tid in consideration for a slave 199 * upgraded to a master and then downgraded again, but this completely breaks 200 * the case where one starts with a master and then downgrades to a slave, 201 * then upgrades again. 202 * 203 * NOTE: The ip used for ioctl is not necessarily related to the PFS 204 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 205 */ 206 int 207 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 208 struct hammer_ioc_pseudofs_rw *pfs) 209 { 210 hammer_mount_t hmp = trans->hmp; 211 hammer_pseudofs_inmem_t pfsm; 212 u_int32_t localization; 213 int error; 214 215 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 216 return(error); 217 localization = (u_int32_t)pfs->pfs_id << 16; 218 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 219 return(error); 220 221 pfsm = hammer_load_pseudofs(trans, localization, &error); 222 if (error == 0) { 223 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 224 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE; 225 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1) 226 pfsm->pfsd.sync_end_tid = hmp->flush_tid1; 227 error = hammer_save_pseudofs(trans, pfsm); 228 } 229 } 230 hammer_rel_pseudofs(trans->hmp, pfsm); 231 return (error); 232 } 233 234 /* 235 * Destroy a PFS 236 * 237 * We can destroy a PFS by scanning and deleting all of its records in the 238 * B-Tree. The hammer utility will delete the softlink in the primary 239 * filesystem. 240 * 241 * NOTE: The ip used for ioctl is not necessarily related to the PFS 242 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 243 */ 244 int 245 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 246 struct hammer_ioc_pseudofs_rw *pfs) 247 { 248 hammer_pseudofs_inmem_t pfsm; 249 u_int32_t localization; 250 int error; 251 252 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 253 return(error); 254 localization = (u_int32_t)pfs->pfs_id << 16; 255 256 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 257 return(error); 258 259 pfsm = hammer_load_pseudofs(trans, localization, &error); 260 if (error == 0) { 261 error = hammer_pfs_rollback(trans, pfsm, 0); 262 if (error == 0) { 263 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED; 264 error = hammer_save_pseudofs(trans, pfsm); 265 } 266 } 267 hammer_rel_pseudofs(trans->hmp, pfsm); 268 if (error == EINTR) { 269 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 270 error = 0; 271 } 272 return(error); 273 } 274 275 /* 276 * Wait for the PFS to sync past the specified TID 277 */ 278 int 279 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 280 struct hammer_ioc_pseudofs_rw *pfs) 281 { 282 hammer_pseudofs_inmem_t pfsm; 283 struct hammer_pseudofs_data pfsd; 284 u_int32_t localization; 285 hammer_tid_t tid; 286 void *waitp; 287 int error; 288 289 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 290 return(error); 291 localization = (u_int32_t)pfs->pfs_id << 16; 292 293 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) 294 return(error); 295 296 pfsm = hammer_load_pseudofs(trans, localization, &error); 297 if (error == 0) { 298 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { 299 tid = pfsm->pfsd.sync_end_tid; 300 waitp = &pfsm->pfsd.sync_end_tid; 301 } else { 302 tid = trans->hmp->flush_tid1; 303 waitp = &trans->hmp->flush_tid1; 304 } 305 if (tid <= pfsd.sync_end_tid) 306 tsleep(waitp, PCATCH, "hmrmwt", 0); 307 } 308 hammer_rel_pseudofs(trans->hmp, pfsm); 309 if (error == EINTR) { 310 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 311 error = 0; 312 } 313 return(error); 314 } 315 316 /* 317 * Iterate PFS ondisk data. 318 * This function basically does the same as hammer_load_pseudofs() 319 * except that the purpose of this function is to retrieve data. 320 * 321 * NOTE: The ip used for ioctl is not necessarily related to the PFS 322 * since this ioctl only requires PFS id (or upper 16 bits of ip localization). 323 */ 324 int 325 hammer_ioc_iterate_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 326 struct hammer_ioc_pfs_iterate *pi) 327 { 328 struct hammer_cursor cursor; 329 struct hammer_ioc_pseudofs_rw pfs; 330 hammer_inode_t dip; 331 u_int32_t localization; 332 int error; 333 334 /* 335 * struct hammer_ioc_pfs_iterate was never necessary. 336 * This ioctl needs extra code only to do conversion. 337 * The name pi->pos is misleading, but it's been exposed 338 * to userspace header.. 339 */ 340 bzero(&pfs, sizeof(pfs)); 341 pfs.pfs_id = pi->pos; 342 pfs.bytes = sizeof(struct hammer_pseudofs_data); /* dummy */ 343 if ((error = hammer_pfs_autodetect(&pfs, ip)) != 0) 344 return(error); 345 pi->pos = pfs.pfs_id; 346 localization = (u_int32_t)pi->pos << 16; 347 348 dip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID, 349 HAMMER_DEF_LOCALIZATION, 0, &error); 350 351 error = hammer_init_cursor(trans, &cursor, 352 (dip ? &dip->cache[1] : NULL), dip); 353 if (error) 354 goto out; 355 356 cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION + 357 HAMMER_LOCALIZE_MISC; 358 cursor.key_beg.obj_id = HAMMER_OBJID_ROOT; 359 cursor.key_beg.create_tid = 0; 360 cursor.key_beg.delete_tid = 0; 361 cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS; 362 cursor.key_beg.obj_type = 0; 363 cursor.key_beg.key = localization; 364 cursor.asof = HAMMER_MAX_TID; 365 cursor.flags |= HAMMER_CURSOR_ASOF; 366 367 error = hammer_ip_lookup(&cursor); 368 if (error == 0) { 369 error = hammer_ip_resolve_data(&cursor); 370 if (error == 0) { 371 if (pi->ondisk) 372 copyout(cursor.data, pi->ondisk, cursor.leaf->data_len); 373 localization = cursor.leaf->base.key; 374 pi->pos = localization >> 16; 375 /* 376 * Caller needs to increment pi->pos each time calling 377 * this ioctl. This ioctl only restores current PFS id. 378 */ 379 } 380 } 381 out: 382 hammer_done_cursor(&cursor); 383 if (dip) 384 hammer_rel_inode(dip, 0); 385 return(error); 386 } 387 388 /* 389 * Auto-detect the pseudofs and do basic bounds checking. 390 */ 391 static 392 int 393 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip) 394 { 395 int error = 0; 396 397 if (pfs->pfs_id == -1) 398 pfs->pfs_id = (int)(ip->obj_localization >> 16); 399 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS) 400 error = EINVAL; 401 if (pfs->bytes < sizeof(struct hammer_pseudofs_data)) 402 error = EINVAL; 403 return(error); 404 } 405 406 /* 407 * Rollback the specified PFS to (trunc_tid - 1), removing everything 408 * greater or equal to trunc_tid. The PFS must not have been in no-mirror 409 * mode or the MIRROR_FILTERED scan will not work properly. 410 * 411 * This is typically used to remove any partial syncs when upgrading a 412 * slave to a master. It can theoretically also be used to rollback 413 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN 414 * PRUNED, and to points that are older only if they are on a retained 415 * (pruning softlink) boundary. 416 * 417 * Rollbacks destroy information. If you don't mind inode numbers changing 418 * a better way would be to cpdup a snapshot back onto the master. 419 */ 420 static 421 int 422 hammer_pfs_rollback(hammer_transaction_t trans, 423 hammer_pseudofs_inmem_t pfsm, 424 hammer_tid_t trunc_tid) 425 { 426 struct hammer_cmirror cmirror; 427 struct hammer_cursor cursor; 428 struct hammer_base_elm key_cur; 429 int error; 430 int seq; 431 432 bzero(&cmirror, sizeof(cmirror)); 433 bzero(&key_cur, sizeof(key_cur)); 434 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization; 435 key_cur.obj_id = HAMMER_MIN_OBJID; 436 key_cur.key = HAMMER_MIN_KEY; 437 key_cur.create_tid = 1; 438 key_cur.rec_type = HAMMER_MIN_RECTYPE; 439 440 seq = trans->hmp->flusher.done; 441 442 retry: 443 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 444 if (error) { 445 hammer_done_cursor(&cursor); 446 goto failed; 447 } 448 cursor.key_beg = key_cur; 449 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION + 450 pfsm->localization; 451 cursor.key_end.obj_id = HAMMER_MAX_OBJID; 452 cursor.key_end.key = HAMMER_MAX_KEY; 453 cursor.key_end.create_tid = HAMMER_MAX_TID; 454 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 455 456 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 457 cursor.flags |= HAMMER_CURSOR_BACKEND; 458 459 /* 460 * Do an optimized scan of only records created or modified 461 * >= trunc_tid, so we can fix up those records. We must 462 * still check the TIDs but this greatly reduces the size of 463 * the scan. 464 */ 465 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 466 cursor.cmirror = &cmirror; 467 cmirror.mirror_tid = trunc_tid; 468 469 error = hammer_btree_first(&cursor); 470 while (error == 0) { 471 /* 472 * Abort the rollback. 473 */ 474 if (error == 0) { 475 error = hammer_signal_check(trans->hmp); 476 if (error) 477 break; 478 } 479 480 /* 481 * We only care about leafs. Internal nodes can be returned 482 * in mirror-filtered mode (they are used to generate SKIP 483 * mrecords), but we don't need them for this code. 484 * 485 * WARNING: See warnings in hammer_unlock_cursor() function. 486 */ 487 cursor.flags |= HAMMER_CURSOR_ATEDISK; 488 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) { 489 key_cur = cursor.node->ondisk->elms[cursor.index].base; 490 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid); 491 } 492 493 while (hammer_flusher_meta_halflimit(trans->hmp) || 494 hammer_flusher_undo_exhausted(trans, 2)) { 495 hammer_unlock_cursor(&cursor); 496 hammer_flusher_wait(trans->hmp, seq); 497 hammer_lock_cursor(&cursor); 498 seq = hammer_flusher_async_one(trans->hmp); 499 } 500 501 if (error == 0) 502 error = hammer_btree_iterate(&cursor); 503 } 504 if (error == ENOENT) 505 error = 0; 506 hammer_done_cursor(&cursor); 507 if (error == EDEADLK) 508 goto retry; 509 failed: 510 return(error); 511 } 512 513 /* 514 * Helper function - perform rollback on a B-Tree element given trunc_tid. 515 * 516 * If create_tid >= trunc_tid the record is physically destroyed. 517 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record. 518 */ 519 static 520 int 521 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid) 522 { 523 hammer_btree_leaf_elm_t elm; 524 int error; 525 526 elm = &cursor->node->ondisk->elms[cursor->index].leaf; 527 if (elm->base.create_tid < trunc_tid && 528 elm->base.delete_tid < trunc_tid) { 529 return(0); 530 } 531 532 if (elm->base.create_tid >= trunc_tid) { 533 error = hammer_delete_at_cursor( 534 cursor, HAMMER_DELETE_DESTROY, 535 cursor->trans->tid, cursor->trans->time32, 536 1, NULL); 537 } else if (elm->base.delete_tid >= trunc_tid) { 538 error = hammer_delete_at_cursor( 539 cursor, HAMMER_DELETE_ADJUST, 540 0, 0, 541 1, NULL); 542 } else { 543 error = 0; 544 } 545 return(error); 546 } 547 548