/*
 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $ 35 */ 36 /* 37 * HAMMER PFS ioctls - Manage pseudo-fs configurations 38 */ 39 40 #include "hammer.h" 41 42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, 43 hammer_inode_t ip); 44 static int hammer_pfs_rollback(hammer_transaction_t trans, 45 hammer_pseudofs_inmem_t pfsm, 46 hammer_tid_t trunc_tid); 47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, 48 hammer_tid_t trunc_tid); 49 50 /* 51 * Get mirroring/pseudo-fs information 52 * 53 * NOTE: The ip used for ioctl is not necessarily related to the PFS 54 */ 55 int 56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 57 struct hammer_ioc_pseudofs_rw *pfs) 58 { 59 hammer_pseudofs_inmem_t pfsm; 60 u_int32_t localization; 61 int error; 62 63 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 64 return(error); 65 localization = (u_int32_t)pfs->pfs_id << 16; 66 pfs->bytes = sizeof(struct hammer_pseudofs_data); 67 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION; 68 69 pfsm = hammer_load_pseudofs(trans, localization, &error); 70 if (error) { 71 hammer_rel_pseudofs(trans->hmp, pfsm); 72 return(error); 73 } 74 75 /* 76 * If the PFS is a master the sync tid is set by normal operation 77 * rather than the mirroring code, and will always track the 78 * real HAMMER filesystem. 79 * 80 * We use flush_tid1, which is the highest fully committed TID. 81 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't 82 * caught up to it yet so a crash will roll us back to flush_tid1. 83 */ 84 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) 85 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; 86 87 /* 88 * Copy out to userland. 
89 */ 90 error = 0; 91 if (pfs->ondisk && error == 0) 92 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd)); 93 hammer_rel_pseudofs(trans->hmp, pfsm); 94 return(error); 95 } 96 97 /* 98 * Set mirroring/pseudo-fs information 99 * 100 * NOTE: The ip used for ioctl is not necessarily related to the PFS 101 */ 102 int 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs) 105 { 106 hammer_pseudofs_inmem_t pfsm; 107 u_int32_t localization; 108 int error; 109 110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 111 return(error); 112 localization = (u_int32_t)pfs->pfs_id << 16; 113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION) 114 error = EINVAL; 115 localization = (u_int32_t)pfs->pfs_id << 16; 116 117 if (error == 0 && pfs->ondisk) { 118 /* 119 * Load the PFS so we can modify our in-core copy. Ignore 120 * ENOENT errors. 121 */ 122 pfsm = hammer_load_pseudofs(trans, localization, &error); 123 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd)); 124 125 /* 126 * Save it back, create a root inode if we are in master 127 * mode and no root exists. 128 * 129 * We do not create root inodes for slaves, the root inode 130 * must be mirrored from the master. 131 */ 132 if (error == 0 && 133 (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 134 error = hammer_mkroot_pseudofs(trans, cred, pfsm); 135 } 136 if (error == 0) 137 error = hammer_save_pseudofs(trans, pfsm); 138 139 /* 140 * Wakeup anyone waiting for a TID update for this PFS 141 */ 142 wakeup(&pfsm->pfsd.sync_end_tid); 143 hammer_rel_pseudofs(trans->hmp, pfsm); 144 } 145 return(error); 146 } 147 148 /* 149 * Upgrade a slave to a master 150 * 151 * This is fairly easy to do, but we must physically undo any partial syncs 152 * for transaction ids > sync_end_tid. Effective, we must do a partial 153 * rollback. 
154 * 155 * NOTE: The ip used for ioctl is not necessarily related to the PFS 156 */ 157 int 158 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 159 struct hammer_ioc_pseudofs_rw *pfs) 160 { 161 hammer_pseudofs_inmem_t pfsm; 162 u_int32_t localization; 163 int error; 164 165 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 166 return(error); 167 localization = (u_int32_t)pfs->pfs_id << 16; 168 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 169 return(error); 170 171 /* 172 * A master id must be set when upgrading 173 */ 174 pfsm = hammer_load_pseudofs(trans, localization, &error); 175 if (error == 0) { 176 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) { 177 error = hammer_pfs_rollback(trans, pfsm, 178 pfsm->pfsd.sync_end_tid + 1); 179 if (error == 0) { 180 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE; 181 error = hammer_save_pseudofs(trans, pfsm); 182 } 183 } 184 } 185 hammer_rel_pseudofs(trans->hmp, pfsm); 186 if (error == EINTR) { 187 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 188 error = 0; 189 } 190 return (error); 191 } 192 193 /* 194 * Downgrade a master to a slave 195 * 196 * This is really easy to do, just set the SLAVE flag and update sync_end_tid. 197 * 198 * We previously did not update sync_end_tid in consideration for a slave 199 * upgraded to a master and then downgraded again, but this completely breaks 200 * the case where one starts with a master and then downgrades to a slave, 201 * then upgrades again. 
202 * 203 * NOTE: The ip used for ioctl is not necessarily related to the PFS 204 */ 205 int 206 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 207 struct hammer_ioc_pseudofs_rw *pfs) 208 { 209 hammer_mount_t hmp = trans->hmp; 210 hammer_pseudofs_inmem_t pfsm; 211 u_int32_t localization; 212 int error; 213 214 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 215 return(error); 216 localization = (u_int32_t)pfs->pfs_id << 16; 217 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 218 return(error); 219 220 pfsm = hammer_load_pseudofs(trans, localization, &error); 221 if (error == 0) { 222 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 223 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE; 224 if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1) 225 pfsm->pfsd.sync_end_tid = hmp->flush_tid1; 226 error = hammer_save_pseudofs(trans, pfsm); 227 } 228 } 229 hammer_rel_pseudofs(trans->hmp, pfsm); 230 return (error); 231 } 232 233 /* 234 * Destroy a PFS 235 * 236 * We can destroy a PFS by scanning and deleting all of its records in the 237 * B-Tree. The hammer utility will delete the softlink in the primary 238 * filesystem. 
239 * 240 * NOTE: The ip used for ioctl is not necessarily related to the PFS 241 */ 242 int 243 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 244 struct hammer_ioc_pseudofs_rw *pfs) 245 { 246 hammer_pseudofs_inmem_t pfsm; 247 u_int32_t localization; 248 int error; 249 250 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 251 return(error); 252 localization = (u_int32_t)pfs->pfs_id << 16; 253 254 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 255 return(error); 256 257 pfsm = hammer_load_pseudofs(trans, localization, &error); 258 if (error == 0) { 259 error = hammer_pfs_rollback(trans, pfsm, 0); 260 if (error == 0) { 261 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED; 262 error = hammer_save_pseudofs(trans, pfsm); 263 } 264 } 265 hammer_rel_pseudofs(trans->hmp, pfsm); 266 if (error == EINTR) { 267 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 268 error = 0; 269 } 270 return(error); 271 } 272 273 /* 274 * Wait for the PFS to sync past the specified TID 275 */ 276 int 277 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 278 struct hammer_ioc_pseudofs_rw *pfs) 279 { 280 hammer_pseudofs_inmem_t pfsm; 281 struct hammer_pseudofs_data pfsd; 282 u_int32_t localization; 283 hammer_tid_t tid; 284 void *waitp; 285 int error; 286 287 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 288 return(error); 289 localization = (u_int32_t)pfs->pfs_id << 16; 290 291 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) 292 return(error); 293 294 pfsm = hammer_load_pseudofs(trans, localization, &error); 295 if (error == 0) { 296 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { 297 tid = pfsm->pfsd.sync_end_tid; 298 waitp = &pfsm->pfsd.sync_end_tid; 299 } else { 300 tid = trans->hmp->flush_tid1; 301 waitp = &trans->hmp->flush_tid1; 302 } 303 if (tid <= pfsd.sync_end_tid) 304 tsleep(waitp, PCATCH, "hmrmwt", 0); 305 } 306 hammer_rel_pseudofs(trans->hmp, pfsm); 307 if (error == EINTR) { 308 pfs->head.flags |= 
HAMMER_IOC_HEAD_INTR; 309 error = 0; 310 } 311 return(error); 312 } 313 314 315 /* 316 * Auto-detect the pseudofs and do basic bounds checking. 317 */ 318 static 319 int 320 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip) 321 { 322 int error = 0; 323 324 if (pfs->pfs_id == -1) 325 pfs->pfs_id = (int)(ip->obj_localization >> 16); 326 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS) 327 error = EINVAL; 328 if (pfs->bytes < sizeof(struct hammer_pseudofs_data)) 329 error = EINVAL; 330 return(error); 331 } 332 333 /* 334 * Rollback the specified PFS to (trunc_tid - 1), removing everything 335 * greater or equal to trunc_tid. The PFS must not have been in no-mirror 336 * mode or the MIRROR_FILTERED scan will not work properly. 337 * 338 * This is typically used to remove any partial syncs when upgrading a 339 * slave to a master. It can theoretically also be used to rollback 340 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN 341 * PRUNED, and to points that are older only if they are on a retained 342 * (pruning softlink) boundary. 343 * 344 * Rollbacks destroy information. If you don't mind inode numbers changing 345 * a better way would be to cpdup a snapshot back onto the master. 
346 */ 347 static 348 int 349 hammer_pfs_rollback(hammer_transaction_t trans, 350 hammer_pseudofs_inmem_t pfsm, 351 hammer_tid_t trunc_tid) 352 { 353 struct hammer_cmirror cmirror; 354 struct hammer_cursor cursor; 355 struct hammer_base_elm key_cur; 356 int error; 357 int seq; 358 359 bzero(&cmirror, sizeof(cmirror)); 360 bzero(&key_cur, sizeof(key_cur)); 361 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization; 362 key_cur.obj_id = HAMMER_MIN_OBJID; 363 key_cur.key = HAMMER_MIN_KEY; 364 key_cur.create_tid = 1; 365 key_cur.rec_type = HAMMER_MIN_RECTYPE; 366 367 seq = trans->hmp->flusher.done; 368 369 retry: 370 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 371 if (error) { 372 hammer_done_cursor(&cursor); 373 goto failed; 374 } 375 cursor.key_beg = key_cur; 376 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION + 377 pfsm->localization; 378 cursor.key_end.obj_id = HAMMER_MAX_OBJID; 379 cursor.key_end.key = HAMMER_MAX_KEY; 380 cursor.key_end.create_tid = HAMMER_MAX_TID; 381 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 382 383 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 384 cursor.flags |= HAMMER_CURSOR_BACKEND; 385 386 /* 387 * Do an optimized scan of only records created or modified 388 * >= trunc_tid, so we can fix up those records. We must 389 * still check the TIDs but this greatly reduces the size of 390 * the scan. 391 */ 392 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 393 cursor.cmirror = &cmirror; 394 cmirror.mirror_tid = trunc_tid; 395 396 error = hammer_btree_first(&cursor); 397 while (error == 0) { 398 /* 399 * Abort the rollback. 400 */ 401 if (error == 0) { 402 error = hammer_signal_check(trans->hmp); 403 if (error) 404 break; 405 } 406 407 /* 408 * We only care about leafs. Internal nodes can be returned 409 * in mirror-filtered mode (they are used to generate SKIP 410 * mrecords), but we don't need them for this code. 411 * 412 * WARNING: See warnings in hammer_unlock_cursor() function. 
413 */ 414 cursor.flags |= HAMMER_CURSOR_ATEDISK; 415 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) { 416 key_cur = cursor.node->ondisk->elms[cursor.index].base; 417 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid); 418 } 419 420 while (hammer_flusher_meta_halflimit(trans->hmp) || 421 hammer_flusher_undo_exhausted(trans, 2)) { 422 hammer_unlock_cursor(&cursor); 423 hammer_flusher_wait(trans->hmp, seq); 424 hammer_lock_cursor(&cursor); 425 seq = hammer_flusher_async_one(trans->hmp); 426 } 427 428 if (error == 0) 429 error = hammer_btree_iterate(&cursor); 430 } 431 if (error == ENOENT) 432 error = 0; 433 hammer_done_cursor(&cursor); 434 if (error == EDEADLK) 435 goto retry; 436 failed: 437 return(error); 438 } 439 440 /* 441 * Helper function - perform rollback on a B-Tree element given trunc_tid. 442 * 443 * If create_tid >= trunc_tid the record is physically destroyed. 444 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record. 445 */ 446 static 447 int 448 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid) 449 { 450 hammer_btree_leaf_elm_t elm; 451 hammer_transaction_t trans; 452 int error; 453 454 elm = &cursor->node->ondisk->elms[cursor->index].leaf; 455 if (elm->base.create_tid < trunc_tid && 456 elm->base.delete_tid < trunc_tid) { 457 return(0); 458 } 459 trans = cursor->trans; 460 461 if (elm->base.create_tid >= trunc_tid) { 462 error = hammer_delete_at_cursor( 463 cursor, HAMMER_DELETE_DESTROY, 464 cursor->trans->tid, cursor->trans->time32, 465 1, NULL); 466 } else if (elm->base.delete_tid >= trunc_tid) { 467 error = hammer_delete_at_cursor( 468 cursor, HAMMER_DELETE_ADJUST, 469 0, 0, 470 1, NULL); 471 } else { 472 error = 0; 473 } 474 return(error); 475 } 476 477