1 /* 2 * Copyright (c) 2008 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $ 35 */ 36 /* 37 * HAMMER PFS ioctls - Manage pseudo-fs configurations 38 */ 39 40 #include "hammer.h" 41 42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, 43 hammer_inode_t ip); 44 static int hammer_pfs_rollback(hammer_transaction_t trans, 45 hammer_pseudofs_inmem_t pfsm, 46 hammer_tid_t trunc_tid); 47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, 48 hammer_tid_t trunc_tid); 49 50 /* 51 * Get mirroring/pseudo-fs information 52 * 53 * NOTE: The ip used for ioctl is not necessarily related to the PFS 54 */ 55 int 56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 57 struct hammer_ioc_pseudofs_rw *pfs) 58 { 59 hammer_pseudofs_inmem_t pfsm; 60 u_int32_t localization; 61 int error; 62 63 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 64 return(error); 65 localization = (u_int32_t)pfs->pfs_id << 16; 66 pfs->bytes = sizeof(struct hammer_pseudofs_data); 67 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION; 68 69 pfsm = hammer_load_pseudofs(trans, localization, &error); 70 if (error) { 71 hammer_rel_pseudofs(trans->hmp, pfsm); 72 return(error); 73 } 74 75 /* 76 * If the PFS is a master the sync tid is set by normal operation 77 * rather then the mirroring code, and will always track the 78 * real HAMMER filesystem. 79 * 80 * We use flush_tid1, which is the highest fully committed TID. 81 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't 82 * caught up to it yet so a crash will roll us back to flush_tid1. 83 */ 84 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) 85 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; 86 87 /* 88 * Copy out to userland. 89 */ 90 error = 0; 91 if (pfs->ondisk && error == 0) 92 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd)); 93 hammer_rel_pseudofs(trans->hmp, pfsm); 94 return(error); 95 } 96 97 /* 98 * Set mirroring/pseudo-fs information 99 * 100 * NOTE: The ip used for ioctl is not necessarily related to the PFS 101 */ 102 int 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs) 105 { 106 hammer_pseudofs_inmem_t pfsm; 107 u_int32_t localization; 108 int error; 109 110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 111 return(error); 112 localization = (u_int32_t)pfs->pfs_id << 16; 113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION) 114 error = EINVAL; 115 localization = (u_int32_t)pfs->pfs_id << 16; 116 117 if (error == 0 && pfs->ondisk) { 118 /* 119 * Load the PFS so we can modify our in-core copy. Ignore 120 * ENOENT errors. 121 */ 122 pfsm = hammer_load_pseudofs(trans, localization, &error); 123 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd)); 124 125 /* 126 * Save it back, create a root inode if we are in master 127 * mode and no root exists. 128 * 129 * We do not create root inodes for slaves, the root inode 130 * must be mirrored from the master. 131 */ 132 if (error == 0 && 133 (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 134 error = hammer_mkroot_pseudofs(trans, cred, pfsm); 135 } 136 if (error == 0) 137 error = hammer_save_pseudofs(trans, pfsm); 138 139 /* 140 * Wakeup anyone waiting for a TID update for this PFS 141 */ 142 wakeup(&pfsm->pfsd.sync_end_tid); 143 hammer_rel_pseudofs(trans->hmp, pfsm); 144 } 145 return(error); 146 } 147 148 /* 149 * Upgrade a slave to a master 150 * 151 * This is fairly easy to do, but we must physically undo any partial syncs 152 * for transaction ids > sync_end_tid. Effective, we must do a partial 153 * rollback. 154 * 155 * NOTE: The ip used for ioctl is not necessarily related to the PFS 156 */ 157 int 158 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 159 struct hammer_ioc_pseudofs_rw *pfs) 160 { 161 hammer_pseudofs_inmem_t pfsm; 162 u_int32_t localization; 163 int error; 164 165 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 166 return(error); 167 localization = (u_int32_t)pfs->pfs_id << 16; 168 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 169 return(error); 170 171 /* 172 * A master id must be set when upgrading 173 */ 174 pfsm = hammer_load_pseudofs(trans, localization, &error); 175 if (error == 0) { 176 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) { 177 error = hammer_pfs_rollback(trans, pfsm, 178 pfsm->pfsd.sync_end_tid + 1); 179 if (error == 0) { 180 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE; 181 error = hammer_save_pseudofs(trans, pfsm); 182 } 183 } 184 } 185 hammer_rel_pseudofs(trans->hmp, pfsm); 186 if (error == EINTR) { 187 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 188 error = 0; 189 } 190 return (error); 191 } 192 193 /* 194 * Downgrade a master to a slave 195 * 196 * This is really easy to do, just set the SLAVE flag. 197 * 198 * We also leave sync_end_tid intact... the field is not used in master 199 * mode (vol0_next_tid overrides it), but if someone switches to master 200 * mode accidently and then back to slave mode we don't want it to change. 201 * Eventually it will be used as the cross-synchronization TID in 202 * multi-master mode, and we don't want to mess with it for that feature 203 * either. 204 * 205 * NOTE: The ip used for ioctl is not necessarily related to the PFS 206 */ 207 int 208 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 209 struct hammer_ioc_pseudofs_rw *pfs) 210 { 211 hammer_pseudofs_inmem_t pfsm; 212 u_int32_t localization; 213 int error; 214 215 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 216 return(error); 217 localization = (u_int32_t)pfs->pfs_id << 16; 218 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 219 return(error); 220 221 pfsm = hammer_load_pseudofs(trans, localization, &error); 222 if (error == 0) { 223 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 224 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE; 225 error = hammer_save_pseudofs(trans, pfsm); 226 } 227 } 228 hammer_rel_pseudofs(trans->hmp, pfsm); 229 return (error); 230 } 231 232 /* 233 * Destroy a PFS 234 * 235 * We can destroy a PFS by scanning and deleting all of its records in the 236 * B-Tree. The hammer utility will delete the softlink in the primary 237 * filesystem. 238 * 239 * NOTE: The ip used for ioctl is not necessarily related to the PFS 240 */ 241 int 242 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 243 struct hammer_ioc_pseudofs_rw *pfs) 244 { 245 hammer_pseudofs_inmem_t pfsm; 246 u_int32_t localization; 247 int error; 248 249 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 250 return(error); 251 localization = (u_int32_t)pfs->pfs_id << 16; 252 253 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 254 return(error); 255 256 pfsm = hammer_load_pseudofs(trans, localization, &error); 257 if (error == 0) { 258 error = hammer_pfs_rollback(trans, pfsm, 0); 259 if (error == 0) { 260 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED; 261 error = hammer_save_pseudofs(trans, pfsm); 262 } 263 } 264 hammer_rel_pseudofs(trans->hmp, pfsm); 265 if (error == EINTR) { 266 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 267 error = 0; 268 } 269 return(error); 270 } 271 272 /* 273 * Wait for the PFS to sync past the specified TID 274 */ 275 int 276 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 277 struct hammer_ioc_pseudofs_rw *pfs) 278 { 279 hammer_pseudofs_inmem_t pfsm; 280 struct hammer_pseudofs_data pfsd; 281 u_int32_t localization; 282 hammer_tid_t tid; 283 void *waitp; 284 int error; 285 286 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 287 return(error); 288 localization = (u_int32_t)pfs->pfs_id << 16; 289 290 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) 291 return(error); 292 293 pfsm = hammer_load_pseudofs(trans, localization, &error); 294 if (error == 0) { 295 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { 296 tid = pfsm->pfsd.sync_end_tid; 297 waitp = &pfsm->pfsd.sync_end_tid; 298 } else { 299 tid = trans->hmp->flush_tid1; 300 waitp = &trans->hmp->flush_tid1; 301 } 302 if (tid <= pfsd.sync_end_tid) 303 tsleep(waitp, PCATCH, "hmrmwt", 0); 304 } 305 hammer_rel_pseudofs(trans->hmp, pfsm); 306 if (error == EINTR) { 307 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 308 error = 0; 309 } 310 return(error); 311 } 312 313 314 /* 315 * Auto-detect the pseudofs and do basic bounds checking. 316 */ 317 static 318 int 319 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip) 320 { 321 int error = 0; 322 323 if (pfs->pfs_id == -1) 324 pfs->pfs_id = (int)(ip->obj_localization >> 16); 325 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS) 326 error = EINVAL; 327 if (pfs->bytes < sizeof(struct hammer_pseudofs_data)) 328 error = EINVAL; 329 return(error); 330 } 331 332 /* 333 * Rollback the specified PFS to (trunc_tid - 1), removing everything 334 * greater or equal to trunc_tid. The PFS must not have been in no-mirror 335 * mode or the MIRROR_FILTERED scan will not work properly. 336 * 337 * This is typically used to remove any partial syncs when upgrading a 338 * slave to a master. It can theoretically also be used to rollback 339 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN 340 * PRUNED, and to points that are older only if they are on a retained 341 * (pruning softlink) boundary. 342 * 343 * Rollbacks destroy information. If you don't mind inode numbers changing 344 * a better way would be to cpdup a snapshot back onto the master. 345 */ 346 static 347 int 348 hammer_pfs_rollback(hammer_transaction_t trans, 349 hammer_pseudofs_inmem_t pfsm, 350 hammer_tid_t trunc_tid) 351 { 352 struct hammer_cmirror cmirror; 353 struct hammer_cursor cursor; 354 struct hammer_base_elm key_cur; 355 int error; 356 int seq; 357 358 bzero(&cmirror, sizeof(cmirror)); 359 bzero(&key_cur, sizeof(key_cur)); 360 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization; 361 key_cur.obj_id = HAMMER_MIN_OBJID; 362 key_cur.key = HAMMER_MIN_KEY; 363 key_cur.create_tid = 1; 364 key_cur.rec_type = HAMMER_MIN_RECTYPE; 365 366 seq = trans->hmp->flusher.act; 367 368 retry: 369 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 370 if (error) { 371 hammer_done_cursor(&cursor); 372 goto failed; 373 } 374 cursor.key_beg = key_cur; 375 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION + 376 pfsm->localization; 377 cursor.key_end.obj_id = HAMMER_MAX_OBJID; 378 cursor.key_end.key = HAMMER_MAX_KEY; 379 cursor.key_end.create_tid = HAMMER_MAX_TID; 380 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 381 382 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 383 cursor.flags |= HAMMER_CURSOR_BACKEND; 384 385 /* 386 * Do an optimized scan of only records created or modified 387 * >= trunc_tid, so we can fix up those records. We must 388 * still check the TIDs but this greatly reduces the size of 389 * the scan. 390 */ 391 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 392 cursor.cmirror = &cmirror; 393 cmirror.mirror_tid = trunc_tid; 394 395 error = hammer_btree_first(&cursor); 396 while (error == 0) { 397 /* 398 * Abort the rollback. 399 */ 400 if (error == 0) { 401 error = hammer_signal_check(trans->hmp); 402 if (error) 403 break; 404 } 405 406 /* 407 * We only care about leafs. Internal nodes can be returned 408 * in mirror-filtered mode (they are used to generate SKIP 409 * mrecords), but we don't need them for this code. 410 * 411 * WARNING: See warnings in hammer_unlock_cursor() function. 412 */ 413 cursor.flags |= HAMMER_CURSOR_ATEDISK; 414 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) { 415 key_cur = cursor.node->ondisk->elms[cursor.index].base; 416 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid); 417 } 418 419 while (hammer_flusher_meta_halflimit(trans->hmp) || 420 hammer_flusher_undo_exhausted(trans, 2)) { 421 hammer_unlock_cursor(&cursor); 422 hammer_flusher_wait(trans->hmp, seq); 423 hammer_lock_cursor(&cursor); 424 seq = hammer_flusher_async_one(trans->hmp); 425 } 426 427 if (error == 0) 428 error = hammer_btree_iterate(&cursor); 429 } 430 if (error == ENOENT) 431 error = 0; 432 hammer_done_cursor(&cursor); 433 if (error == EDEADLK) 434 goto retry; 435 failed: 436 return(error); 437 } 438 439 /* 440 * Helper function - perform rollback on a B-Tree element given trunc_tid. 441 * 442 * If create_tid >= trunc_tid the record is physically destroyed. 443 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record. 444 */ 445 static 446 int 447 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid) 448 { 449 hammer_btree_leaf_elm_t elm; 450 hammer_transaction_t trans; 451 int error; 452 453 elm = &cursor->node->ondisk->elms[cursor->index].leaf; 454 if (elm->base.create_tid < trunc_tid && 455 elm->base.delete_tid < trunc_tid) { 456 return(0); 457 } 458 trans = cursor->trans; 459 460 if (elm->base.create_tid >= trunc_tid) { 461 error = hammer_delete_at_cursor( 462 cursor, HAMMER_DELETE_DESTROY, 463 cursor->trans->tid, cursor->trans->time32, 464 1, NULL); 465 } else if (elm->base.delete_tid >= trunc_tid) { 466 error = hammer_delete_at_cursor( 467 cursor, HAMMER_DELETE_ADJUST, 468 0, 0, 469 1, NULL); 470 } else { 471 error = 0; 472 } 473 return(error); 474 } 475 476