/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 * 34 * $DragonFly: src/sys/vfs/hammer/hammer_pfs.c,v 1.5 2008/07/31 04:42:04 dillon Exp $ 35 */ 36 /* 37 * HAMMER PFS ioctls - Manage pseudo-fs configurations 38 */ 39 40 #include "hammer.h" 41 42 static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, 43 hammer_inode_t ip); 44 static int hammer_pfs_rollback(hammer_transaction_t trans, 45 hammer_pseudofs_inmem_t pfsm, 46 hammer_tid_t trunc_tid); 47 static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, 48 hammer_tid_t trunc_tid); 49 50 /* 51 * Get mirroring/pseudo-fs information 52 * 53 * NOTE: The ip used for ioctl is not necessarily related to the PFS 54 */ 55 int 56 hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 57 struct hammer_ioc_pseudofs_rw *pfs) 58 { 59 hammer_pseudofs_inmem_t pfsm; 60 u_int32_t localization; 61 int error; 62 63 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 64 return(error); 65 localization = (u_int32_t)pfs->pfs_id << 16; 66 pfs->bytes = sizeof(struct hammer_pseudofs_data); 67 pfs->version = HAMMER_IOC_PSEUDOFS_VERSION; 68 69 pfsm = hammer_load_pseudofs(trans, localization, &error); 70 if (error) { 71 hammer_rel_pseudofs(trans->hmp, pfsm); 72 return(error); 73 } 74 75 /* 76 * If the PFS is a master the sync tid is set by normal operation 77 * rather then the mirroring code, and will always track the 78 * real HAMMER filesystem. 79 * 80 * We use flush_tid1, which is the highest fully committed TID. 81 * flush_tid2 is the TID most recently flushed, but the UNDO hasn't 82 * caught up to it yet so a crash will roll us back to flush_tid1. 83 */ 84 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) 85 pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1; 86 87 /* 88 * Copy out to userland. 
89 */ 90 error = 0; 91 if (pfs->ondisk && error == 0) 92 error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd)); 93 hammer_rel_pseudofs(trans->hmp, pfsm); 94 return(error); 95 } 96 97 /* 98 * Set mirroring/pseudo-fs information 99 * 100 * NOTE: The ip used for ioctl is not necessarily related to the PFS 101 */ 102 int 103 hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 104 struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs) 105 { 106 hammer_pseudofs_inmem_t pfsm; 107 u_int32_t localization; 108 int error; 109 110 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 111 return(error); 112 localization = (u_int32_t)pfs->pfs_id << 16; 113 if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION) 114 error = EINVAL; 115 localization = (u_int32_t)pfs->pfs_id << 16; 116 117 if (error == 0 && pfs->ondisk) { 118 /* 119 * Load the PFS so we can modify our in-core copy. Ignore 120 * ENOENT errors. 121 */ 122 pfsm = hammer_load_pseudofs(trans, localization, &error); 123 error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd)); 124 125 /* 126 * Save it back, create a root inode if we are in master 127 * mode and no root exists. 128 */ 129 if (error == 0) 130 error = hammer_mkroot_pseudofs(trans, cred, pfsm); 131 if (error == 0) 132 error = hammer_save_pseudofs(trans, pfsm); 133 134 /* 135 * Wakeup anyone waiting for a TID update for this PFS 136 */ 137 wakeup(&pfsm->pfsd.sync_end_tid); 138 hammer_rel_pseudofs(trans->hmp, pfsm); 139 } 140 return(error); 141 } 142 143 /* 144 * Upgrade a slave to a master 145 * 146 * This is fairly easy to do, but we must physically undo any partial syncs 147 * for transaction ids > sync_end_tid. Effective, we must do a partial 148 * rollback. 
149 * 150 * NOTE: The ip used for ioctl is not necessarily related to the PFS 151 */ 152 int 153 hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 154 struct hammer_ioc_pseudofs_rw *pfs) 155 { 156 hammer_pseudofs_inmem_t pfsm; 157 u_int32_t localization; 158 int error; 159 160 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 161 return(error); 162 localization = (u_int32_t)pfs->pfs_id << 16; 163 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 164 return(error); 165 166 /* 167 * A master id must be set when upgrading 168 */ 169 pfsm = hammer_load_pseudofs(trans, localization, &error); 170 if (error == 0) { 171 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) { 172 error = hammer_pfs_rollback(trans, pfsm, 173 pfsm->pfsd.sync_end_tid + 1); 174 if (error == 0) { 175 pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE; 176 error = hammer_save_pseudofs(trans, pfsm); 177 } 178 } 179 } 180 hammer_rel_pseudofs(trans->hmp, pfsm); 181 if (error == EINTR) { 182 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 183 error = 0; 184 } 185 return (error); 186 } 187 188 /* 189 * Downgrade a master to a slave 190 * 191 * This is really easy to do, just set the SLAVE flag. 192 * 193 * We also leave sync_end_tid intact... the field is not used in master 194 * mode (vol0_next_tid overrides it), but if someone switches to master 195 * mode accidently and then back to slave mode we don't want it to change. 196 * Eventually it will be used as the cross-synchronization TID in 197 * multi-master mode, and we don't want to mess with it for that feature 198 * either. 
199 * 200 * NOTE: The ip used for ioctl is not necessarily related to the PFS 201 */ 202 int 203 hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 204 struct hammer_ioc_pseudofs_rw *pfs) 205 { 206 hammer_pseudofs_inmem_t pfsm; 207 u_int32_t localization; 208 int error; 209 210 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 211 return(error); 212 localization = (u_int32_t)pfs->pfs_id << 16; 213 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 214 return(error); 215 216 pfsm = hammer_load_pseudofs(trans, localization, &error); 217 if (error == 0) { 218 if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) { 219 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE; 220 error = hammer_save_pseudofs(trans, pfsm); 221 } 222 } 223 hammer_rel_pseudofs(trans->hmp, pfsm); 224 return (error); 225 } 226 227 /* 228 * Destroy a PFS 229 * 230 * We can destroy a PFS by scanning and deleting all of its records in the 231 * B-Tree. The hammer utility will delete the softlink in the primary 232 * filesystem. 
233 * 234 * NOTE: The ip used for ioctl is not necessarily related to the PFS 235 */ 236 int 237 hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 238 struct hammer_ioc_pseudofs_rw *pfs) 239 { 240 hammer_pseudofs_inmem_t pfsm; 241 u_int32_t localization; 242 int error; 243 244 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 245 return(error); 246 localization = (u_int32_t)pfs->pfs_id << 16; 247 248 if ((error = hammer_unload_pseudofs(trans, localization)) != 0) 249 return(error); 250 251 pfsm = hammer_load_pseudofs(trans, localization, &error); 252 if (error == 0) { 253 error = hammer_pfs_rollback(trans, pfsm, 0); 254 if (error == 0) { 255 pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED; 256 error = hammer_save_pseudofs(trans, pfsm); 257 } 258 } 259 hammer_rel_pseudofs(trans->hmp, pfsm); 260 if (error == EINTR) { 261 pfs->head.flags |= HAMMER_IOC_HEAD_INTR; 262 error = 0; 263 } 264 return(error); 265 } 266 267 /* 268 * Wait for the PFS to sync past the specified TID 269 */ 270 int 271 hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip, 272 struct hammer_ioc_pseudofs_rw *pfs) 273 { 274 hammer_pseudofs_inmem_t pfsm; 275 struct hammer_pseudofs_data pfsd; 276 u_int32_t localization; 277 hammer_tid_t tid; 278 void *waitp; 279 int error; 280 281 if ((error = hammer_pfs_autodetect(pfs, ip)) != 0) 282 return(error); 283 localization = (u_int32_t)pfs->pfs_id << 16; 284 285 if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0) 286 return(error); 287 288 pfsm = hammer_load_pseudofs(trans, localization, &error); 289 if (error == 0) { 290 if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) { 291 tid = pfsm->pfsd.sync_end_tid; 292 waitp = &pfsm->pfsd.sync_end_tid; 293 } else { 294 tid = trans->hmp->flush_tid1; 295 waitp = &trans->hmp->flush_tid1; 296 } 297 if (tid <= pfsd.sync_end_tid) 298 tsleep(waitp, PCATCH, "hmrmwt", 0); 299 } 300 hammer_rel_pseudofs(trans->hmp, pfsm); 301 if (error == EINTR) { 302 pfs->head.flags |= 
HAMMER_IOC_HEAD_INTR; 303 error = 0; 304 } 305 return(error); 306 } 307 308 309 /* 310 * Auto-detect the pseudofs and do basic bounds checking. 311 */ 312 static 313 int 314 hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip) 315 { 316 int error = 0; 317 318 if (pfs->pfs_id == -1) 319 pfs->pfs_id = (int)(ip->obj_localization >> 16); 320 if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS) 321 error = EINVAL; 322 if (pfs->bytes < sizeof(struct hammer_pseudofs_data)) 323 error = EINVAL; 324 return(error); 325 } 326 327 /* 328 * Rollback the specified PFS to (trunc_tid - 1), removing everything 329 * greater or equal to trunc_tid. The PFS must not have been in no-mirror 330 * mode or the MIRROR_FILTERED scan will not work properly. 331 * 332 * This is typically used to remove any partial syncs when upgrading a 333 * slave to a master. It can theoretically also be used to rollback 334 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN 335 * PRUNED, and to points that are older only if they are on a retained 336 * (pruning softlink) boundary. 337 * 338 * Rollbacks destroy information. If you don't mind inode numbers changing 339 * a better way would be to cpdup a snapshot back onto the master. 
340 */ 341 static 342 int 343 hammer_pfs_rollback(hammer_transaction_t trans, 344 hammer_pseudofs_inmem_t pfsm, 345 hammer_tid_t trunc_tid) 346 { 347 struct hammer_cmirror cmirror; 348 struct hammer_cursor cursor; 349 struct hammer_base_elm key_cur; 350 int error; 351 int seq; 352 353 bzero(&cmirror, sizeof(cmirror)); 354 bzero(&key_cur, sizeof(key_cur)); 355 key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization; 356 key_cur.obj_id = HAMMER_MIN_OBJID; 357 key_cur.key = HAMMER_MIN_KEY; 358 key_cur.create_tid = 1; 359 key_cur.rec_type = HAMMER_MIN_RECTYPE; 360 361 seq = trans->hmp->flusher.act; 362 363 retry: 364 error = hammer_init_cursor(trans, &cursor, NULL, NULL); 365 if (error) { 366 hammer_done_cursor(&cursor); 367 goto failed; 368 } 369 cursor.key_beg = key_cur; 370 cursor.key_end.localization = HAMMER_MAX_LOCALIZATION + 371 pfsm->localization; 372 cursor.key_end.obj_id = HAMMER_MAX_OBJID; 373 cursor.key_end.key = HAMMER_MAX_KEY; 374 cursor.key_end.create_tid = HAMMER_MAX_TID; 375 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; 376 377 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; 378 cursor.flags |= HAMMER_CURSOR_BACKEND; 379 380 /* 381 * Do an optimized scan of only records created or modified 382 * >= trunc_tid, so we can fix up those records. We must 383 * still check the TIDs but this greatly reduces the size of 384 * the scan. 385 */ 386 cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED; 387 cursor.cmirror = &cmirror; 388 cmirror.mirror_tid = trunc_tid; 389 390 error = hammer_btree_first(&cursor); 391 while (error == 0) { 392 /* 393 * Abort the rollback. 394 */ 395 if (error == 0) { 396 error = hammer_signal_check(trans->hmp); 397 if (error) 398 break; 399 } 400 401 /* 402 * We only care about leafs. Internal nodes can be returned 403 * in mirror-filtered mode (they are used to generate SKIP 404 * mrecords), but we don't need them for this code. 
405 */ 406 cursor.flags |= HAMMER_CURSOR_ATEDISK; 407 if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) { 408 key_cur = cursor.node->ondisk->elms[cursor.index].base; 409 error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid); 410 } 411 412 while (hammer_flusher_meta_halflimit(trans->hmp) || 413 hammer_flusher_undo_exhausted(trans, 2)) { 414 hammer_unlock_cursor(&cursor, 0); 415 hammer_flusher_wait(trans->hmp, seq); 416 hammer_lock_cursor(&cursor, 0); 417 seq = hammer_flusher_async_one(trans->hmp); 418 } 419 420 if (error == 0) 421 error = hammer_btree_iterate(&cursor); 422 } 423 if (error == ENOENT) 424 error = 0; 425 hammer_done_cursor(&cursor); 426 if (error == EDEADLK) 427 goto retry; 428 failed: 429 return(error); 430 } 431 432 /* 433 * Helper function - perform rollback on a B-Tree element given trunc_tid. 434 * 435 * If create_tid >= trunc_tid the record is physically destroyed. 436 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record. 437 */ 438 static 439 int 440 hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid) 441 { 442 hammer_btree_leaf_elm_t elm; 443 hammer_transaction_t trans; 444 int error; 445 446 elm = &cursor->node->ondisk->elms[cursor->index].leaf; 447 if (elm->base.create_tid < trunc_tid && 448 elm->base.delete_tid < trunc_tid) { 449 return(0); 450 } 451 trans = cursor->trans; 452 453 if (elm->base.create_tid >= trunc_tid) { 454 error = hammer_delete_at_cursor( 455 cursor, HAMMER_DELETE_DESTROY, 456 cursor->trans->tid, cursor->trans->time32, 457 1, NULL); 458 } else if (elm->base.delete_tid >= trunc_tid) { 459 error = hammer_delete_at_cursor( 460 cursor, HAMMER_DELETE_ADJUST, 461 0, 0, 462 1, NULL); 463 } else { 464 error = 0; 465 } 466 return(error); 467 } 468 469