/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER PFS ioctls - Manage pseudo-fs configurations
 */

#include "hammer.h"

static int hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs,
                        hammer_inode_t ip);
static int hammer_pfs_rollback(hammer_transaction_t trans,
                        hammer_pseudofs_inmem_t pfsm,
                        hammer_tid_t trunc_tid);
static int hammer_pfs_delete_at_cursor(hammer_cursor_t cursor,
                        hammer_tid_t trunc_tid);

/*
 * Get mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_get_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        u_int32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = (u_int32_t)pfs->pfs_id << 16;
        pfs->bytes = sizeof(struct hammer_pseudofs_data);
        pfs->version = HAMMER_IOC_PSEUDOFS_VERSION;

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error) {
                hammer_rel_pseudofs(trans->hmp, pfsm);
                return(error);
        }

        /*
         * If the PFS is a master the sync tid is set by normal operation
         * rather than the mirroring code, and will always track the
         * real HAMMER filesystem.
         *
         * We use flush_tid1, which is the highest fully committed TID.
         * flush_tid2 is the TID most recently flushed, but the UNDO hasn't
         * caught up to it yet so a crash will roll us back to flush_tid1.
         */
        if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0)
                pfsm->pfsd.sync_end_tid = trans->hmp->flush_tid1;

        /*
         * Copy out to userland.
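         *
         * pfs->ondisk may be NULL, in which case nothing is copied and
         * the caller only gets the pfs_id, bytes and version fields
         * filled in above.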
         */
        error = 0;
        if (pfs->ondisk && error == 0)
                error = copyout(&pfsm->pfsd, pfs->ondisk, sizeof(pfsm->pfsd));
        hammer_rel_pseudofs(trans->hmp, pfsm);
        return(error);
}

/*
 * Set mirroring/pseudo-fs information
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_set_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct ucred *cred, struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        u_int32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = (u_int32_t)pfs->pfs_id << 16;
        if (pfs->version != HAMMER_IOC_PSEUDOFS_VERSION)
                error = EINVAL;

        if (error == 0 && pfs->ondisk) {
                /*
                 * Load the PFS so we can modify our in-core copy.  Ignore
                 * ENOENT errors.
                 */
                pfsm = hammer_load_pseudofs(trans, localization, &error);
                error = copyin(pfs->ondisk, &pfsm->pfsd, sizeof(pfsm->pfsd));

                /*
                 * Save it back, create a root inode if we are in master
                 * mode and no root exists.
                 *
                 * We do not create root inodes for slaves, the root inode
                 * must be mirrored from the master.
                 */
                if (error == 0 &&
                    (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
                        error = hammer_mkroot_pseudofs(trans, cred, pfsm);
                }
                if (error == 0)
                        error = hammer_save_pseudofs(trans, pfsm);

                /*
                 * Wakeup anyone waiting for a TID update for this PFS
                 */
                wakeup(&pfsm->pfsd.sync_end_tid);
                hammer_rel_pseudofs(trans->hmp, pfsm);
        }
        return(error);
}

/*
 * Upgrade a slave to a master
 *
 * This is fairly easy to do, but we must physically undo any partial syncs
 * for transaction ids > sync_end_tid.  Effectively, we must do a partial
 * rollback.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_upgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        u_int32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = (u_int32_t)pfs->pfs_id << 16;
        if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
                return(error);

        /*
         * A master id must be set when upgrading
         */
        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) != 0) {
                        error = hammer_pfs_rollback(trans, pfsm,
                                            pfsm->pfsd.sync_end_tid + 1);
                        if (error == 0) {
                                pfsm->pfsd.mirror_flags &= ~HAMMER_PFSD_SLAVE;
                                error = hammer_save_pseudofs(trans, pfsm);
                        }
                }
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        if (error == EINTR) {
                pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
                error = 0;
        }
        return (error);
}

/*
 * Downgrade a master to a slave
 *
 * This is really easy to do: just set the SLAVE flag and update sync_end_tid.
 *
 * We previously did not update sync_end_tid, to accommodate a slave that had
 * been upgraded to a master and then downgraded again, but this completely
 * breaks the case where one starts with a master, downgrades to a slave, and
 * then upgrades again.
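 *
 * We therefore bump sync_end_tid up to the current flush_tid1 (the highest
 * fully committed TID) on downgrade, so the new slave's record of what has
 * been synced matches what was actually committed while it was a master.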
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_downgrade_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_mount_t hmp = trans->hmp;
        hammer_pseudofs_inmem_t pfsm;
        u_int32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = (u_int32_t)pfs->pfs_id << 16;
        if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
                return(error);

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                if ((pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) == 0) {
                        pfsm->pfsd.mirror_flags |= HAMMER_PFSD_SLAVE;
                        if (pfsm->pfsd.sync_end_tid < hmp->flush_tid1)
                                pfsm->pfsd.sync_end_tid = hmp->flush_tid1;
                        error = hammer_save_pseudofs(trans, pfsm);
                }
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        return (error);
}

/*
 * Destroy a PFS
 *
 * We can destroy a PFS by scanning and deleting all of its records in the
 * B-Tree.  The hammer utility will delete the softlink in the primary
 * filesystem.
 *
 * NOTE: The ip used for ioctl is not necessarily related to the PFS
 */
int
hammer_ioc_destroy_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        u_int32_t localization;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = (u_int32_t)pfs->pfs_id << 16;

        if ((error = hammer_unload_pseudofs(trans, localization)) != 0)
                return(error);

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                error = hammer_pfs_rollback(trans, pfsm, 0);
                if (error == 0) {
                        pfsm->pfsd.mirror_flags |= HAMMER_PFSD_DELETED;
                        error = hammer_save_pseudofs(trans, pfsm);
                }
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        if (error == EINTR) {
                pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
                error = 0;
        }
        return(error);
}

/*
 * Wait for the PFS to sync past the specified TID
 */
int
hammer_ioc_wait_pseudofs(hammer_transaction_t trans, hammer_inode_t ip,
                        struct hammer_ioc_pseudofs_rw *pfs)
{
        hammer_pseudofs_inmem_t pfsm;
        struct hammer_pseudofs_data pfsd;
        u_int32_t localization;
        hammer_tid_t tid;
        void *waitp;
        int error;

        if ((error = hammer_pfs_autodetect(pfs, ip)) != 0)
                return(error);
        localization = (u_int32_t)pfs->pfs_id << 16;

        if ((error = copyin(pfs->ondisk, &pfsd, sizeof(pfsd))) != 0)
                return(error);

        pfsm = hammer_load_pseudofs(trans, localization, &error);
        if (error == 0) {
                if (pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE) {
                        tid = pfsm->pfsd.sync_end_tid;
                        waitp = &pfsm->pfsd.sync_end_tid;
                } else {
                        tid = trans->hmp->flush_tid1;
                        waitp = &trans->hmp->flush_tid1;
                }
                if (tid <= pfsd.sync_end_tid)
                        tsleep(waitp, PCATCH, "hmrmwt", 0);
        }
        hammer_rel_pseudofs(trans->hmp, pfsm);
        if (error == EINTR) {
                pfs->head.flags |= HAMMER_IOC_HEAD_INTR;
                error = 0;
        }
        return(error);
}

/*
 * Auto-detect the pseudofs and do basic bounds checking.
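 *
 * A pfs_id of -1 means "the PFS the ioctl inode lives in"; it is replaced
 * with the id encoded in the inode's localization field.  The resulting id
 * and the caller-supplied buffer size are then range checked.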
 */
static
int
hammer_pfs_autodetect(struct hammer_ioc_pseudofs_rw *pfs, hammer_inode_t ip)
{
        int error = 0;

        if (pfs->pfs_id == -1)
                pfs->pfs_id = (int)(ip->obj_localization >> 16);
        if (pfs->pfs_id < 0 || pfs->pfs_id >= HAMMER_MAX_PFS)
                error = EINVAL;
        if (pfs->bytes < sizeof(struct hammer_pseudofs_data))
                error = EINVAL;
        return(error);
}

/*
 * Rollback the specified PFS to (trunc_tid - 1), removing everything
 * greater or equal to trunc_tid.  The PFS must not have been in no-mirror
 * mode or the MIRROR_FILTERED scan will not work properly.
 *
 * This is typically used to remove any partial syncs when upgrading a
 * slave to a master.  It can theoretically also be used to rollback
 * any PFS, including PFS#0, BUT ONLY TO POINTS THAT HAVE NOT YET BEEN
 * PRUNED, and to points that are older only if they are on a retained
 * (pruning softlink) boundary.
 *
 * Rollbacks destroy information.  If you don't mind inode numbers changing,
 * a better way would be to cpdup a snapshot back onto the master.
 */
static
int
hammer_pfs_rollback(hammer_transaction_t trans,
                    hammer_pseudofs_inmem_t pfsm,
                    hammer_tid_t trunc_tid)
{
        struct hammer_cmirror cmirror;
        struct hammer_cursor cursor;
        struct hammer_base_elm key_cur;
        int error;
        int seq;

        bzero(&cmirror, sizeof(cmirror));
        bzero(&key_cur, sizeof(key_cur));
        key_cur.localization = HAMMER_MIN_LOCALIZATION + pfsm->localization;
        key_cur.obj_id = HAMMER_MIN_OBJID;
        key_cur.key = HAMMER_MIN_KEY;
        key_cur.create_tid = 1;
        key_cur.rec_type = HAMMER_MIN_RECTYPE;

        seq = trans->hmp->flusher.done;

retry:
        error = hammer_init_cursor(trans, &cursor, NULL, NULL);
        if (error) {
                hammer_done_cursor(&cursor);
                goto failed;
        }
        cursor.key_beg = key_cur;
        cursor.key_end.localization = HAMMER_MAX_LOCALIZATION +
                                      pfsm->localization;
        cursor.key_end.obj_id = HAMMER_MAX_OBJID;
        cursor.key_end.key = HAMMER_MAX_KEY;
        cursor.key_end.create_tid = HAMMER_MAX_TID;
        cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;

        cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
        cursor.flags |= HAMMER_CURSOR_BACKEND;

        /*
         * Do an optimized scan of only records created or modified
         * >= trunc_tid, so we can fix up those records.  We must
         * still check the TIDs but this greatly reduces the size of
         * the scan.
         */
        cursor.flags |= HAMMER_CURSOR_MIRROR_FILTERED;
        cursor.cmirror = &cmirror;
        cmirror.mirror_tid = trunc_tid;

        error = hammer_btree_first(&cursor);
        while (error == 0) {
                /*
                 * Abort the rollback if the user process has been signaled
                 * (returns EINTR).
                 */
                if (error == 0) {
                        error = hammer_signal_check(trans->hmp);
                        if (error)
                                break;
                }

                /*
                 * We only care about leafs.  Internal nodes can be returned
                 * in mirror-filtered mode (they are used to generate SKIP
                 * mrecords), but we don't need them for this code.
                 *
                 * WARNING: See warnings in hammer_unlock_cursor() function.
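                 *
                 * HAMMER_CURSOR_ATEDISK is set so the hammer_btree_iterate()
                 * call at the bottom of the loop advances past the current
                 * element instead of revisiting it.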
                 */
                cursor.flags |= HAMMER_CURSOR_ATEDISK;
                if (cursor.node->ondisk->type == HAMMER_BTREE_TYPE_LEAF) {
                        key_cur = cursor.node->ondisk->elms[cursor.index].base;
                        error = hammer_pfs_delete_at_cursor(&cursor, trunc_tid);
                }

                while (hammer_flusher_meta_halflimit(trans->hmp) ||
                       hammer_flusher_undo_exhausted(trans, 2)) {
                        hammer_unlock_cursor(&cursor);
                        hammer_flusher_wait(trans->hmp, seq);
                        hammer_lock_cursor(&cursor);
                        seq = hammer_flusher_async_one(trans->hmp);
                }

                if (error == 0)
                        error = hammer_btree_iterate(&cursor);
        }
        if (error == ENOENT)
                error = 0;
        hammer_done_cursor(&cursor);
        if (error == EDEADLK)
                goto retry;
failed:
        return(error);
}

/*
 * Helper function - perform rollback on a B-Tree element given trunc_tid.
 *
 * If create_tid >= trunc_tid the record is physically destroyed.
 * If delete_tid >= trunc_tid it will be set to 0, undeleting the record.
 */
static
int
hammer_pfs_delete_at_cursor(hammer_cursor_t cursor, hammer_tid_t trunc_tid)
{
        hammer_btree_leaf_elm_t elm;
        int error;

        elm = &cursor->node->ondisk->elms[cursor->index].leaf;
        if (elm->base.create_tid < trunc_tid &&
            elm->base.delete_tid < trunc_tid) {
                return(0);
        }

        if (elm->base.create_tid >= trunc_tid) {
                error = hammer_delete_at_cursor(
                                cursor, HAMMER_DELETE_DESTROY,
                                cursor->trans->tid, cursor->trans->time32,
                                1, NULL);
        } else if (elm->base.delete_tid >= trunc_tid) {
                error = hammer_delete_at_cursor(
                                cursor, HAMMER_DELETE_ADJUST,
                                0, 0,
                                1, NULL);
        } else {
                error = 0;
        }
        return(error);
}