1 /* $NetBSD: coalesce.c,v 1.4 2002/11/24 08:47:28 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2002 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/param.h> 40 #include <sys/mount.h> 41 #include <sys/time.h> 42 #include <sys/resource.h> 43 #include <sys/types.h> 44 #include <sys/wait.h> 45 #include <sys/mman.h> 46 47 #include <ufs/ufs/dinode.h> 48 #include <ufs/lfs/lfs.h> 49 50 #include <fcntl.h> 51 #include <signal.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <time.h> 56 #include <unistd.h> 57 #include <util.h> 58 #include <errno.h> 59 #include <err.h> 60 61 #include <syslog.h> 62 63 #include "clean.h" 64 65 extern int debug, do_mmap; 66 67 static int 68 tossdead(const void *client, const void *a, const void *b) 69 { 70 return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 || 71 ((BLOCK_INFO_15 *)a)->bi_size == 0); 72 } 73 74 static int log2int(int n) 75 { 76 int log; 77 78 log = 0; 79 while (n > 0) { 80 ++log; 81 n /= 2; 82 } 83 return log - 1; 84 } 85 86 enum coalesce_returncodes { 87 COALESCE_OK = 0, 88 COALESCE_NOINODE, 89 COALESCE_TOOSMALL, 90 COALESCE_BADSIZE, 91 COALESCE_BADBLOCKSIZE, 92 COALESCE_NOMEM, 93 COALESCE_BADBMAPV, 94 COALESCE_NOTWORTHIT, 95 COALESCE_NOTHINGLEFT, 96 COALESCE_NOTHINGLEFT2, 97 98 COALESCE_MAXERROR 99 }; 100 101 char *coalesce_return[] = { 102 "Successfully coalesced", 103 "File not in use or inode not found", 104 "Not large enough to coalesce", 105 "Negative size", 106 "Not enough blocks to account for size", 107 "Malloc failed", 108 "lfs_bmapv failed", 109 "Not broken enough to fix", 110 "Too many blocks not found", 111 "Too many blocks found in active segments", 112 113 "No such error" 114 }; 115 116 /* 117 * Find out if this inode's data blocks are discontinuous; if they are, 118 * rewrite them using lfs_markv. Return the number of inodes rewritten. 119 */ 120 int clean_inode(struct fs_info *fsp, ino_t ino) 121 { 122 int i, error; 123 BLOCK_INFO_15 *bip, *tbip; 124 struct dinode *dip; 125 int nb, onb, noff; 126 ufs_daddr_t toff; 127 struct lfs *lfsp; 128 int bps; 129 SEGUSE *sup; 130 131 lfsp = &fsp->fi_lfs; 132 133 dip = get_dinode(fsp, ino); 134 if (dip == NULL) 135 return COALESCE_NOINODE; 136 137 /* Compute file block size, set up for lfs_bmapv */ 138 onb = nb = lblkno(lfsp, dip->di_size); 139 140 /* XXX for now, don't do any file small enough to have fragments */ 141 if (nb < NDADDR) 142 return COALESCE_TOOSMALL; 143 144 /* Sanity checks */ 145 if (dip->di_size < 0) { 146 if (debug) 147 syslog(LOG_DEBUG, "ino %d, negative size (%lld)", 148 ino, (long long)dip->di_size); 149 return COALESCE_BADSIZE; 150 } 151 if (nb > dip->di_blocks) { 152 if (debug) 153 syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d", 154 ino, nb, dip->di_blocks); 155 return COALESCE_BADBLOCKSIZE; 156 } 157 158 bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb); 159 if (bip == NULL) { 160 syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb); 161 return COALESCE_NOMEM; 162 } 163 for (i = 0; i < nb; i++) { 164 memset(bip + i, 0, sizeof(BLOCK_INFO_15)); 165 bip[i].bi_inode = ino; 166 bip[i].bi_lbn = i; 167 bip[i].bi_version = dip->di_gen; 168 /* Don't set the size, but let lfs_bmap fill it in */ 169 } 170 if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) { 171 syslog(LOG_WARNING, "lfs_bmapv: %m"); 172 free(bip); 173 return COALESCE_BADBMAPV; 174 } 175 noff = toff = 0; 176 for (i = 1; i < nb; i++) { 177 if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfsp->lfs_frag) 178 ++noff; 179 toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr 180 - lfsp->lfs_frag) >> lfsp->lfs_fbshift; 181 } 182 183 /* 184 * If this file is not discontinuous, there's no point in rewriting it. 185 * 186 * Explicitly allow a certain amount of discontinuity, since large 187 * files will be broken among segments and medium-sized files 188 * can have a break or two and it's okay. 189 */ 190 if (nb <= 1 || noff == 0 || noff < log2int(nb) || 191 segtod(lfsp, noff) * 2 < nb) { 192 free(bip); 193 return COALESCE_NOTWORTHIT; 194 } else if (debug) 195 syslog(LOG_DEBUG, "ino %d total discontinuity " 196 "%d (%d) for %d blocks", ino, noff, toff, nb); 197 198 /* Search for blocks in active segments; don't move them. */ 199 for (i = 0; i < nb; i++) { 200 if (bip[i].bi_daddr <= 0) 201 continue; 202 sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep, 203 dtosn(lfsp, bip[i].bi_daddr)); 204 if (sup->su_flags & SEGUSE_ACTIVE) 205 bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */ 206 } 207 /* 208 * Get rid of any we've marked dead. If this is an older 209 * kernel that doesn't have lfs_bmapv fill in the block 210 * sizes, we'll toss everything here. 211 */ 212 toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL); 213 if (nb && tossdead(NULL, bip + nb - 1, NULL)) 214 --nb; 215 if (nb == 0) { 216 free(bip); 217 return COALESCE_NOTHINGLEFT; 218 } 219 220 /* 221 * We may have tossed enough blocks that it is no longer worthwhile 222 * to rewrite this inode. 223 */ 224 if (onb - nb > log2int(onb)) { 225 if (debug) 226 syslog(LOG_DEBUG, "too many blocks tossed, not rewriting"); 227 return COALESCE_NOTHINGLEFT2; 228 } 229 230 /* 231 * We are going to rewrite this inode. 232 * For any remaining blocks, read in their contents. 233 */ 234 for (i = 0; i < nb; i++) { 235 bip[i].bi_bp = malloc(bip[i].bi_size); 236 get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr); 237 } 238 if (debug) 239 syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb); 240 241 /* 242 * Write in segment-sized chunks. If at any point we'd write more 243 * than half of the available segments, sleep until that's not 244 * true any more. 245 */ 246 bps = segtod(lfsp, 1); 247 for (tbip = bip; tbip < bip + nb; tbip += bps) { 248 while (fsp->fi_cip->clean < 4) { 249 lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL); 250 reread_fs_info(fsp, do_mmap); 251 /* XXX start over? */ 252 } 253 lfs_markv(&fsp->fi_statfsp->f_fsid, tbip, 254 (tbip + bps < bip + nb ? bps : nb % bps)); 255 } 256 257 for (i = 0; i < nb; i++) 258 if (bip[i].bi_bp) 259 free(bip[i].bi_bp); 260 free(bip); 261 return COALESCE_OK; 262 } 263 264 /* 265 * Try coalescing every inode in the filesystem. 266 * Return the number of inodes actually altered. 267 */ 268 int clean_all_inodes(struct fs_info *fsp) 269 { 270 int i, r; 271 int totals[COALESCE_MAXERROR]; 272 273 memset(totals, 0, sizeof(totals)); 274 for (i = 0; i < fsp->fi_ifile_count; i++) { 275 r = clean_inode(fsp, i); 276 ++totals[r]; 277 } 278 279 for (i = 0; i < COALESCE_MAXERROR; i++) 280 if (totals[i]) 281 syslog(LOG_DEBUG, "%s: %d", coalesce_return[i], 282 totals[i]); 283 284 return totals[COALESCE_OK]; 285 } 286 287 int fork_coalesce(struct fs_info *fsp) 288 { 289 static pid_t childpid; 290 int num; 291 292 reread_fs_info(fsp, do_mmap); 293 294 if (childpid) { 295 if (waitpid(childpid, NULL, WNOHANG) == childpid) 296 childpid = 0; 297 } 298 if (childpid && kill(childpid, 0) >= 0) { 299 /* already running a coalesce process */ 300 if (debug) 301 syslog(LOG_DEBUG, "coalescing already in progress"); 302 return 0; 303 } 304 childpid = fork(); 305 if (childpid < 0) { 306 syslog(LOG_ERR, "fork: %m"); 307 return 0; 308 } else if (childpid == 0) { 309 syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid()); 310 num = clean_all_inodes(fsp); 311 syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num); 312 exit(0); 313 } 314 return 0; 315 } 316