xref: /netbsd/libexec/lfs_cleanerd/coalesce.c (revision c4a72b64)
1 /*      $NetBSD: coalesce.c,v 1.4 2002/11/24 08:47:28 yamt Exp $  */
2 
3 /*-
4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *      This product includes software developed by the NetBSD
21  *      Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/mount.h>
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 #include <sys/types.h>
44 #include <sys/wait.h>
45 #include <sys/mman.h>
46 
47 #include <ufs/ufs/dinode.h>
48 #include <ufs/lfs/lfs.h>
49 
50 #include <fcntl.h>
51 #include <signal.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <time.h>
56 #include <unistd.h>
57 #include <util.h>
58 #include <errno.h>
59 #include <err.h>
60 
61 #include <syslog.h>
62 
63 #include "clean.h"
64 
65 extern int debug, do_mmap;
66 
67 static int
68 tossdead(const void *client, const void *a, const void *b)
69 {
70 	return (((BLOCK_INFO_15 *)a)->bi_daddr <= 0 ||
71 		((BLOCK_INFO_15 *)a)->bi_size == 0);
72 }
73 
/*
 * Integer base-2 logarithm, rounded down.
 * Returns -1 when n is zero or negative.
 */
static int log2int(int n)
{
	int bits;

	/* Count how many times n can be halved before reaching zero. */
	for (bits = -1; n > 0; n >>= 1)
		bits++;
	return bits;
}
85 
/*
 * Status codes returned by clean_inode().  Each value doubles as an
 * index into coalesce_return[] and into the per-code totals kept by
 * clean_all_inodes().
 */
enum coalesce_returncodes {
	COALESCE_OK            = 0,	/* inode was rewritten */
	COALESCE_NOINODE       = 1,	/* get_dinode() found nothing */
	COALESCE_TOOSMALL      = 2,	/* fewer than NDADDR blocks */
	COALESCE_BADSIZE       = 3,	/* negative di_size */
	COALESCE_BADBLOCKSIZE  = 4,	/* size implies more blocks than held */
	COALESCE_NOMEM         = 5,	/* allocation failure */
	COALESCE_BADBMAPV      = 6,	/* lfs_bmapv() failed */
	COALESCE_NOTWORTHIT    = 7,	/* not discontinuous enough */
	COALESCE_NOTHINGLEFT   = 8,	/* every block was tossed */
	COALESCE_NOTHINGLEFT2  = 9,	/* too many blocks were tossed */

	COALESCE_MAXERROR      = 10	/* number of codes above; not a status */
};
100 
/*
 * Human-readable text for each coalesce_returncodes value, in the
 * same order as the enum; the last entry corresponds to
 * COALESCE_MAXERROR.
 */
char *coalesce_return[] = {
	/*  0 */ "Successfully coalesced",
	/*  1 */ "File not in use or inode not found",
	/*  2 */ "Not large enough to coalesce",
	/*  3 */ "Negative size",
	/*  4 */ "Not enough blocks to account for size",
	/*  5 */ "Malloc failed",
	/*  6 */ "lfs_bmapv failed",
	/*  7 */ "Not broken enough to fix",
	/*  8 */ "Too many blocks not found",
	/*  9 */ "Too many blocks found in active segments",

	/* 10 */ "No such error"
};
115 
116 /*
117  * Find out if this inode's data blocks are discontinuous; if they are,
118  * rewrite them using lfs_markv.  Return the number of inodes rewritten.
119  */
120 int clean_inode(struct fs_info *fsp, ino_t ino)
121 {
122 	int i, error;
123 	BLOCK_INFO_15 *bip, *tbip;
124 	struct dinode *dip;
125 	int nb, onb, noff;
126 	ufs_daddr_t toff;
127 	struct lfs *lfsp;
128 	int bps;
129         SEGUSE *sup;
130 
131 	lfsp = &fsp->fi_lfs;
132 
133         dip = get_dinode(fsp, ino);
134 	if (dip == NULL)
135 		return COALESCE_NOINODE;
136 
137 	/* Compute file block size, set up for lfs_bmapv */
138 	onb = nb = lblkno(lfsp, dip->di_size);
139 
140 	/* XXX for now, don't do any file small enough to have fragments */
141 	if (nb < NDADDR)
142 		return COALESCE_TOOSMALL;
143 
144 	/* Sanity checks */
145 	if (dip->di_size < 0) {
146 		if (debug)
147 			syslog(LOG_DEBUG, "ino %d, negative size (%lld)",
148 				ino, (long long)dip->di_size);
149 		return COALESCE_BADSIZE;
150 	}
151 	if (nb > dip->di_blocks) {
152 		if (debug)
153 			syslog(LOG_DEBUG, "ino %d, computed blocks %d > held blocks %d",
154 				ino, nb, dip->di_blocks);
155 		return COALESCE_BADBLOCKSIZE;
156 	}
157 
158 	bip = (BLOCK_INFO_15 *)malloc(sizeof(BLOCK_INFO_15) * nb);
159 	if (bip == NULL) {
160 		syslog(LOG_WARNING, "ino %d, %d blocks: %m", ino, nb);
161 		return COALESCE_NOMEM;
162 	}
163 	for (i = 0; i < nb; i++) {
164 		memset(bip + i, 0, sizeof(BLOCK_INFO_15));
165 		bip[i].bi_inode = ino;
166 		bip[i].bi_lbn = i;
167 		bip[i].bi_version = dip->di_gen;
168 		/* Don't set the size, but let lfs_bmap fill it in */
169 	}
170 	if ((error = lfs_bmapv(&fsp->fi_statfsp->f_fsid, bip, nb)) < 0) {
171                 syslog(LOG_WARNING, "lfs_bmapv: %m");
172 		free(bip);
173 		return COALESCE_BADBMAPV;
174 	}
175 	noff = toff = 0;
176 	for (i = 1; i < nb; i++) {
177 		if (bip[i].bi_daddr != bip[i - 1].bi_daddr + lfsp->lfs_frag)
178 			++noff;
179 		toff += abs(bip[i].bi_daddr - bip[i - 1].bi_daddr
180 		    - lfsp->lfs_frag) >> lfsp->lfs_fbshift;
181 	}
182 
183 	/*
184 	 * If this file is not discontinuous, there's no point in rewriting it.
185          *
186          * Explicitly allow a certain amount of discontinuity, since large
187          * files will be broken among segments and medium-sized files
188          * can have a break or two and it's okay.
189 	 */
190 	if (nb <= 1 || noff == 0 || noff < log2int(nb) ||
191 	    segtod(lfsp, noff) * 2 < nb) {
192 		free(bip);
193 		return COALESCE_NOTWORTHIT;
194 	} else if (debug)
195 		syslog(LOG_DEBUG, "ino %d total discontinuity "
196 			"%d (%d) for %d blocks", ino, noff, toff, nb);
197 
198 	/* Search for blocks in active segments; don't move them. */
199 	for (i = 0; i < nb; i++) {
200 		if (bip[i].bi_daddr <= 0)
201 			continue;
202 		sup = SEGUSE_ENTRY(lfsp, fsp->fi_segusep,
203 				dtosn(lfsp, bip[i].bi_daddr));
204 		if (sup->su_flags & SEGUSE_ACTIVE)
205 			bip[i].bi_daddr = LFS_UNUSED_DADDR; /* 0 */
206 	}
207         /*
208 	 * Get rid of any we've marked dead.  If this is an older
209 	 * kernel that doesn't have lfs_bmapv fill in the block
210 	 * sizes, we'll toss everything here.
211 	 */
212 	toss(bip, &nb, sizeof(BLOCK_INFO_15), tossdead, NULL);
213         if (nb && tossdead(NULL, bip + nb - 1, NULL))
214                 --nb;
215         if (nb == 0) {
216 		free(bip);
217 		return COALESCE_NOTHINGLEFT;
218 	}
219 
220 	/*
221 	 * We may have tossed enough blocks that it is no longer worthwhile
222 	 * to rewrite this inode.
223 	 */
224 	if (onb - nb > log2int(onb)) {
225 		if (debug)
226 			syslog(LOG_DEBUG, "too many blocks tossed, not rewriting");
227 		return COALESCE_NOTHINGLEFT2;
228 	}
229 
230         /*
231 	 * We are going to rewrite this inode.
232 	 * For any remaining blocks, read in their contents.
233 	 */
234 	for (i = 0; i < nb; i++) {
235 		bip[i].bi_bp = malloc(bip[i].bi_size);
236                 get_rawblock(fsp, bip[i].bi_bp, bip[i].bi_size, bip[i].bi_daddr);
237 	}
238 	if (debug)
239 		syslog(LOG_DEBUG, "ino %d markv %d blocks", ino, nb);
240 
241 	/*
242 	 * Write in segment-sized chunks.  If at any point we'd write more
243 	 * than half of the available segments, sleep until that's not
244 	 * true any more.
245 	 */
246 	bps = segtod(lfsp, 1);
247 	for (tbip = bip; tbip < bip + nb; tbip += bps) {
248 		while (fsp->fi_cip->clean < 4) {
249 			lfs_segwait(&fsp->fi_statfsp->f_fsid, NULL);
250 			reread_fs_info(fsp, do_mmap);
251 			/* XXX start over? */
252 		}
253 		lfs_markv(&fsp->fi_statfsp->f_fsid, tbip,
254                           (tbip + bps < bip + nb ? bps : nb % bps));
255 	}
256 
257 	for (i = 0; i < nb; i++)
258 		if (bip[i].bi_bp)
259 			free(bip[i].bi_bp);
260 	free(bip);
261 	return COALESCE_OK;
262 }
263 
264 /*
265  * Try coalescing every inode in the filesystem.
266  * Return the number of inodes actually altered.
267  */
268 int clean_all_inodes(struct fs_info *fsp)
269 {
270 	int i, r;
271 	int totals[COALESCE_MAXERROR];
272 
273 	memset(totals, 0, sizeof(totals));
274 	for (i = 0; i < fsp->fi_ifile_count; i++) {
275 		r = clean_inode(fsp, i);
276 		++totals[r];
277 	}
278 
279 	for (i = 0; i < COALESCE_MAXERROR; i++)
280 		if (totals[i])
281 			syslog(LOG_DEBUG, "%s: %d", coalesce_return[i],
282 				totals[i]);
283 
284 	return totals[COALESCE_OK];
285 }
286 
287 int fork_coalesce(struct fs_info *fsp)
288 {
289 	static pid_t childpid;
290 	int num;
291 
292 	reread_fs_info(fsp, do_mmap);
293 
294 	if (childpid) {
295      		if (waitpid(childpid, NULL, WNOHANG) == childpid)
296 			childpid = 0;
297 	}
298 	if (childpid && kill(childpid, 0) >= 0) {
299 		/* already running a coalesce process */
300 		if (debug)
301 			syslog(LOG_DEBUG, "coalescing already in progress");
302 		return 0;
303 	}
304 	childpid = fork();
305 	if (childpid < 0) {
306 		syslog(LOG_ERR, "fork: %m");
307 		return 0;
308 	} else if (childpid == 0) {
309 		syslog(LOG_NOTICE, "new coalescing process, pid %d", getpid());
310 		num = clean_all_inodes(fsp);
311 		syslog(LOG_NOTICE, "coalesced %d discontiguous inodes", num);
312 		exit(0);
313 	}
314 	return 0;
315 }
316