xref: /openbsd/usr.sbin/amd/amd/map.c (revision 3a50f0a9)
1 /*	$OpenBSD: map.c,v 1.17 2022/12/28 21:30:15 jmc Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990 Jan-Simon Pendry
5  * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
6  * Copyright (c) 1990, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Jan-Simon Pendry at Imperial College, London.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include "am.h"
38 
39 #include <unistd.h>
40 
41 /*
42  * Generation Numbers.
43  *
44  * Generation numbers are allocated to every node created
45  * by amd.  When a filehandle is computed and sent to the
46  * kernel, the generation number makes sure that it is safe
47  * to reallocate a node slot even when the kernel has a cached
48  * reference to its old incarnation.
49  * No garbage collection is done, since it is assumed that
50  * there is no way that 2^32 generation numbers could ever
51  * be allocated by a single run of amd - there is simply
52  * not enough cpu time available.
53  */
54 static unsigned int am_gen = 2;	/* Initial generation number */
55 #define new_gen() (am_gen++)
56 
57 am_node **exported_ap = (am_node **) 0;
58 int exported_ap_size = 0;
59 int first_free_map = 0;		/* First available free slot */
60 int last_used_map = -1;		/* Last unavailable used slot */
61 static int timeout_mp_id;	/* Id from last call to timeout */
62 
63 /*
64  * This is the default attributes field which
65  * is copied into every new node to be created.
66  * The individual filesystem fs_init() routines
67  * patch the copy to represent the particular
68  * details for the relevant filesystem type
69  */
70 static struct fattr gen_fattr = {
71 	NFLNK,				/* type */
72 	NFSMODE_LNK | 0777,		/* mode */
73 	1,				/* nlink */
74 	0,				/* uid */
75 	0,				/* gid */
76 	0,				/* size */
77 	4096,				/* blocksize */
78 	0,				/* rdev */
79 	1,				/* blocks */
80 	0,				/* fsid */
81 	0,				/* fileid */
82 	{ 0, 0 },			/* atime */
83 	{ 0, 0 },			/* mtime */
84 	{ 0, 0 },			/* ctime */
85 };
86 
87 /*
88  * Resize exported_ap map
89  */
90 static int
exported_ap_realloc_map(int nsize)91 exported_ap_realloc_map(int nsize)
92 {
93 	/*
94 	 * this shouldn't happen, but...
95 	 */
96 	if (nsize < 0 || nsize == exported_ap_size)
97 		return 0;
98 
99 	exported_ap = xreallocarray(exported_ap, nsize, sizeof *exported_ap);
100 
101 	if (nsize > exported_ap_size)
102 		bzero(exported_ap+exported_ap_size,
103 			(nsize - exported_ap_size) * sizeof(am_node*));
104 	exported_ap_size = nsize;
105 
106 	return 1;
107 }
108 
109 /*
110  * The root of the mount tree.
111  */
112 am_node *root_node;
113 
114 /*
115  * Allocate a new mount slot and create
116  * a new node.
117  * Fills in the map number of the node,
118  * but leaves everything else uninitialised.
119  */
exported_ap_alloc(void)120 am_node *exported_ap_alloc(void)
121 {
122 	am_node *mp, **mpp;
123 
124 	/*
125 	 * First check if there are any slots left, realloc if needed
126 	 */
127 	if (first_free_map >= exported_ap_size)
128 		if (!exported_ap_realloc_map(exported_ap_size + NEXP_AP))
129 			return 0;
130 
131 	/*
132 	 * Grab the next free slot
133 	 */
134 	mpp = exported_ap + first_free_map;
135 	mp = *mpp = ALLOC(am_node);
136 	bzero(mp, sizeof(*mp));
137 
138 	mp->am_mapno = first_free_map++;
139 
140 	/*
141 	 * Update free pointer
142 	 */
143 	while (first_free_map < exported_ap_size && exported_ap[first_free_map])
144 		first_free_map++;
145 
146 	if (first_free_map > last_used_map)
147 		last_used_map = first_free_map - 1;
148 
149 	/*
150 	 * Shrink exported_ap if reasonable
151 	 */
152 	if (last_used_map < exported_ap_size - (NEXP_AP + NEXP_AP_MARGIN))
153 		exported_ap_realloc_map(exported_ap_size - NEXP_AP);
154 
155 #ifdef DEBUG
156 	/*dlog("alloc_exp: last_used_map = %d, first_free_map = %d",
157 		last_used_map, first_free_map);*/
158 #endif /* DEBUG */
159 
160 	return mp;
161 }
162 
163 /*
164  * Free a mount slot
165  */
166 static void
exported_ap_free(am_node * mp)167 exported_ap_free(am_node *mp)
168 {
169 	/*
170 	 * Sanity check
171 	 */
172 	if (!mp)
173 		return;
174 
175 	/*
176 	 * Zero the slot pointer to avoid double free's
177 	 */
178 	exported_ap[mp->am_mapno] = 0;
179 
180 	/*
181 	 * Update the free and last_used indices
182 	 */
183 	if (mp->am_mapno == last_used_map)
184 		while (last_used_map >= 0 && exported_ap[last_used_map] == 0)
185 			--last_used_map;
186 
187 	if (first_free_map > mp->am_mapno)
188 		first_free_map = mp->am_mapno;
189 
190 #ifdef DEBUG
191 	/*dlog("free_exp: last_used_map = %d, first_free_map = %d",
192 		last_used_map, first_free_map);*/
193 #endif /* DEBUG */
194 
195 	/*
196 	 * Free the mount node
197 	 */
198 	free(mp);
199 }
200 
201 /*
202  * Insert mp into the correct place,
203  * where p_mp is its parent node.
204  * A new node gets placed as the youngest sibling
205  * of any other children, and the parent's child
206  * pointer is adjusted to point to the new child node.
207  */
208 void
insert_am(am_node * mp,am_node * p_mp)209 insert_am(am_node *mp, am_node *p_mp)
210 {
211 	/*
212 	 * If this is going in at the root then flag it
213 	 * so that it cannot be unmounted by amq.
214 	 */
215 	if (p_mp == root_node)
216 		mp->am_flags |= AMF_ROOT;
217 	/*
218 	 * Fill in n-way links
219 	 */
220 	mp->am_parent = p_mp;
221 	mp->am_osib = p_mp->am_child;
222 	if (mp->am_osib)
223 		mp->am_osib->am_ysib = mp;
224 	p_mp->am_child = mp;
225 }
226 
227 /*
228  * Remove am from its place in the mount tree
229  */
230 static void
remove_am(am_node * mp)231 remove_am(am_node *mp)
232 {
233 	/*
234 	 * 1.  Consistency check
235 	 */
236 	if (mp->am_child && mp->am_parent) {
237 		plog(XLOG_WARNING, "children of \"%s\" still exist - deleting anyway", mp->am_path);
238 	}
239 
240 	/*
241 	 * 2.  Update parent's child pointer
242 	 */
243 	if (mp->am_parent && mp->am_parent->am_child == mp)
244 		mp->am_parent->am_child = mp->am_osib;
245 
246 	/*
247 	 * 3.  Unlink from sibling chain
248 	 */
249 	if (mp->am_ysib)
250 		mp->am_ysib->am_osib = mp->am_osib;
251 	if (mp->am_osib)
252 		mp->am_osib->am_ysib = mp->am_ysib;
253 }
254 
255 /*
256  * Compute a new time to live value for a node.
257  */
258 void
new_ttl(am_node * mp)259 new_ttl(am_node *mp)
260 {
261 	mp->am_timeo_w = 0;
262 
263 	mp->am_ttl = clocktime();
264 	mp->am_fattr.atime.seconds = mp->am_ttl;
265 	mp->am_ttl += mp->am_timeo;	/* sun's -tl option */
266 }
267 
268 void
mk_fattr(am_node * mp,int vntype)269 mk_fattr(am_node *mp, int vntype)
270 {
271 	switch (vntype) {
272 	case NFDIR:
273 		mp->am_fattr.type = NFDIR;
274 		mp->am_fattr.mode = NFSMODE_DIR | 0555;
275 		mp->am_fattr.nlink = 2;
276 		mp->am_fattr.size = 512;
277 		break;
278 	case NFLNK:
279 		mp->am_fattr.type = NFLNK;
280 		mp->am_fattr.mode = NFSMODE_LNK | 0777;
281 		mp->am_fattr.nlink = 1;
282 		mp->am_fattr.size = 0;
283 		break;
284 	default:
285 		plog(XLOG_FATAL, "Unknown fattr type %d - ignored", vntype);
286 		break;
287 	}
288 }
289 
290 /*
291  * Initialise an allocated mount node.
292  * It is assumed that the mount node was bzero'd
293  * before getting here so anything that would
294  * be set to zero isn't done here.
295  */
296 void
init_map(am_node * mp,char * dir)297 init_map(am_node *mp, char *dir)
298 {
299 	/* mp->am_mapno initialized by exported_ap_alloc */
300 	mp->am_mnt = new_mntfs();
301 	mp->am_name = strdup(dir);
302 	mp->am_path = strdup(dir);
303 	/*mp->am_link = 0;*/
304 	/*mp->am_parent = 0;*/
305 	/*mp->am_ysib = 0;*/
306 	/*mp->am_osib = 0;*/
307 	/*mp->am_child = 0;*/
308 	/*mp->am_flags = 0;*/
309 	/*mp->am_error = 0;*/
310 	mp->am_gen = new_gen();
311 	/*mp->am_pref = 0;*/
312 
313 	mp->am_timeo = am_timeo;
314 	mp->am_attr.status = NFS_OK;
315 	mp->am_fattr = gen_fattr;
316 	mp->am_fattr.fsid = 42;
317 	mp->am_fattr.fileid = 0;
318 	mp->am_fattr.atime.seconds = clocktime();
319 	mp->am_fattr.atime.useconds = 0;
320 	mp->am_fattr.mtime = mp->am_fattr.ctime = mp->am_fattr.atime;
321 
322 	new_ttl(mp);
323 	mp->am_stats.s_mtime = mp->am_fattr.atime.seconds;
324 	/*mp->am_private = 0;*/
325 }
326 
327 /*
328  * Free a mount node.
329  * The node must be already unmounted.
330  */
331 void
free_map(am_node * mp)332 free_map(am_node *mp)
333 {
334 	remove_am(mp);
335 
336 	free(mp->am_link);
337 	free(mp->am_name);
338 	free(mp->am_path);
339 	free(mp->am_pref);
340 
341 	if (mp->am_mnt)
342 		free_mntfs(mp->am_mnt);
343 
344 	exported_ap_free(mp);
345 }
346 
347 /*
348  * Convert from file handle to
349  * automount node.
350  */
351 am_node *
fh_to_mp3(nfs_fh * fhp,int * rp,int c_or_d)352 fh_to_mp3(nfs_fh *fhp, int *rp, int c_or_d)
353 {
354 	struct am_fh *fp = (struct am_fh *) fhp;
355 	am_node *ap = 0;
356 
357 	/*
358 	 * Check process id matches
359 	 * If it doesn't then it is probably
360 	 * from an old kernel cached filehandle
361 	 * which is now out of date.
362 	 */
363 	if (fp->fhh_pid != mypid)
364 		goto drop;
365 
366 	/*
367 	 * Make sure the index is valid before
368 	 * exported_ap is referenced.
369 	 */
370 	if (fp->fhh_id < 0 || fp->fhh_id >= exported_ap_size)
371 		goto drop;
372 
373 	/*
374 	 * Get hold of the supposed mount node
375 	 */
376 	ap = exported_ap[fp->fhh_id];
377 
378 	/*
379 	 * If it exists then maybe...
380 	 */
381 	if (ap) {
382 		/*
383 		 * Check the generation number in the node
384 		 * matches the one from the kernel.  If not
385 		 * then the old node has been timed out and
386 		 * a new one allocated.
387 		 */
388 		if (ap->am_gen != fp->fhh_gen) {
389 			ap = 0;
390 			goto drop;
391 		}
392 
393 		/*
394 		 * If the node is hung then locate a new node
395 		 * for it.  This implements the replicated filesystem
396 		 * retries.
397 		 */
398 		if (ap->am_mnt && FSRV_ISDOWN(ap->am_mnt->mf_server) && ap->am_parent) {
399 			int error;
400 			am_node *orig_ap = ap;
401 #ifdef DEBUG
402 			dlog("fh_to_mp3: %s (%s) is hung:- call lookup",
403 					orig_ap->am_path, orig_ap->am_mnt->mf_info);
404 #endif /* DEBUG */
405 			/*
406 			 * Update modify time of parent node.
407 			 * With any luck the kernel will re-stat
408 			 * the child node and get new information.
409 			 */
410 			orig_ap->am_fattr.mtime.seconds = clocktime();
411 
412 			/*
413 			 * Call the parent's lookup routine for an object
414 			 * with the same name.  This may return -1 in error
415 			 * if a mount is in progress.  In any case, if no
416 			 * mount node is returned the error code is propagated
417 			 * to the caller.
418 			 */
419 			if (c_or_d == VLOOK_CREATE) {
420 				ap = (*orig_ap->am_parent->am_mnt->mf_ops->lookuppn)(orig_ap->am_parent,
421 						orig_ap->am_name, &error, c_or_d);
422 			} else {
423 				ap = 0;
424 				error = ESTALE;
425 			}
426 			if (ap == 0) {
427 				if (error < 0 && amd_state == Finishing)
428 					error = ENOENT;
429 				*rp = error;
430 				return 0;
431 			}
432 			/*
433 			 * Update last access to original node.  This
434 			 * avoids timing it out and so sending ESTALE
435 			 * back to the kernel.
436 			 * XXX - Not sure we need this anymore (jsp, 90/10/6).
437 			 */
438 			new_ttl(orig_ap);
439 
440 		}
441 		/*
442 		 * Disallow references to objects being unmounted, unless
443 		 * they are automount points.
444 		 */
445 		if (ap->am_mnt && (ap->am_mnt->mf_flags & MFF_UNMOUNTING) &&
446 				!(ap->am_flags & AMF_ROOT)) {
447 			if (amd_state == Finishing)
448 				*rp = ENOENT;
449 			else
450 				*rp = -1;
451 			return 0;
452 		}
453 		new_ttl(ap);
454 	}
455 
456 drop:
457 	if (!ap || !ap->am_mnt) {
458 		/*
459 		 * If we are shutting down then it is likely
460 		 * that this node has disappeared because of
461 		 * a fast timeout.  To avoid things thrashing
462 		 * just pretend it doesn't exist at all.  If
463 		 * ESTALE is returned, some NFS clients just
464 		 * keep retrying (stupid or what - if it's
465 		 * stale now, what's it going to be in 5 minutes?)
466 		 */
467 		if (amd_state == Finishing)
468 			*rp = ENOENT;
469 		else
470 			*rp = ESTALE;
471 		amd_stats.d_stale++;
472 	}
473 
474 	return ap;
475 }
476 
477 am_node *
fh_to_mp(nfs_fh * fhp)478 fh_to_mp(nfs_fh *fhp)
479 {
480 	int dummy;
481 	return fh_to_mp2(fhp, &dummy);
482 }
483 
484 /*
485  * Convert from automount node to
486  * file handle.
487  */
488 void
mp_to_fh(am_node * mp,struct nfs_fh * fhp)489 mp_to_fh(am_node *mp, struct nfs_fh *fhp)
490 {
491 	struct am_fh *fp = (struct am_fh *) fhp;
492 
493 	/*
494 	 * Take the process id
495 	 */
496 	fp->fhh_pid = mypid;
497 	/*
498 	 * .. the map number
499 	 */
500 	fp->fhh_id = mp->am_mapno;
501 	/*
502 	 * .. and the generation number
503 	 */
504 	fp->fhh_gen = mp->am_gen;
505 	/*
506 	 * .. to make a "unique" triple that will never
507 	 * be reallocated except across reboots (which doesn't matter)
508 	 * or if we are unlucky enough to be given the same
509 	 * pid as a previous amd (very unlikely).
510 	 */
511 }
512 
513 static am_node *
find_ap2(char * dir,am_node * mp)514 find_ap2(char *dir, am_node *mp)
515 {
516 	if (mp) {
517 		am_node *mp2;
518 		if (strcmp(mp->am_path, dir) == 0)
519 			return mp;
520 
521 		if ((mp->am_mnt->mf_flags & MFF_MOUNTED) &&
522 			strcmp(mp->am_mnt->mf_mount, dir) == 0)
523 			return mp;
524 
525 		mp2 = find_ap2(dir, mp->am_osib);
526 		if (mp2)
527 			return mp2;
528 		return find_ap2(dir, mp->am_child);
529 	}
530 
531 	return 0;
532 }
533 
534 /*
535  * Find the mount node corresponding
536  * to dir.  dir can match either the
537  * automount path or, if the node is
538  * mounted, the mount location.
539  */
540 am_node *
find_ap(char * dir)541 find_ap(char *dir)
542 {
543 	int i;
544 
545 	for (i = last_used_map; i >= 0; --i) {
546 		am_node *mp = exported_ap[i];
547 		if (mp && (mp->am_flags & AMF_ROOT)) {
548 			mp = find_ap2(dir, exported_ap[i]);
549 			if (mp)
550 				return mp;
551 		}
552 	}
553 
554 	return 0;
555 }
556 
557 /*
558  * Find the mount node corresponding
559  * to the mntfs structure.
560  */
561 am_node *
find_mf(mntfs * mf)562 find_mf(mntfs *mf)
563 {
564 	int i;
565 
566 	for (i = last_used_map; i >= 0; --i) {
567 		am_node *mp = exported_ap[i];
568 		if (mp && mp->am_mnt == mf)
569 			return mp;
570 	}
571 	return 0;
572 }
573 
574 /*
575  * Get the filehandle for a particular named directory.
576  * This is used during the bootstrap to tell the kernel
577  * the filehandles of the initial automount points.
578  */
579 nfs_fh *
root_fh(char * dir)580 root_fh(char *dir)
581 {
582 	static nfs_fh nfh;
583 	am_node *mp = root_ap(dir, TRUE);
584 	if (mp) {
585 		mp_to_fh(mp, &nfh);
586 		/*
587 		 * Patch up PID to match main server...
588 		 */
589 		if (!foreground) {
590 			pid_t pid = getppid();
591 			((struct am_fh *) &nfh)->fhh_pid = pid;
592 #ifdef DEBUG
593 			dlog("root_fh substitutes pid %d", (int)pid);
594 #endif
595 		}
596 		return &nfh;
597 	}
598 
599 	/*
600 	 * Should never get here...
601 	 */
602 	plog(XLOG_ERROR, "Can't find root filehandle for %s", dir);
603 	return 0;
604 }
605 
606 am_node *
root_ap(char * dir,int path)607 root_ap(char *dir, int path)
608 {
609 	am_node *mp = find_ap(dir);
610 	if (mp && mp->am_parent == root_node)
611 		return mp;
612 
613 	return 0;
614 }
615 
616 /*
617  * Timeout all nodes waiting on
618  * a given Fserver.
619  */
620 void
map_flush_srvr(fserver * fs)621 map_flush_srvr(fserver *fs)
622 {
623 	int i;
624 	int done = 0;
625 
626 	for (i = last_used_map; i >= 0; --i) {
627 		am_node *mp = exported_ap[i];
628 		if (mp && mp->am_mnt && mp->am_mnt->mf_server == fs) {
629 			plog(XLOG_INFO, "Flushed %s; dependent on %s", mp->am_path, fs->fs_host);
630 			mp->am_ttl = clocktime();
631 			done = 1;
632 		}
633 	}
634 	if (done)
635 		reschedule_timeout_mp();
636 }
637 
638 /*
639  * Mount a top level automount node
640  * by calling lookup in the parent
641  * (root) node which will cause the
642  * automount node to be automounted.
643  */
644 int
mount_auto_node(char * dir,void * arg)645 mount_auto_node(char *dir, void *arg)
646 {
647 	int error = 0;
648 
649 	(void) afs_ops.lookuppn((am_node *) arg, dir, &error, VLOOK_CREATE);
650 	if (error > 0) {
651 		errno = error; /* XXX */
652 		plog(XLOG_ERROR, "Could not mount %s: %m", dir);
653 	}
654 	return error;
655 }
656 
657 /*
658  * Cause all the top-level mount nodes
659  * to be automounted
660  */
661 int
mount_exported(void)662 mount_exported(void)
663 {
664 	/*
665 	 * Iterate over all the nodes to be started
666 	 */
667 	return root_keyiter((void (*)(char *, void *)) mount_auto_node, root_node);
668 }
669 
670 /*
671  * Construct top-level node
672  */
673 void
make_root_node(void)674 make_root_node(void)
675 {
676 	mntfs *root_mnt;
677 	char *rootmap = ROOT_MAP;
678 	root_node = exported_ap_alloc();
679 
680 	/*
681 	 * Allocate a new map
682 	 */
683 	init_map(root_node, "");
684 	/*
685 	 * Allocate a new mounted filesystem
686 	 */
687 	root_mnt = find_mntfs(&root_ops, (am_opts *) 0, "", rootmap, "", "", "");
688 	/*
689 	 * Replace the initial null reference
690 	 */
691 	free_mntfs(root_node->am_mnt);
692 	root_node->am_mnt = root_mnt;
693 
694 	/*
695 	 * Initialise the root
696 	 */
697 	if (root_mnt->mf_ops->fs_init)
698 		(*root_mnt->mf_ops->fs_init)(root_mnt);
699 
700 	/*
701 	 * Mount the root
702 	 */
703 	root_mnt->mf_error = (*root_mnt->mf_ops->mount_fs)(root_node);
704 }
705 
706 /*
707  * Cause all the nodes to be unmounted by timing
708  * them out.
709  */
710 void
umount_exported(void)711 umount_exported(void)
712 {
713 	int i;
714 
715 	for (i = last_used_map; i >= 0; --i) {
716 		am_node *mp = exported_ap[i];
717 		if (mp) {
718 			mntfs *mf = mp->am_mnt;
719 			if (mf->mf_flags & MFF_UNMOUNTING) {
720 				/*
721 				 * If this node is being unmounted then
722 				 * just ignore it.  However, this could
723 				 * prevent amd from finishing if the
724 				 * unmount gets blocked since the am_node
725 				 * will never be free'd.  am_unmounted needs
726 				 * telling about this possibility. - XXX
727 				 */
728 				continue;
729 			}
730 			if (mf && !(mf->mf_ops->fs_flags & FS_DIRECTORY)) {
731 				/*
732 				 * When shutting down this had better
733 				 * look like a directory, otherwise it
734 				 * can't be unmounted!
735 				 */
736 				mk_fattr(mp, NFDIR);
737 			}
738 			if ((--immediate_abort < 0 && !(mp->am_flags & AMF_ROOT) && mp->am_parent) ||
739 			    (mf->mf_flags & MFF_RESTART)) {
740 				/*
741 				 * Just throw this node away without
742 				 * bothering to unmount it.  If the
743 				 * server is not known to be up then
744 				 * don't discard the mounted on directory
745 				 * or Amd might hang...
746 				 */
747 				if (mf->mf_server &&
748 					(mf->mf_server->fs_flags & (FSF_DOWN|FSF_VALID)) != FSF_VALID)
749 					mf->mf_flags &= ~MFF_MKMNT;
750 				am_unmounted(mp);
751 			} else {
752 				/*
753 				 * Any other node gets forcibly
754 				 * timed out
755 				 */
756 				mp->am_flags &= ~AMF_NOTIMEOUT;
757 				mp->am_mnt->mf_flags &= ~MFF_RSTKEEP;
758 				mp->am_ttl = 0;
759 				mp->am_timeo = 1;
760 				mp->am_timeo_w = 0;
761 			}
762 		}
763 	}
764 }
765 
766 static int
unmount_node(am_node * mp)767 unmount_node(am_node *mp)
768 {
769 	mntfs *mf = mp->am_mnt;
770 	int error;
771 
772 	if ((mf->mf_flags & MFF_ERROR) || mf->mf_refc > 1) {
773 		/*
774 		 * Just unlink
775 		 */
776 #ifdef DEBUG
777 		if (mf->mf_flags & MFF_ERROR)
778 			dlog("No-op unmount of error node %s", mf->mf_info);
779 #endif /* DEBUG */
780 		error = 0;
781 	} else {
782 #ifdef DEBUG
783 		dlog("Unmounting %s (%s)", mf->mf_mount, mf->mf_info);
784 #endif /* DEBUG */
785 		error = (*mf->mf_ops->umount_fs)(mp);
786 	}
787 
788 	if (error) {
789 #ifdef DEBUG
790 		errno = error; /* XXX */
791 		dlog("%s: unmount: %m", mf->mf_mount);
792 #endif /* DEBUG */
793 	}
794 
795 	return error;
796 }
797 
#ifdef FLUSH_KERNEL_NAME_CACHE
/*
 * Remove mp's path with unlink() (symbolic links) or rmdir()
 * (directories), depending on the type recorded in its mounted
 * filesystem's attributes.  Other types are left alone.  A failure
 * is only logged as a warning.
 */
static void
flush_kernel_name_cache(am_node *mp)
{
	int failed = 0;

	switch (mp->am_mnt->mf_fattr.type) {
	case NFLNK:
		failed = (unlink(mp->am_path) < 0);
		break;
	case NFDIR:
		failed = (rmdir(mp->am_path) < 0);
		break;
	default:
		break;
	}

	if (failed)
		plog(XLOG_WARNING, "failed to clear \"%s\" from dnlc: %m", mp->am_path);
}
#endif /* FLUSH_KERNEL_NAME_CACHE */
817 
818 static int
unmount_node_wrap(void * vp)819 unmount_node_wrap(void *vp)
820 {
821 #ifndef FLUSH_KERNEL_NAME_CACHE
822 	return unmount_node((am_node*) vp);
823 #else /* FLUSH_KERNEL_NAME_CACHE */
824 	/*
825 	 * This code should just say:
826 	 * return unmount_node((am_node *) vp);
827 	 *
828 	 * However...
829 	 * The kernel keeps a cached copy of filehandles,
830 	 * and doesn't ever uncache them (apparently).  So
831 	 * when Amd times out a node the kernel will have a
832 	 * stale filehandle.  When the kernel next uses the
833 	 * filehandle it gets ESTALE.
834 	 *
835 	 * The workaround:
836 	 * Arrange that when a node is removed an unlink or
837 	 * rmdir is done on that path so that the kernel
838 	 * cache is done.  Yes - yuck.
839 	 *
840 	 * This can all be removed (and the background
841 	 * unmount flag in sfs_ops) if/when the kernel does
842 	 * something smarter.
843 	 *
844 	 * If the unlink or rmdir failed then just log a warning,
845 	 * don't fail the unmount.  This can occur if the kernel
846 	 * client code decides that the object is still referenced
847 	 * and should be renamed rather than discarded.
848 	 *
849 	 * There is still a race condition here...
850 	 * if another process is trying to access the same
851 	 * filesystem at the time we get here, then
852 	 * it will block, since the MF_UNMOUNTING flag will
853 	 * be set.  That may, or may not, cause the entire
854 	 * system to deadlock.  Hmmm...
855 	 */
856 	am_node *mp = (am_node *) vp;
857 	int isauto = mp->am_parent && (mp->am_parent->am_mnt->mf_fattr.type == NFDIR);
858 	int error = unmount_node(mp);
859 	if (error)
860 		return error;
861 	if (isauto && (int)amd_state < (int)Finishing)
862 		flush_kernel_name_cache(mp);
863 
864 	return 0;
865 #endif /* FLUSH_KERNEL_NAME_CACHE */
866 }
867 
868 static void
free_map_if_success(int rc,int term,void * closure)869 free_map_if_success(int rc, int term, void *closure)
870 {
871 	am_node *mp = (am_node *) closure;
872 	mntfs *mf = mp->am_mnt;
873 
874 	/*
875 	 * Not unmounting any more
876 	 */
877 	mf->mf_flags &= ~MFF_UNMOUNTING;
878 
879 	/*
880 	 * If a timeout was deferred because the underlying filesystem
881 	 * was busy then arrange for a timeout as soon as possible.
882 	 */
883 	if (mf->mf_flags & MFF_WANTTIMO) {
884 		mf->mf_flags &= ~MFF_WANTTIMO;
885 		reschedule_timeout_mp();
886 	}
887 
888 	if (term) {
889 		plog(XLOG_ERROR, "unmount for %s got signal %d", mp->am_path, term);
890 #if defined(DEBUG) && defined(SIGTRAP)
891 		/*
892 		 * dbx likes to put a trap on exit().
893 		 * Pretend it succeeded for now...
894 		 */
895 		if (term == SIGTRAP) {
896 			am_unmounted(mp);
897 		}
898 #endif /* DEBUG */
899 		amd_stats.d_uerr++;
900 	} else if (rc) {
901 		if (rc == EBUSY) {
902 			plog(XLOG_STATS, "\"%s\" on %s still active", mp->am_path, mf->mf_mount);
903 		} else {
904 			errno = rc;	/* XXX */
905 			plog(XLOG_ERROR, "%s: unmount: %m", mp->am_path);
906 		}
907 		amd_stats.d_uerr++;
908 	} else {
909 		am_unmounted(mp);
910 	}
911 
912 	/*
913 	 * Wakeup anything waiting for this mount
914 	 */
915 	wakeup(mf);
916 }
917 
918 static int
unmount_mp(am_node * mp)919 unmount_mp(am_node *mp)
920 {
921 	int was_backgrounded = 0;
922 	mntfs *mf = mp->am_mnt;
923 
924 #ifdef notdef
925 	plog(XLOG_INFO, "\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
926 #endif /* notdef */
927 
928 	if ((mf->mf_ops->fs_flags & FS_UBACKGROUND) &&
929 			(mf->mf_flags & MFF_MOUNTED)) {
930 		if (mf->mf_refc == 1 && !FSRV_ISUP(mf->mf_server)) {
931 			/*
932 			 * Don't try to unmount from a server that is known to be down
933 			 */
934 			if (!(mf->mf_flags & MFF_LOGDOWN)) {
935 				/* Only log this once, otherwise gets a bit boring */
936 				plog(XLOG_STATS, "file server %s is down - timeout of \"%s\" ignored", mf->mf_server->fs_host, mp->am_path);
937 				mf->mf_flags |= MFF_LOGDOWN;
938 			}
939 		} else {
940 			/* Clear logdown flag - since the server must be up */
941 			mf->mf_flags &= ~MFF_LOGDOWN;
942 #ifdef DEBUG
943 			dlog("\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
944 			/*dlog("Will background the unmount attempt");*/
945 #endif /* DEBUG */
946 			/*
947 			 * Note that we are unmounting this node
948 			 */
949 			mf->mf_flags |= MFF_UNMOUNTING;
950 			run_task(unmount_node_wrap, mp,
951 				 free_map_if_success, mp);
952 			was_backgrounded = 1;
953 #ifdef DEBUG
954 			dlog("unmount attempt backgrounded");
955 #endif /* DEBUG */
956 		}
957 	} else {
958 #ifdef DEBUG
959 		dlog("\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
960 		dlog("Trying unmount in foreground");
961 #endif
962 		mf->mf_flags |= MFF_UNMOUNTING;
963 		free_map_if_success(unmount_node(mp), 0, mp);
964 #ifdef DEBUG
965 		dlog("unmount attempt done");
966 #endif /* DEBUG */
967 	}
968 
969 	return was_backgrounded;
970 }
971 
972 static void
timeout_mp(void * arg)973 timeout_mp(void *arg)
974 {
975 #define NEVER (time_t) 0
976 #define	smallest_t(t1, t2) \
977 	(t1 != NEVER ? (t2 != NEVER ? (t1 < t2 ? t1 : t2) : t1) : t2)
978 #define IGNORE_FLAGS (MFF_MOUNTING|MFF_UNMOUNTING|MFF_RESTART)
979 
980 	int i;
981 	time_t t = NEVER;
982 	time_t now = clocktime();
983 	int backoff = 0;
984 
985 #ifdef DEBUG
986 	dlog("Timing out automount points...");
987 #endif /* DEBUG */
988 	for (i = last_used_map; i >= 0; --i) {
989 		am_node *mp = exported_ap[i];
990 		mntfs *mf;
991 		/*
992 		 * Just continue if nothing mounted, or can't be timed out.
993 		 */
994 		if (!mp || (mp->am_flags & AMF_NOTIMEOUT))
995 			continue;
996 		/*
997 		 * Pick up mounted filesystem
998 		 */
999 		mf = mp->am_mnt;
1000 		if (!mf)
1001 			continue;
1002 		/*
1003 		 * Don't delete last reference to a restarted filesystem.
1004 		 */
1005 		if ((mf->mf_flags & MFF_RSTKEEP) && mf->mf_refc == 1)
1006 			continue;
1007 		/*
1008 		 * If there is action on this filesystem then ignore it
1009 		 */
1010 		if (!(mf->mf_flags & IGNORE_FLAGS)) {
1011 			int expired = 0;
1012 			mf->mf_flags &= ~MFF_WANTTIMO;
1013 #ifdef DEBUG
1014 			/*dlog("t is initially @%d, zero in %d secs", t, t - now);*/
1015 #endif /* DEBUG */
1016 			if (now >= mp->am_ttl) {
1017 				if (!backoff) {
1018 					expired = 1;
1019 					/*
1020 					 * Move the ttl forward to avoid thrashing effects
1021 					 * on the next call to timeout!
1022 					 */
1023 					/* sun's -tw option */
1024 					if (mp->am_timeo_w < 4 * am_timeo_w)
1025 						mp->am_timeo_w += am_timeo_w;
1026 					mp->am_ttl = now + mp->am_timeo_w;
1027 				} else {
1028 					/*
1029 					 * Just backoff this unmount for
1030 					 * a couple of seconds to avoid
1031 					 * many multiple unmounts being
1032 					 * started in parallel.
1033 					 */
1034 					mp->am_ttl = now + backoff + 1;
1035 				}
1036 			}
1037 			/*
1038 			 * If the next ttl is smallest, use that
1039 			 */
1040 			t = smallest_t(t, mp->am_ttl);
1041 
1042 #ifdef DEBUG
1043 			/*dlog("after ttl t is @%d, zero in %d secs", t, t - now);*/
1044 #endif /* DEBUG */
1045 
1046 			if (!mp->am_child && mf->mf_error >= 0 && expired) {
1047 				/*
1048 				 * If the unmount was backgrounded then
1049 				 * bump the backoff counter.
1050 				 */
1051 				if (unmount_mp(mp)) {
1052 					backoff = 2;
1053 #ifdef DEBUG
1054 					/*dlog("backing off subsequent unmounts by at least %d seconds", backoff);*/
1055 #endif
1056 				}
1057 			}
1058 		} else if (mf->mf_flags & MFF_UNMOUNTING) {
1059 			mf->mf_flags |= MFF_WANTTIMO;
1060 		}
1061 	}
1062 
1063 	if (t == NEVER) {
1064 #ifdef DEBUG
1065 		dlog("No further timeouts");
1066 #endif /* DEBUG */
1067 		t = now + ONE_HOUR;
1068 	}
1069 
1070 	/*
1071 	 * Sanity check to avoid runaways.
1072 	 * Absolutely should never get this but
1073 	 * if you do without this trap amd will thrash.
1074 	 */
1075 	if (t <= now) {
1076 		t = now + 6;	/* XXX */
1077 		plog(XLOG_ERROR, "Got a zero interval in timeout_mp()!");
1078 	}
1079 	/*
1080 	 * XXX - when shutting down, make things happen faster
1081 	 */
1082 	if ((int)amd_state >= (int)Finishing)
1083 		t = now + 1;
1084 #ifdef DEBUG
1085 	dlog("Next mount timeout in %ds", t - now);
1086 #endif /* DEBUG */
1087 
1088 	timeout_mp_id = timeout(t - now, timeout_mp, 0);
1089 
1090 #undef NEVER
1091 #undef smallest_t
1092 #undef IGNORE_FLAGS
1093 }
1094 
1095 /*
1096  * Cause timeout_mp to be called soonest
1097  */
reschedule_timeout_mp()1098 void reschedule_timeout_mp()
1099 {
1100 	if (timeout_mp_id)
1101 		untimeout(timeout_mp_id);
1102 	timeout_mp_id = timeout(0, timeout_mp, 0);
1103 }
1104