1 /* $OpenBSD: map.c,v 1.17 2022/12/28 21:30:15 jmc Exp $ */
2
3 /*-
4 * Copyright (c) 1990 Jan-Simon Pendry
5 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
6 * Copyright (c) 1990, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Jan-Simon Pendry at Imperial College, London.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include "am.h"
38
39 #include <unistd.h>
40
/*
 * Generation numbers.
 *
 * Each node created by amd is stamped with a generation number
 * (see init_map()), and that number is copied into the filehandle
 * sent to the kernel (see mp_to_fh()).  Comparing the two later on
 * makes it safe to reuse a node slot even though the kernel may
 * still hold a cached reference to the slot's previous occupant.
 *
 * Numbers are never reclaimed: the assumption is that a single run
 * of amd cannot possibly consume 2^32 of them.
 */
static unsigned int am_gen = 2;	/* next generation number to hand out */
#define new_gen() (am_gen++)
56
57 am_node **exported_ap = (am_node **) 0;
58 int exported_ap_size = 0;
59 int first_free_map = 0; /* First available free slot */
60 int last_used_map = -1; /* Last unavailable used slot */
61 static int timeout_mp_id; /* Id from last call to timeout */
62
63 /*
64 * This is the default attributes field which
65 * is copied into every new node to be created.
66 * The individual filesystem fs_init() routines
67 * patch the copy to represent the particular
68 * details for the relevant filesystem type
69 */
70 static struct fattr gen_fattr = {
71 NFLNK, /* type */
72 NFSMODE_LNK | 0777, /* mode */
73 1, /* nlink */
74 0, /* uid */
75 0, /* gid */
76 0, /* size */
77 4096, /* blocksize */
78 0, /* rdev */
79 1, /* blocks */
80 0, /* fsid */
81 0, /* fileid */
82 { 0, 0 }, /* atime */
83 { 0, 0 }, /* mtime */
84 { 0, 0 }, /* ctime */
85 };
86
87 /*
88 * Resize exported_ap map
89 */
90 static int
exported_ap_realloc_map(int nsize)91 exported_ap_realloc_map(int nsize)
92 {
93 /*
94 * this shouldn't happen, but...
95 */
96 if (nsize < 0 || nsize == exported_ap_size)
97 return 0;
98
99 exported_ap = xreallocarray(exported_ap, nsize, sizeof *exported_ap);
100
101 if (nsize > exported_ap_size)
102 bzero(exported_ap+exported_ap_size,
103 (nsize - exported_ap_size) * sizeof(am_node*));
104 exported_ap_size = nsize;
105
106 return 1;
107 }
108
109 /*
110 * The root of the mount tree.
111 */
112 am_node *root_node;
113
114 /*
115 * Allocate a new mount slot and create
116 * a new node.
117 * Fills in the map number of the node,
118 * but leaves everything else uninitialised.
119 */
exported_ap_alloc(void)120 am_node *exported_ap_alloc(void)
121 {
122 am_node *mp, **mpp;
123
124 /*
125 * First check if there are any slots left, realloc if needed
126 */
127 if (first_free_map >= exported_ap_size)
128 if (!exported_ap_realloc_map(exported_ap_size + NEXP_AP))
129 return 0;
130
131 /*
132 * Grab the next free slot
133 */
134 mpp = exported_ap + first_free_map;
135 mp = *mpp = ALLOC(am_node);
136 bzero(mp, sizeof(*mp));
137
138 mp->am_mapno = first_free_map++;
139
140 /*
141 * Update free pointer
142 */
143 while (first_free_map < exported_ap_size && exported_ap[first_free_map])
144 first_free_map++;
145
146 if (first_free_map > last_used_map)
147 last_used_map = first_free_map - 1;
148
149 /*
150 * Shrink exported_ap if reasonable
151 */
152 if (last_used_map < exported_ap_size - (NEXP_AP + NEXP_AP_MARGIN))
153 exported_ap_realloc_map(exported_ap_size - NEXP_AP);
154
155 #ifdef DEBUG
156 /*dlog("alloc_exp: last_used_map = %d, first_free_map = %d",
157 last_used_map, first_free_map);*/
158 #endif /* DEBUG */
159
160 return mp;
161 }
162
163 /*
164 * Free a mount slot
165 */
166 static void
exported_ap_free(am_node * mp)167 exported_ap_free(am_node *mp)
168 {
169 /*
170 * Sanity check
171 */
172 if (!mp)
173 return;
174
175 /*
176 * Zero the slot pointer to avoid double free's
177 */
178 exported_ap[mp->am_mapno] = 0;
179
180 /*
181 * Update the free and last_used indices
182 */
183 if (mp->am_mapno == last_used_map)
184 while (last_used_map >= 0 && exported_ap[last_used_map] == 0)
185 --last_used_map;
186
187 if (first_free_map > mp->am_mapno)
188 first_free_map = mp->am_mapno;
189
190 #ifdef DEBUG
191 /*dlog("free_exp: last_used_map = %d, first_free_map = %d",
192 last_used_map, first_free_map);*/
193 #endif /* DEBUG */
194
195 /*
196 * Free the mount node
197 */
198 free(mp);
199 }
200
201 /*
202 * Insert mp into the correct place,
203 * where p_mp is its parent node.
204 * A new node gets placed as the youngest sibling
205 * of any other children, and the parent's child
206 * pointer is adjusted to point to the new child node.
207 */
208 void
insert_am(am_node * mp,am_node * p_mp)209 insert_am(am_node *mp, am_node *p_mp)
210 {
211 /*
212 * If this is going in at the root then flag it
213 * so that it cannot be unmounted by amq.
214 */
215 if (p_mp == root_node)
216 mp->am_flags |= AMF_ROOT;
217 /*
218 * Fill in n-way links
219 */
220 mp->am_parent = p_mp;
221 mp->am_osib = p_mp->am_child;
222 if (mp->am_osib)
223 mp->am_osib->am_ysib = mp;
224 p_mp->am_child = mp;
225 }
226
227 /*
228 * Remove am from its place in the mount tree
229 */
230 static void
remove_am(am_node * mp)231 remove_am(am_node *mp)
232 {
233 /*
234 * 1. Consistency check
235 */
236 if (mp->am_child && mp->am_parent) {
237 plog(XLOG_WARNING, "children of \"%s\" still exist - deleting anyway", mp->am_path);
238 }
239
240 /*
241 * 2. Update parent's child pointer
242 */
243 if (mp->am_parent && mp->am_parent->am_child == mp)
244 mp->am_parent->am_child = mp->am_osib;
245
246 /*
247 * 3. Unlink from sibling chain
248 */
249 if (mp->am_ysib)
250 mp->am_ysib->am_osib = mp->am_osib;
251 if (mp->am_osib)
252 mp->am_osib->am_ysib = mp->am_ysib;
253 }
254
255 /*
256 * Compute a new time to live value for a node.
257 */
258 void
new_ttl(am_node * mp)259 new_ttl(am_node *mp)
260 {
261 mp->am_timeo_w = 0;
262
263 mp->am_ttl = clocktime();
264 mp->am_fattr.atime.seconds = mp->am_ttl;
265 mp->am_ttl += mp->am_timeo; /* sun's -tl option */
266 }
267
268 void
mk_fattr(am_node * mp,int vntype)269 mk_fattr(am_node *mp, int vntype)
270 {
271 switch (vntype) {
272 case NFDIR:
273 mp->am_fattr.type = NFDIR;
274 mp->am_fattr.mode = NFSMODE_DIR | 0555;
275 mp->am_fattr.nlink = 2;
276 mp->am_fattr.size = 512;
277 break;
278 case NFLNK:
279 mp->am_fattr.type = NFLNK;
280 mp->am_fattr.mode = NFSMODE_LNK | 0777;
281 mp->am_fattr.nlink = 1;
282 mp->am_fattr.size = 0;
283 break;
284 default:
285 plog(XLOG_FATAL, "Unknown fattr type %d - ignored", vntype);
286 break;
287 }
288 }
289
290 /*
291 * Initialise an allocated mount node.
292 * It is assumed that the mount node was bzero'd
293 * before getting here so anything that would
294 * be set to zero isn't done here.
295 */
296 void
init_map(am_node * mp,char * dir)297 init_map(am_node *mp, char *dir)
298 {
299 /* mp->am_mapno initialized by exported_ap_alloc */
300 mp->am_mnt = new_mntfs();
301 mp->am_name = strdup(dir);
302 mp->am_path = strdup(dir);
303 /*mp->am_link = 0;*/
304 /*mp->am_parent = 0;*/
305 /*mp->am_ysib = 0;*/
306 /*mp->am_osib = 0;*/
307 /*mp->am_child = 0;*/
308 /*mp->am_flags = 0;*/
309 /*mp->am_error = 0;*/
310 mp->am_gen = new_gen();
311 /*mp->am_pref = 0;*/
312
313 mp->am_timeo = am_timeo;
314 mp->am_attr.status = NFS_OK;
315 mp->am_fattr = gen_fattr;
316 mp->am_fattr.fsid = 42;
317 mp->am_fattr.fileid = 0;
318 mp->am_fattr.atime.seconds = clocktime();
319 mp->am_fattr.atime.useconds = 0;
320 mp->am_fattr.mtime = mp->am_fattr.ctime = mp->am_fattr.atime;
321
322 new_ttl(mp);
323 mp->am_stats.s_mtime = mp->am_fattr.atime.seconds;
324 /*mp->am_private = 0;*/
325 }
326
327 /*
328 * Free a mount node.
329 * The node must be already unmounted.
330 */
331 void
free_map(am_node * mp)332 free_map(am_node *mp)
333 {
334 remove_am(mp);
335
336 free(mp->am_link);
337 free(mp->am_name);
338 free(mp->am_path);
339 free(mp->am_pref);
340
341 if (mp->am_mnt)
342 free_mntfs(mp->am_mnt);
343
344 exported_ap_free(mp);
345 }
346
347 /*
348 * Convert from file handle to
349 * automount node.
350 */
351 am_node *
fh_to_mp3(nfs_fh * fhp,int * rp,int c_or_d)352 fh_to_mp3(nfs_fh *fhp, int *rp, int c_or_d)
353 {
354 struct am_fh *fp = (struct am_fh *) fhp;
355 am_node *ap = 0;
356
357 /*
358 * Check process id matches
359 * If it doesn't then it is probably
360 * from an old kernel cached filehandle
361 * which is now out of date.
362 */
363 if (fp->fhh_pid != mypid)
364 goto drop;
365
366 /*
367 * Make sure the index is valid before
368 * exported_ap is referenced.
369 */
370 if (fp->fhh_id < 0 || fp->fhh_id >= exported_ap_size)
371 goto drop;
372
373 /*
374 * Get hold of the supposed mount node
375 */
376 ap = exported_ap[fp->fhh_id];
377
378 /*
379 * If it exists then maybe...
380 */
381 if (ap) {
382 /*
383 * Check the generation number in the node
384 * matches the one from the kernel. If not
385 * then the old node has been timed out and
386 * a new one allocated.
387 */
388 if (ap->am_gen != fp->fhh_gen) {
389 ap = 0;
390 goto drop;
391 }
392
393 /*
394 * If the node is hung then locate a new node
395 * for it. This implements the replicated filesystem
396 * retries.
397 */
398 if (ap->am_mnt && FSRV_ISDOWN(ap->am_mnt->mf_server) && ap->am_parent) {
399 int error;
400 am_node *orig_ap = ap;
401 #ifdef DEBUG
402 dlog("fh_to_mp3: %s (%s) is hung:- call lookup",
403 orig_ap->am_path, orig_ap->am_mnt->mf_info);
404 #endif /* DEBUG */
405 /*
406 * Update modify time of parent node.
407 * With any luck the kernel will re-stat
408 * the child node and get new information.
409 */
410 orig_ap->am_fattr.mtime.seconds = clocktime();
411
412 /*
413 * Call the parent's lookup routine for an object
414 * with the same name. This may return -1 in error
415 * if a mount is in progress. In any case, if no
416 * mount node is returned the error code is propagated
417 * to the caller.
418 */
419 if (c_or_d == VLOOK_CREATE) {
420 ap = (*orig_ap->am_parent->am_mnt->mf_ops->lookuppn)(orig_ap->am_parent,
421 orig_ap->am_name, &error, c_or_d);
422 } else {
423 ap = 0;
424 error = ESTALE;
425 }
426 if (ap == 0) {
427 if (error < 0 && amd_state == Finishing)
428 error = ENOENT;
429 *rp = error;
430 return 0;
431 }
432 /*
433 * Update last access to original node. This
434 * avoids timing it out and so sending ESTALE
435 * back to the kernel.
436 * XXX - Not sure we need this anymore (jsp, 90/10/6).
437 */
438 new_ttl(orig_ap);
439
440 }
441 /*
442 * Disallow references to objects being unmounted, unless
443 * they are automount points.
444 */
445 if (ap->am_mnt && (ap->am_mnt->mf_flags & MFF_UNMOUNTING) &&
446 !(ap->am_flags & AMF_ROOT)) {
447 if (amd_state == Finishing)
448 *rp = ENOENT;
449 else
450 *rp = -1;
451 return 0;
452 }
453 new_ttl(ap);
454 }
455
456 drop:
457 if (!ap || !ap->am_mnt) {
458 /*
459 * If we are shutting down then it is likely
460 * that this node has disappeared because of
461 * a fast timeout. To avoid things thrashing
462 * just pretend it doesn't exist at all. If
463 * ESTALE is returned, some NFS clients just
464 * keep retrying (stupid or what - if it's
465 * stale now, what's it going to be in 5 minutes?)
466 */
467 if (amd_state == Finishing)
468 *rp = ENOENT;
469 else
470 *rp = ESTALE;
471 amd_stats.d_stale++;
472 }
473
474 return ap;
475 }
476
477 am_node *
fh_to_mp(nfs_fh * fhp)478 fh_to_mp(nfs_fh *fhp)
479 {
480 int dummy;
481 return fh_to_mp2(fhp, &dummy);
482 }
483
484 /*
485 * Convert from automount node to
486 * file handle.
487 */
488 void
mp_to_fh(am_node * mp,struct nfs_fh * fhp)489 mp_to_fh(am_node *mp, struct nfs_fh *fhp)
490 {
491 struct am_fh *fp = (struct am_fh *) fhp;
492
493 /*
494 * Take the process id
495 */
496 fp->fhh_pid = mypid;
497 /*
498 * .. the map number
499 */
500 fp->fhh_id = mp->am_mapno;
501 /*
502 * .. and the generation number
503 */
504 fp->fhh_gen = mp->am_gen;
505 /*
506 * .. to make a "unique" triple that will never
507 * be reallocated except across reboots (which doesn't matter)
508 * or if we are unlucky enough to be given the same
509 * pid as a previous amd (very unlikely).
510 */
511 }
512
513 static am_node *
find_ap2(char * dir,am_node * mp)514 find_ap2(char *dir, am_node *mp)
515 {
516 if (mp) {
517 am_node *mp2;
518 if (strcmp(mp->am_path, dir) == 0)
519 return mp;
520
521 if ((mp->am_mnt->mf_flags & MFF_MOUNTED) &&
522 strcmp(mp->am_mnt->mf_mount, dir) == 0)
523 return mp;
524
525 mp2 = find_ap2(dir, mp->am_osib);
526 if (mp2)
527 return mp2;
528 return find_ap2(dir, mp->am_child);
529 }
530
531 return 0;
532 }
533
534 /*
535 * Find the mount node corresponding
536 * to dir. dir can match either the
537 * automount path or, if the node is
538 * mounted, the mount location.
539 */
540 am_node *
find_ap(char * dir)541 find_ap(char *dir)
542 {
543 int i;
544
545 for (i = last_used_map; i >= 0; --i) {
546 am_node *mp = exported_ap[i];
547 if (mp && (mp->am_flags & AMF_ROOT)) {
548 mp = find_ap2(dir, exported_ap[i]);
549 if (mp)
550 return mp;
551 }
552 }
553
554 return 0;
555 }
556
557 /*
558 * Find the mount node corresponding
559 * to the mntfs structure.
560 */
561 am_node *
find_mf(mntfs * mf)562 find_mf(mntfs *mf)
563 {
564 int i;
565
566 for (i = last_used_map; i >= 0; --i) {
567 am_node *mp = exported_ap[i];
568 if (mp && mp->am_mnt == mf)
569 return mp;
570 }
571 return 0;
572 }
573
574 /*
575 * Get the filehandle for a particular named directory.
576 * This is used during the bootstrap to tell the kernel
577 * the filehandles of the initial automount points.
578 */
579 nfs_fh *
root_fh(char * dir)580 root_fh(char *dir)
581 {
582 static nfs_fh nfh;
583 am_node *mp = root_ap(dir, TRUE);
584 if (mp) {
585 mp_to_fh(mp, &nfh);
586 /*
587 * Patch up PID to match main server...
588 */
589 if (!foreground) {
590 pid_t pid = getppid();
591 ((struct am_fh *) &nfh)->fhh_pid = pid;
592 #ifdef DEBUG
593 dlog("root_fh substitutes pid %d", (int)pid);
594 #endif
595 }
596 return &nfh;
597 }
598
599 /*
600 * Should never get here...
601 */
602 plog(XLOG_ERROR, "Can't find root filehandle for %s", dir);
603 return 0;
604 }
605
606 am_node *
root_ap(char * dir,int path)607 root_ap(char *dir, int path)
608 {
609 am_node *mp = find_ap(dir);
610 if (mp && mp->am_parent == root_node)
611 return mp;
612
613 return 0;
614 }
615
616 /*
617 * Timeout all nodes waiting on
618 * a given Fserver.
619 */
620 void
map_flush_srvr(fserver * fs)621 map_flush_srvr(fserver *fs)
622 {
623 int i;
624 int done = 0;
625
626 for (i = last_used_map; i >= 0; --i) {
627 am_node *mp = exported_ap[i];
628 if (mp && mp->am_mnt && mp->am_mnt->mf_server == fs) {
629 plog(XLOG_INFO, "Flushed %s; dependent on %s", mp->am_path, fs->fs_host);
630 mp->am_ttl = clocktime();
631 done = 1;
632 }
633 }
634 if (done)
635 reschedule_timeout_mp();
636 }
637
638 /*
639 * Mount a top level automount node
640 * by calling lookup in the parent
641 * (root) node which will cause the
642 * automount node to be automounted.
643 */
644 int
mount_auto_node(char * dir,void * arg)645 mount_auto_node(char *dir, void *arg)
646 {
647 int error = 0;
648
649 (void) afs_ops.lookuppn((am_node *) arg, dir, &error, VLOOK_CREATE);
650 if (error > 0) {
651 errno = error; /* XXX */
652 plog(XLOG_ERROR, "Could not mount %s: %m", dir);
653 }
654 return error;
655 }
656
657 /*
658 * Cause all the top-level mount nodes
659 * to be automounted
660 */
661 int
mount_exported(void)662 mount_exported(void)
663 {
664 /*
665 * Iterate over all the nodes to be started
666 */
667 return root_keyiter((void (*)(char *, void *)) mount_auto_node, root_node);
668 }
669
670 /*
671 * Construct top-level node
672 */
673 void
make_root_node(void)674 make_root_node(void)
675 {
676 mntfs *root_mnt;
677 char *rootmap = ROOT_MAP;
678 root_node = exported_ap_alloc();
679
680 /*
681 * Allocate a new map
682 */
683 init_map(root_node, "");
684 /*
685 * Allocate a new mounted filesystem
686 */
687 root_mnt = find_mntfs(&root_ops, (am_opts *) 0, "", rootmap, "", "", "");
688 /*
689 * Replace the initial null reference
690 */
691 free_mntfs(root_node->am_mnt);
692 root_node->am_mnt = root_mnt;
693
694 /*
695 * Initialise the root
696 */
697 if (root_mnt->mf_ops->fs_init)
698 (*root_mnt->mf_ops->fs_init)(root_mnt);
699
700 /*
701 * Mount the root
702 */
703 root_mnt->mf_error = (*root_mnt->mf_ops->mount_fs)(root_node);
704 }
705
706 /*
707 * Cause all the nodes to be unmounted by timing
708 * them out.
709 */
710 void
umount_exported(void)711 umount_exported(void)
712 {
713 int i;
714
715 for (i = last_used_map; i >= 0; --i) {
716 am_node *mp = exported_ap[i];
717 if (mp) {
718 mntfs *mf = mp->am_mnt;
719 if (mf->mf_flags & MFF_UNMOUNTING) {
720 /*
721 * If this node is being unmounted then
722 * just ignore it. However, this could
723 * prevent amd from finishing if the
724 * unmount gets blocked since the am_node
725 * will never be free'd. am_unmounted needs
726 * telling about this possibility. - XXX
727 */
728 continue;
729 }
730 if (mf && !(mf->mf_ops->fs_flags & FS_DIRECTORY)) {
731 /*
732 * When shutting down this had better
733 * look like a directory, otherwise it
734 * can't be unmounted!
735 */
736 mk_fattr(mp, NFDIR);
737 }
738 if ((--immediate_abort < 0 && !(mp->am_flags & AMF_ROOT) && mp->am_parent) ||
739 (mf->mf_flags & MFF_RESTART)) {
740 /*
741 * Just throw this node away without
742 * bothering to unmount it. If the
743 * server is not known to be up then
744 * don't discard the mounted on directory
745 * or Amd might hang...
746 */
747 if (mf->mf_server &&
748 (mf->mf_server->fs_flags & (FSF_DOWN|FSF_VALID)) != FSF_VALID)
749 mf->mf_flags &= ~MFF_MKMNT;
750 am_unmounted(mp);
751 } else {
752 /*
753 * Any other node gets forcibly
754 * timed out
755 */
756 mp->am_flags &= ~AMF_NOTIMEOUT;
757 mp->am_mnt->mf_flags &= ~MFF_RSTKEEP;
758 mp->am_ttl = 0;
759 mp->am_timeo = 1;
760 mp->am_timeo_w = 0;
761 }
762 }
763 }
764 }
765
766 static int
unmount_node(am_node * mp)767 unmount_node(am_node *mp)
768 {
769 mntfs *mf = mp->am_mnt;
770 int error;
771
772 if ((mf->mf_flags & MFF_ERROR) || mf->mf_refc > 1) {
773 /*
774 * Just unlink
775 */
776 #ifdef DEBUG
777 if (mf->mf_flags & MFF_ERROR)
778 dlog("No-op unmount of error node %s", mf->mf_info);
779 #endif /* DEBUG */
780 error = 0;
781 } else {
782 #ifdef DEBUG
783 dlog("Unmounting %s (%s)", mf->mf_mount, mf->mf_info);
784 #endif /* DEBUG */
785 error = (*mf->mf_ops->umount_fs)(mp);
786 }
787
788 if (error) {
789 #ifdef DEBUG
790 errno = error; /* XXX */
791 dlog("%s: unmount: %m", mf->mf_mount);
792 #endif /* DEBUG */
793 }
794
795 return error;
796 }
797
#ifdef FLUSH_KERNEL_NAME_CACHE
/*
 * Issue an unlink() (for a link) or rmdir() (for a directory) on
 * the node's automount path, logging a warning on failure.
 * Nodes of any other type are left alone.
 */
static void
flush_kernel_name_cache(am_node *mp)
{
	int failed = 0;

	switch (mp->am_mnt->mf_fattr.type) {
	case NFLNK:
		failed = (unlink(mp->am_path) < 0);
		break;
	case NFDIR:
		failed = (rmdir(mp->am_path) < 0);
		break;
	default:
		break;
	}

	if (failed)
		plog(XLOG_WARNING, "failed to clear \"%s\" from dnlc: %m", mp->am_path);
}
#endif /* FLUSH_KERNEL_NAME_CACHE */
817
818 static int
unmount_node_wrap(void * vp)819 unmount_node_wrap(void *vp)
820 {
821 #ifndef FLUSH_KERNEL_NAME_CACHE
822 return unmount_node((am_node*) vp);
823 #else /* FLUSH_KERNEL_NAME_CACHE */
824 /*
825 * This code should just say:
826 * return unmount_node((am_node *) vp);
827 *
828 * However...
829 * The kernel keeps a cached copy of filehandles,
830 * and doesn't ever uncache them (apparently). So
831 * when Amd times out a node the kernel will have a
832 * stale filehandle. When the kernel next uses the
833 * filehandle it gets ESTALE.
834 *
835 * The workaround:
836 * Arrange that when a node is removed an unlink or
837 * rmdir is done on that path so that the kernel
838 * cache is done. Yes - yuck.
839 *
840 * This can all be removed (and the background
841 * unmount flag in sfs_ops) if/when the kernel does
842 * something smarter.
843 *
844 * If the unlink or rmdir failed then just log a warning,
845 * don't fail the unmount. This can occur if the kernel
846 * client code decides that the object is still referenced
847 * and should be renamed rather than discarded.
848 *
849 * There is still a race condition here...
850 * if another process is trying to access the same
851 * filesystem at the time we get here, then
852 * it will block, since the MF_UNMOUNTING flag will
853 * be set. That may, or may not, cause the entire
854 * system to deadlock. Hmmm...
855 */
856 am_node *mp = (am_node *) vp;
857 int isauto = mp->am_parent && (mp->am_parent->am_mnt->mf_fattr.type == NFDIR);
858 int error = unmount_node(mp);
859 if (error)
860 return error;
861 if (isauto && (int)amd_state < (int)Finishing)
862 flush_kernel_name_cache(mp);
863
864 return 0;
865 #endif /* FLUSH_KERNEL_NAME_CACHE */
866 }
867
868 static void
free_map_if_success(int rc,int term,void * closure)869 free_map_if_success(int rc, int term, void *closure)
870 {
871 am_node *mp = (am_node *) closure;
872 mntfs *mf = mp->am_mnt;
873
874 /*
875 * Not unmounting any more
876 */
877 mf->mf_flags &= ~MFF_UNMOUNTING;
878
879 /*
880 * If a timeout was deferred because the underlying filesystem
881 * was busy then arrange for a timeout as soon as possible.
882 */
883 if (mf->mf_flags & MFF_WANTTIMO) {
884 mf->mf_flags &= ~MFF_WANTTIMO;
885 reschedule_timeout_mp();
886 }
887
888 if (term) {
889 plog(XLOG_ERROR, "unmount for %s got signal %d", mp->am_path, term);
890 #if defined(DEBUG) && defined(SIGTRAP)
891 /*
892 * dbx likes to put a trap on exit().
893 * Pretend it succeeded for now...
894 */
895 if (term == SIGTRAP) {
896 am_unmounted(mp);
897 }
898 #endif /* DEBUG */
899 amd_stats.d_uerr++;
900 } else if (rc) {
901 if (rc == EBUSY) {
902 plog(XLOG_STATS, "\"%s\" on %s still active", mp->am_path, mf->mf_mount);
903 } else {
904 errno = rc; /* XXX */
905 plog(XLOG_ERROR, "%s: unmount: %m", mp->am_path);
906 }
907 amd_stats.d_uerr++;
908 } else {
909 am_unmounted(mp);
910 }
911
912 /*
913 * Wakeup anything waiting for this mount
914 */
915 wakeup(mf);
916 }
917
918 static int
unmount_mp(am_node * mp)919 unmount_mp(am_node *mp)
920 {
921 int was_backgrounded = 0;
922 mntfs *mf = mp->am_mnt;
923
924 #ifdef notdef
925 plog(XLOG_INFO, "\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
926 #endif /* notdef */
927
928 if ((mf->mf_ops->fs_flags & FS_UBACKGROUND) &&
929 (mf->mf_flags & MFF_MOUNTED)) {
930 if (mf->mf_refc == 1 && !FSRV_ISUP(mf->mf_server)) {
931 /*
932 * Don't try to unmount from a server that is known to be down
933 */
934 if (!(mf->mf_flags & MFF_LOGDOWN)) {
935 /* Only log this once, otherwise gets a bit boring */
936 plog(XLOG_STATS, "file server %s is down - timeout of \"%s\" ignored", mf->mf_server->fs_host, mp->am_path);
937 mf->mf_flags |= MFF_LOGDOWN;
938 }
939 } else {
940 /* Clear logdown flag - since the server must be up */
941 mf->mf_flags &= ~MFF_LOGDOWN;
942 #ifdef DEBUG
943 dlog("\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
944 /*dlog("Will background the unmount attempt");*/
945 #endif /* DEBUG */
946 /*
947 * Note that we are unmounting this node
948 */
949 mf->mf_flags |= MFF_UNMOUNTING;
950 run_task(unmount_node_wrap, mp,
951 free_map_if_success, mp);
952 was_backgrounded = 1;
953 #ifdef DEBUG
954 dlog("unmount attempt backgrounded");
955 #endif /* DEBUG */
956 }
957 } else {
958 #ifdef DEBUG
959 dlog("\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
960 dlog("Trying unmount in foreground");
961 #endif
962 mf->mf_flags |= MFF_UNMOUNTING;
963 free_map_if_success(unmount_node(mp), 0, mp);
964 #ifdef DEBUG
965 dlog("unmount attempt done");
966 #endif /* DEBUG */
967 }
968
969 return was_backgrounded;
970 }
971
972 static void
timeout_mp(void * arg)973 timeout_mp(void *arg)
974 {
975 #define NEVER (time_t) 0
976 #define smallest_t(t1, t2) \
977 (t1 != NEVER ? (t2 != NEVER ? (t1 < t2 ? t1 : t2) : t1) : t2)
978 #define IGNORE_FLAGS (MFF_MOUNTING|MFF_UNMOUNTING|MFF_RESTART)
979
980 int i;
981 time_t t = NEVER;
982 time_t now = clocktime();
983 int backoff = 0;
984
985 #ifdef DEBUG
986 dlog("Timing out automount points...");
987 #endif /* DEBUG */
988 for (i = last_used_map; i >= 0; --i) {
989 am_node *mp = exported_ap[i];
990 mntfs *mf;
991 /*
992 * Just continue if nothing mounted, or can't be timed out.
993 */
994 if (!mp || (mp->am_flags & AMF_NOTIMEOUT))
995 continue;
996 /*
997 * Pick up mounted filesystem
998 */
999 mf = mp->am_mnt;
1000 if (!mf)
1001 continue;
1002 /*
1003 * Don't delete last reference to a restarted filesystem.
1004 */
1005 if ((mf->mf_flags & MFF_RSTKEEP) && mf->mf_refc == 1)
1006 continue;
1007 /*
1008 * If there is action on this filesystem then ignore it
1009 */
1010 if (!(mf->mf_flags & IGNORE_FLAGS)) {
1011 int expired = 0;
1012 mf->mf_flags &= ~MFF_WANTTIMO;
1013 #ifdef DEBUG
1014 /*dlog("t is initially @%d, zero in %d secs", t, t - now);*/
1015 #endif /* DEBUG */
1016 if (now >= mp->am_ttl) {
1017 if (!backoff) {
1018 expired = 1;
1019 /*
1020 * Move the ttl forward to avoid thrashing effects
1021 * on the next call to timeout!
1022 */
1023 /* sun's -tw option */
1024 if (mp->am_timeo_w < 4 * am_timeo_w)
1025 mp->am_timeo_w += am_timeo_w;
1026 mp->am_ttl = now + mp->am_timeo_w;
1027 } else {
1028 /*
1029 * Just backoff this unmount for
1030 * a couple of seconds to avoid
1031 * many multiple unmounts being
1032 * started in parallel.
1033 */
1034 mp->am_ttl = now + backoff + 1;
1035 }
1036 }
1037 /*
1038 * If the next ttl is smallest, use that
1039 */
1040 t = smallest_t(t, mp->am_ttl);
1041
1042 #ifdef DEBUG
1043 /*dlog("after ttl t is @%d, zero in %d secs", t, t - now);*/
1044 #endif /* DEBUG */
1045
1046 if (!mp->am_child && mf->mf_error >= 0 && expired) {
1047 /*
1048 * If the unmount was backgrounded then
1049 * bump the backoff counter.
1050 */
1051 if (unmount_mp(mp)) {
1052 backoff = 2;
1053 #ifdef DEBUG
1054 /*dlog("backing off subsequent unmounts by at least %d seconds", backoff);*/
1055 #endif
1056 }
1057 }
1058 } else if (mf->mf_flags & MFF_UNMOUNTING) {
1059 mf->mf_flags |= MFF_WANTTIMO;
1060 }
1061 }
1062
1063 if (t == NEVER) {
1064 #ifdef DEBUG
1065 dlog("No further timeouts");
1066 #endif /* DEBUG */
1067 t = now + ONE_HOUR;
1068 }
1069
1070 /*
1071 * Sanity check to avoid runaways.
1072 * Absolutely should never get this but
1073 * if you do without this trap amd will thrash.
1074 */
1075 if (t <= now) {
1076 t = now + 6; /* XXX */
1077 plog(XLOG_ERROR, "Got a zero interval in timeout_mp()!");
1078 }
1079 /*
1080 * XXX - when shutting down, make things happen faster
1081 */
1082 if ((int)amd_state >= (int)Finishing)
1083 t = now + 1;
1084 #ifdef DEBUG
1085 dlog("Next mount timeout in %ds", t - now);
1086 #endif /* DEBUG */
1087
1088 timeout_mp_id = timeout(t - now, timeout_mp, 0);
1089
1090 #undef NEVER
1091 #undef smallest_t
1092 #undef IGNORE_FLAGS
1093 }
1094
1095 /*
1096 * Cause timeout_mp to be called soonest
1097 */
reschedule_timeout_mp()1098 void reschedule_timeout_mp()
1099 {
1100 if (timeout_mp_id)
1101 untimeout(timeout_mp_id);
1102 timeout_mp_id = timeout(0, timeout_mp, 0);
1103 }
1104