xref: /netbsd/sys/fs/puffs/puffs_msgif.c (revision 6550d01e)
1 /*	$NetBSD: puffs_msgif.c,v 1.84 2010/11/15 20:31:41 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by the
7  * Google Summer of Code program and the Ulla Tuominen Foundation.
8  * The Google SoC project was mentored by Bill Studenmund.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.84 2010/11/15 20:31:41 pooka Exp $");
34 
35 #include <sys/param.h>
36 #include <sys/atomic.h>
37 #include <sys/kmem.h>
38 #include <sys/kthread.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mount.h>
42 #include <sys/namei.h>
43 #include <sys/proc.h>
44 #include <sys/vnode.h>
45 #include <sys/atomic.h>
46 
47 #include <dev/putter/putter_sys.h>
48 
49 #include <fs/puffs/puffs_msgif.h>
50 #include <fs/puffs/puffs_sys.h>
51 
52 #include <miscfs/syncfs/syncfs.h> /* XXX: for syncer_mutex reference */
53 
54 /*
55  * waitq data structures
56  */
57 
58 /*
59  * While a request is going to userspace, park the caller within the
60  * kernel.  This is the kernel counterpart of "struct puffs_req".
61  */
62 struct puffs_msgpark {
63 	struct puffs_req	*park_preq;	/* req followed by buf	*/
64 
65 	size_t			park_copylen;	/* userspace copylength	*/
66 	size_t			park_maxlen;	/* max size in comeback */
67 
68 	struct puffs_req	*park_creq;	/* non-compat preq	*/
69 	size_t			park_creqlen;	/* non-compat preq len	*/
70 
71 	parkdone_fn		park_done;	/* "biodone" a'la puffs	*/
72 	void			*park_donearg;
73 
74 	int			park_flags;
75 	int			park_refcount;
76 
77 	kcondvar_t		park_cv;
78 	kmutex_t		park_mtx;
79 
80 	TAILQ_ENTRY(puffs_msgpark) park_entries;
81 };
82 #define PARKFLAG_WAITERGONE	0x01
83 #define PARKFLAG_DONE		0x02
84 #define PARKFLAG_ONQUEUE1	0x04
85 #define PARKFLAG_ONQUEUE2	0x08
86 #define PARKFLAG_CALL		0x10
87 #define PARKFLAG_WANTREPLY	0x20
88 #define	PARKFLAG_HASERROR	0x40
89 
90 static pool_cache_t parkpc;
91 #ifdef PUFFSDEBUG
92 static int totalpark;
93 #endif
94 
95 static int
96 makepark(void *arg, void *obj, int flags)
97 {
98 	struct puffs_msgpark *park = obj;
99 
100 	mutex_init(&park->park_mtx, MUTEX_DEFAULT, IPL_NONE);
101 	cv_init(&park->park_cv, "puffsrpl");
102 
103 	return 0;
104 }
105 
106 static void
107 nukepark(void *arg, void *obj)
108 {
109 	struct puffs_msgpark *park = obj;
110 
111 	cv_destroy(&park->park_cv);
112 	mutex_destroy(&park->park_mtx);
113 }
114 
115 void
116 puffs_msgif_init(void)
117 {
118 
119 	parkpc = pool_cache_init(sizeof(struct puffs_msgpark), 0, 0, 0,
120 	    "puffprkl", NULL, IPL_NONE, makepark, nukepark, NULL);
121 }
122 
123 void
124 puffs_msgif_destroy(void)
125 {
126 
127 	pool_cache_destroy(parkpc);
128 }
129 
130 static struct puffs_msgpark *
131 puffs_msgpark_alloc(int waitok)
132 {
133 	struct puffs_msgpark *park;
134 
135 	park = pool_cache_get(parkpc, waitok ? PR_WAITOK : PR_NOWAIT);
136 	if (park == NULL)
137 		return park;
138 
139 	park->park_refcount = 1;
140 	park->park_preq = park->park_creq = NULL;
141 	park->park_flags = PARKFLAG_WANTREPLY;
142 
143 #ifdef PUFFSDEBUG
144 	totalpark++;
145 #endif
146 
147 	return park;
148 }
149 
150 static void
151 puffs_msgpark_reference(struct puffs_msgpark *park)
152 {
153 
154 	KASSERT(mutex_owned(&park->park_mtx));
155 	park->park_refcount++;
156 }
157 
158 /*
159  * Release reference to park structure.
160  */
161 static void
162 puffs_msgpark_release1(struct puffs_msgpark *park, int howmany)
163 {
164 	struct puffs_req *preq = park->park_preq;
165 	struct puffs_req *creq = park->park_creq;
166 	int refcnt;
167 
168 	KASSERT(mutex_owned(&park->park_mtx));
169 	refcnt = park->park_refcount -= howmany;
170 	mutex_exit(&park->park_mtx);
171 
172 	KASSERT(refcnt >= 0);
173 
174 	if (refcnt == 0) {
175 		if (preq)
176 			kmem_free(preq, park->park_maxlen);
177 #if 1
178 		if (creq)
179 			kmem_free(creq, park->park_creqlen);
180 #endif
181 		pool_cache_put(parkpc, park);
182 
183 #ifdef PUFFSDEBUG
184 		totalpark--;
185 #endif
186 	}
187 }
188 #define puffs_msgpark_release(a) puffs_msgpark_release1(a, 1)
189 
190 #ifdef PUFFSDEBUG
191 static void
192 parkdump(struct puffs_msgpark *park)
193 {
194 
195 	DPRINTF(("park %p, preq %p, id %" PRIu64 "\n"
196 	    "\tcopy %zu, max %zu - done: %p/%p\n"
197 	    "\tflags 0x%08x, refcount %d, cv/mtx: %p/%p\n",
198 	    park, park->park_preq, park->park_preq->preq_id,
199 	    park->park_copylen, park->park_maxlen,
200 	    park->park_done, park->park_donearg,
201 	    park->park_flags, park->park_refcount,
202 	    &park->park_cv, &park->park_mtx));
203 }
204 
205 static void
206 parkqdump(struct puffs_wq *q, int dumpall)
207 {
208 	struct puffs_msgpark *park;
209 	int total = 0;
210 
211 	TAILQ_FOREACH(park, q, park_entries) {
212 		if (dumpall)
213 			parkdump(park);
214 		total++;
215 	}
216 	DPRINTF(("puffs waitqueue at %p dumped, %d total\n", q, total));
217 
218 }
219 #endif /* PUFFSDEBUG */
220 
221 /*
222  * A word about locking in the park structures: the lock protects the
223  * fields of the *park* structure (not preq) and acts as an interlock
224  * in cv operations.  The lock is always internal to this module and
225  * callers do not need to worry about it.
226  */
227 
228 int
229 puffs_msgmem_alloc(size_t len, struct puffs_msgpark **ppark, void **mem,
230 	int cansleep)
231 {
232 	struct puffs_msgpark *park;
233 	void *m;
234 
235 	m = kmem_zalloc(len, cansleep ? KM_SLEEP : KM_NOSLEEP);
236 	if (m == NULL) {
237 		KASSERT(cansleep == 0);
238 		return ENOMEM;
239 	}
240 
241 	park = puffs_msgpark_alloc(cansleep);
242 	if (park == NULL) {
243 		KASSERT(cansleep == 0);
244 		kmem_free(m, len);
245 		return ENOMEM;
246 	}
247 
248 	park->park_preq = m;
249 	park->park_maxlen = park->park_copylen = len;
250 
251 	*ppark = park;
252 	*mem = m;
253 
254 	return 0;
255 }
256 
257 void
258 puffs_msgmem_release(struct puffs_msgpark *park)
259 {
260 
261 	if (park == NULL)
262 		return;
263 
264 	mutex_enter(&park->park_mtx);
265 	puffs_msgpark_release(park);
266 }
267 
268 void
269 puffs_msg_setfaf(struct puffs_msgpark *park)
270 {
271 
272 	KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
273 	park->park_flags &= ~PARKFLAG_WANTREPLY;
274 }
275 
276 void
277 puffs_msg_setdelta(struct puffs_msgpark *park, size_t delta)
278 {
279 
280 	KASSERT(delta < park->park_maxlen); /* "<=" wouldn't make sense */
281 	park->park_copylen = park->park_maxlen - delta;
282 }
283 
284 void
285 puffs_msg_setinfo(struct puffs_msgpark *park, int class, int type,
286 	puffs_cookie_t ck)
287 {
288 
289 	park->park_preq->preq_opclass = PUFFSOP_OPCLASS(class);
290 	park->park_preq->preq_optype = type;
291 	park->park_preq->preq_cookie = ck;
292 }
293 
294 void
295 puffs_msg_setcall(struct puffs_msgpark *park, parkdone_fn donefn, void *donearg)
296 {
297 
298 	KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
299 	park->park_done = donefn;
300 	park->park_donearg = donearg;
301 	park->park_flags |= PARKFLAG_CALL;
302 }
303 
304 /*
305  * kernel-user-kernel waitqueues
306  */
307 
308 static uint64_t
309 puffs_getmsgid(struct puffs_mount *pmp)
310 {
311 	uint64_t rv;
312 
313 	mutex_enter(&pmp->pmp_lock);
314 	rv = pmp->pmp_nextmsgid++;
315 	mutex_exit(&pmp->pmp_lock);
316 
317 	return rv;
318 }
319 
320 /*
321  * A word about reference counting of parks.  A reference must be taken
322  * when accessing a park and additionally when it is on a queue.  So
323  * when taking it off a queue and releasing the access reference, the
324  * reference count is generally decremented by 2.
325  */
326 
327 void
328 puffs_msg_enqueue(struct puffs_mount *pmp, struct puffs_msgpark *park)
329 {
330 	struct lwp *l = curlwp;
331 	struct mount *mp;
332 	struct puffs_req *preq, *creq;
333 	ssize_t delta;
334 
335 	/*
336 	 * Some clients reuse a park, so reset some flags.  We might
337 	 * want to provide a caller-side interface for this and add
338 	 * a few more invariant checks here, but this will do for now.
339 	 */
340 	park->park_flags &= ~(PARKFLAG_DONE | PARKFLAG_HASERROR);
341 	KASSERT((park->park_flags & PARKFLAG_WAITERGONE) == 0);
342 
343 	mp = PMPTOMP(pmp);
344 	preq = park->park_preq;
345 
346 #if 1
347 	/* check if we do compat adjustments */
348 	if (pmp->pmp_docompat && puffs_compat_outgoing(preq, &creq, &delta)) {
349 		park->park_creq = park->park_preq;
350 		park->park_creqlen = park->park_maxlen;
351 
352 		park->park_maxlen += delta;
353 		park->park_copylen += delta;
354 		park->park_preq = preq = creq;
355 	}
356 #endif
357 
358 	preq->preq_buflen = park->park_maxlen;
359 	KASSERT(preq->preq_id == 0
360 	    || (preq->preq_opclass & PUFFSOPFLAG_ISRESPONSE));
361 
362 	if ((park->park_flags & PARKFLAG_WANTREPLY) == 0)
363 		preq->preq_opclass |= PUFFSOPFLAG_FAF;
364 	else
365 		preq->preq_id = puffs_getmsgid(pmp);
366 
367 	/* fill in caller information */
368 	preq->preq_pid = l->l_proc->p_pid;
369 	preq->preq_lid = l->l_lid;
370 
371 	/*
372 	 * To support cv_sig, yet another movie: check if there are signals
373 	 * pending and we are issueing a non-FAF.  If so, return an error
374 	 * directly UNLESS we are issueing INACTIVE/RECLAIM.  In that case,
375 	 * convert it to a FAF, fire off to the file server and return
376 	 * an error.  Yes, this is bordering disgusting.  Barfbags are on me.
377 	 */
378 	if (__predict_false((park->park_flags & PARKFLAG_WANTREPLY)
379 	   && (park->park_flags & PARKFLAG_CALL) == 0
380 	   && (l->l_flag & LW_PENDSIG) != 0 && sigispending(l, 0))) {
381 		sigset_t ss;
382 
383 		/*
384 		 * see the comment about signals in puffs_msg_wait.
385 		 */
386 		sigpending1(l, &ss);
387 		if (sigismember(&ss, SIGINT) ||
388 		    sigismember(&ss, SIGTERM) ||
389 		    sigismember(&ss, SIGKILL) ||
390 		    sigismember(&ss, SIGHUP) ||
391 		    sigismember(&ss, SIGQUIT)) {
392 			park->park_flags |= PARKFLAG_HASERROR;
393 			preq->preq_rv = EINTR;
394 			if (PUFFSOP_OPCLASS(preq->preq_opclass) == PUFFSOP_VN
395 			    && (preq->preq_optype == PUFFS_VN_INACTIVE
396 			     || preq->preq_optype == PUFFS_VN_RECLAIM)) {
397 				park->park_preq->preq_opclass |=
398 				    PUFFSOPFLAG_FAF;
399 				park->park_flags &= ~PARKFLAG_WANTREPLY;
400 				DPRINTF(("puffs_msg_enqueue: "
401 				    "converted to FAF %p\n", park));
402 			} else {
403 				return;
404 			}
405 		}
406 	}
407 
408 	mutex_enter(&pmp->pmp_lock);
409 	if (pmp->pmp_status != PUFFSTAT_RUNNING) {
410 		mutex_exit(&pmp->pmp_lock);
411 		park->park_flags |= PARKFLAG_HASERROR;
412 		preq->preq_rv = ENXIO;
413 		return;
414 	}
415 
416 #ifdef PUFFSDEBUG
417 	parkqdump(&pmp->pmp_msg_touser, puffsdebug > 1);
418 	parkqdump(&pmp->pmp_msg_replywait, puffsdebug > 1);
419 #endif
420 
421 	/*
422 	 * Note: we don't need to lock park since we have the only
423 	 * reference to it at this point.
424 	 */
425 	TAILQ_INSERT_TAIL(&pmp->pmp_msg_touser, park, park_entries);
426 	park->park_flags |= PARKFLAG_ONQUEUE1;
427 	pmp->pmp_msg_touser_count++;
428 	park->park_refcount++;
429 	mutex_exit(&pmp->pmp_lock);
430 
431 	cv_broadcast(&pmp->pmp_msg_waiter_cv);
432 	putter_notify(pmp->pmp_pi);
433 
434 	DPRINTF(("touser: req %" PRIu64 ", preq: %p, park: %p, "
435 	    "c/t: 0x%x/0x%x, f: 0x%x\n", preq->preq_id, preq, park,
436 	    preq->preq_opclass, preq->preq_optype, park->park_flags));
437 }
438 
439 int
440 puffs_msg_wait(struct puffs_mount *pmp, struct puffs_msgpark *park)
441 {
442 	lwp_t *l = curlwp;
443 	proc_t *p = l->l_proc;
444 	struct puffs_req *preq = park->park_preq; /* XXX: hmmm */
445 	sigset_t ss;
446 	sigset_t oss;
447 	int error = 0;
448 	int rv;
449 
450 	/*
451 	 * block unimportant signals.
452 	 *
453 	 * The set of "important" signals here was chosen to be same as
454 	 * nfs interruptible mount.
455 	 */
456 	sigfillset(&ss);
457 	sigdelset(&ss, SIGINT);
458 	sigdelset(&ss, SIGTERM);
459 	sigdelset(&ss, SIGKILL);
460 	sigdelset(&ss, SIGHUP);
461 	sigdelset(&ss, SIGQUIT);
462 	mutex_enter(p->p_lock);
463 	sigprocmask1(l, SIG_BLOCK, &ss, &oss);
464 	mutex_exit(p->p_lock);
465 
466 	mutex_enter(&pmp->pmp_lock);
467 	puffs_mp_reference(pmp);
468 	mutex_exit(&pmp->pmp_lock);
469 
470 	mutex_enter(&park->park_mtx);
471 	if ((park->park_flags & PARKFLAG_WANTREPLY) == 0
472 	    || (park->park_flags & PARKFLAG_CALL)) {
473 		mutex_exit(&park->park_mtx);
474 		rv = 0;
475 		goto skipwait;
476 	}
477 
478 	/* did the response beat us to the wait? */
479 	if (__predict_false((park->park_flags & PARKFLAG_DONE)
480 	    || (park->park_flags & PARKFLAG_HASERROR))) {
481 		rv = park->park_preq->preq_rv;
482 		mutex_exit(&park->park_mtx);
483 		goto skipwait;
484 	}
485 
486 	error = cv_wait_sig(&park->park_cv, &park->park_mtx);
487 	DPRINTF(("puffs_touser: waiter for %p woke up with %d\n",
488 	    park, error));
489 	if (error) {
490 		park->park_flags |= PARKFLAG_WAITERGONE;
491 		if (park->park_flags & PARKFLAG_DONE) {
492 			rv = preq->preq_rv;
493 			mutex_exit(&park->park_mtx);
494 		} else {
495 			/*
496 			 * ok, we marked it as going away, but
497 			 * still need to do queue ops.  take locks
498 			 * in correct order.
499 			 *
500 			 * We don't want to release our reference
501 			 * if it's on replywait queue to avoid error
502 			 * to file server.  putop() code will DTRT.
503 			 */
504 			mutex_exit(&park->park_mtx);
505 			mutex_enter(&pmp->pmp_lock);
506 			mutex_enter(&park->park_mtx);
507 
508 			/*
509 			 * Still on queue1?  We can safely remove it
510 			 * without any consequences since the file
511 			 * server hasn't seen it.  "else" we need to
512 			 * wait for the response and just ignore it
513 			 * to avoid signalling an incorrect error to
514 			 * the file server.
515 			 */
516 			if (park->park_flags & PARKFLAG_ONQUEUE1) {
517 				TAILQ_REMOVE(&pmp->pmp_msg_touser,
518 				    park, park_entries);
519 				puffs_msgpark_release(park);
520 				pmp->pmp_msg_touser_count--;
521 				park->park_flags &= ~PARKFLAG_ONQUEUE1;
522 			} else {
523 				mutex_exit(&park->park_mtx);
524 			}
525 			mutex_exit(&pmp->pmp_lock);
526 
527 			rv = EINTR;
528 		}
529 	} else {
530 		rv = preq->preq_rv;
531 		mutex_exit(&park->park_mtx);
532 	}
533 
534  skipwait:
535 	mutex_enter(&pmp->pmp_lock);
536 	puffs_mp_release(pmp);
537 	mutex_exit(&pmp->pmp_lock);
538 
539 	mutex_enter(p->p_lock);
540 	sigprocmask1(l, SIG_SETMASK, &oss, NULL);
541 	mutex_exit(p->p_lock);
542 
543 	return rv;
544 }
545 
546 /*
547  * XXX: this suuuucks.  Hopefully I'll get rid of this lossage once
548  * the whole setback-nonsense gets fixed.
549  */
550 int
551 puffs_msg_wait2(struct puffs_mount *pmp, struct puffs_msgpark *park,
552 	struct puffs_node *pn1, struct puffs_node *pn2)
553 {
554 	struct puffs_req *preq;
555 	int rv;
556 
557 	rv = puffs_msg_wait(pmp, park);
558 
559 	preq = park->park_preq;
560 	if (pn1 && preq->preq_setbacks & PUFFS_SETBACK_INACT_N1)
561 		pn1->pn_stat |= PNODE_DOINACT;
562 	if (pn2 && preq->preq_setbacks & PUFFS_SETBACK_INACT_N2)
563 		pn2->pn_stat |= PNODE_DOINACT;
564 
565 	if (pn1 && preq->preq_setbacks & PUFFS_SETBACK_NOREF_N1)
566 		pn1->pn_stat |= PNODE_NOREFS;
567 	if (pn2 && preq->preq_setbacks & PUFFS_SETBACK_NOREF_N2)
568 		pn2->pn_stat |= PNODE_NOREFS;
569 
570 	return rv;
571 
572 }
573 
574 /*
575  * XXX: lazy bum.  please, for the love of foie gras, fix me.
576  * This should *NOT* depend on setfaf.  Also "memcpy" could
577  * be done more nicely.
578  */
579 void
580 puffs_msg_sendresp(struct puffs_mount *pmp, struct puffs_req *origpreq, int rv)
581 {
582 	struct puffs_msgpark *park;
583 	struct puffs_req *preq;
584 
585 	puffs_msgmem_alloc(sizeof(struct puffs_req), &park, (void *)&preq, 1);
586 	puffs_msg_setfaf(park); /* XXXXXX: avoids reqid override */
587 
588 	memcpy(preq, origpreq, sizeof(struct puffs_req));
589 	preq->preq_rv = rv;
590 	preq->preq_opclass |= PUFFSOPFLAG_ISRESPONSE;
591 
592 	puffs_msg_enqueue(pmp, park);
593 	puffs_msgmem_release(park);
594 }
595 
596 /*
597  * Get next request in the outgoing queue.  "maxsize" controls the
598  * size the caller can accommodate and "nonblock" signals if this
599  * should block while waiting for input.  Handles all locking internally.
600  */
601 int
602 puffs_msgif_getout(void *this, size_t maxsize, int nonblock,
603 	uint8_t **data, size_t *dlen, void **parkptr)
604 {
605 	struct puffs_mount *pmp = this;
606 	struct puffs_msgpark *park;
607 	struct puffs_req *preq;
608 	int error;
609 
610 	error = 0;
611 	mutex_enter(&pmp->pmp_lock);
612 	puffs_mp_reference(pmp);
613 	for (;;) {
614 		/* RIP? */
615 		if (pmp->pmp_status != PUFFSTAT_RUNNING) {
616 			error = ENXIO;
617 			break;
618 		}
619 
620 		/* need platinum yendorian express card? */
621 		if (TAILQ_EMPTY(&pmp->pmp_msg_touser)) {
622 			DPRINTF(("puffs_getout: no outgoing op, "));
623 			if (nonblock) {
624 				DPRINTF(("returning EWOULDBLOCK\n"));
625 				error = EWOULDBLOCK;
626 				break;
627 			}
628 			DPRINTF(("waiting ...\n"));
629 
630 			error = cv_wait_sig(&pmp->pmp_msg_waiter_cv,
631 			    &pmp->pmp_lock);
632 			if (error)
633 				break;
634 			else
635 				continue;
636 		}
637 
638 		park = TAILQ_FIRST(&pmp->pmp_msg_touser);
639 		if (park == NULL)
640 			continue;
641 
642 		mutex_enter(&park->park_mtx);
643 		puffs_msgpark_reference(park);
644 
645 		DPRINTF(("puffs_getout: found park at %p, ", park));
646 
647 		/* If it's a goner, don't process any furher */
648 		if (park->park_flags & PARKFLAG_WAITERGONE) {
649 			DPRINTF(("waitergone!\n"));
650 			puffs_msgpark_release(park);
651 			continue;
652 		}
653 		preq = park->park_preq;
654 
655 #if 0
656 		/* check size */
657 		/*
658 		 * XXX: this check is not valid for now, we don't know
659 		 * the size of the caller's input buffer.  i.e. this
660 		 * will most likely go away
661 		 */
662 		if (maxsize < preq->preq_frhdr.pfr_len) {
663 			DPRINTF(("buffer too small\n"));
664 			puffs_msgpark_release(park);
665 			error = E2BIG;
666 			break;
667 		}
668 #endif
669 
670 		DPRINTF(("returning\n"));
671 
672 		/*
673 		 * Ok, we found what we came for.  Release it from the
674 		 * outgoing queue but do not unlock.  We will unlock
675 		 * only after we "releaseout" it to avoid complications:
676 		 * otherwise it is (theoretically) possible for userland
677 		 * to race us into "put" before we have a change to put
678 		 * this baby on the receiving queue.
679 		 */
680 		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
681 		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
682 		park->park_flags &= ~PARKFLAG_ONQUEUE1;
683 		mutex_exit(&park->park_mtx);
684 
685 		pmp->pmp_msg_touser_count--;
686 		KASSERT(pmp->pmp_msg_touser_count >= 0);
687 
688 		break;
689 	}
690 	puffs_mp_release(pmp);
691 	mutex_exit(&pmp->pmp_lock);
692 
693 	if (error == 0) {
694 		*data = (uint8_t *)preq;
695 		preq->preq_pth.pth_framelen = park->park_copylen;
696 		*dlen = preq->preq_pth.pth_framelen;
697 		*parkptr = park;
698 	}
699 
700 	return error;
701 }
702 
703 /*
704  * Release outgoing structure.  Now, depending on the success of the
705  * outgoing send, it is either going onto the result waiting queue
706  * or the death chamber.
707  */
708 void
709 puffs_msgif_releaseout(void *this, void *parkptr, int status)
710 {
711 	struct puffs_mount *pmp = this;
712 	struct puffs_msgpark *park = parkptr;
713 
714 	DPRINTF(("puffs_releaseout: returning park %p, errno %d: " ,
715 	    park, status));
716 	mutex_enter(&pmp->pmp_lock);
717 	mutex_enter(&park->park_mtx);
718 	if (park->park_flags & PARKFLAG_WANTREPLY) {
719 		if (status == 0) {
720 			DPRINTF(("enqueue replywait\n"));
721 			TAILQ_INSERT_TAIL(&pmp->pmp_msg_replywait, park,
722 			    park_entries);
723 			park->park_flags |= PARKFLAG_ONQUEUE2;
724 		} else {
725 			DPRINTF(("error path!\n"));
726 			park->park_preq->preq_rv = status;
727 			park->park_flags |= PARKFLAG_DONE;
728 			cv_signal(&park->park_cv);
729 		}
730 		puffs_msgpark_release(park);
731 	} else {
732 		DPRINTF(("release\n"));
733 		puffs_msgpark_release1(park, 2);
734 	}
735 	mutex_exit(&pmp->pmp_lock);
736 }
737 
738 size_t
739 puffs_msgif_waitcount(void *this)
740 {
741 	struct puffs_mount *pmp = this;
742 	size_t rv;
743 
744 	mutex_enter(&pmp->pmp_lock);
745 	rv = pmp->pmp_msg_touser_count;
746 	mutex_exit(&pmp->pmp_lock);
747 
748 	return rv;
749 }
750 
751 /*
752  * XXX: locking with this one?
753  */
754 static void
755 puffsop_msg(void *this, struct puffs_req *preq)
756 {
757 	struct puffs_mount *pmp = this;
758 	struct putter_hdr *pth = &preq->preq_pth;
759 	struct puffs_msgpark *park;
760 	int wgone;
761 
762 	mutex_enter(&pmp->pmp_lock);
763 
764 	/* Locate waiter */
765 	TAILQ_FOREACH(park, &pmp->pmp_msg_replywait, park_entries) {
766 		if (park->park_preq->preq_id == preq->preq_id)
767 			break;
768 	}
769 	if (park == NULL) {
770 		DPRINTF(("puffsop_msg: no request: %" PRIu64 "\n",
771 		    preq->preq_id));
772 		mutex_exit(&pmp->pmp_lock);
773 		return; /* XXX send error */
774 	}
775 
776 	mutex_enter(&park->park_mtx);
777 	puffs_msgpark_reference(park);
778 	if (pth->pth_framelen > park->park_maxlen) {
779 		DPRINTF(("puffsop_msg: invalid buffer length: "
780 		    "%" PRIu64 " (req %" PRIu64 ", \n", pth->pth_framelen,
781 		    preq->preq_id));
782 		park->park_preq->preq_rv = EPROTO;
783 		cv_signal(&park->park_cv);
784 		puffs_msgpark_release1(park, 2);
785 		mutex_exit(&pmp->pmp_lock);
786 		return; /* XXX: error */
787 	}
788 	wgone = park->park_flags & PARKFLAG_WAITERGONE;
789 
790 	KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
791 	TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
792 	park->park_flags &= ~PARKFLAG_ONQUEUE2;
793 	mutex_exit(&pmp->pmp_lock);
794 
795 	if (wgone) {
796 		DPRINTF(("puffsop_msg: bad service - waiter gone for "
797 		    "park %p\n", park));
798 	} else {
799 #if 1
800 		if (park->park_creq) {
801 			struct puffs_req *creq;
802 			size_t csize;
803 
804 			KASSERT(pmp->pmp_docompat);
805 			puffs_compat_incoming(preq, park->park_creq);
806 			creq = park->park_creq;
807 			csize = park->park_creqlen;
808 			park->park_creq = park->park_preq;
809 			park->park_creqlen = park->park_maxlen;
810 
811 			park->park_preq = creq;
812 			park->park_maxlen = csize;
813 
814 			memcpy(park->park_creq, preq, pth->pth_framelen);
815 		} else {
816 #endif
817 			memcpy(park->park_preq, preq, pth->pth_framelen);
818 		}
819 
820 		if (park->park_flags & PARKFLAG_CALL) {
821 			DPRINTF(("puffsop_msg: call for %p, arg %p\n",
822 			    park->park_preq, park->park_donearg));
823 			park->park_done(pmp, preq, park->park_donearg);
824 		}
825 	}
826 
827 	if (!wgone) {
828 		DPRINTF(("puffs_putop: flagging done for "
829 		    "park %p\n", park));
830 		cv_signal(&park->park_cv);
831 	}
832 
833 	park->park_flags |= PARKFLAG_DONE;
834 	puffs_msgpark_release1(park, 2);
835 }
836 
837 static void
838 puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
839 {
840 	struct vnode *vp;
841 	voff_t offlo, offhi;
842 	int rv, flags = 0;
843 
844 	KASSERT(pf->pf_req.preq_pth.pth_framelen == sizeof(struct puffs_flush));
845 
846 	/* XXX: slurry */
847 	if (pf->pf_op == PUFFS_INVAL_NAMECACHE_ALL) {
848 		cache_purgevfs(PMPTOMP(pmp));
849 		rv = 0;
850 		goto out;
851 	}
852 
853 	/*
854 	 * Get vnode, don't lock it.  Namecache is protected by its own lock
855 	 * and we have a reference to protect against premature harvesting.
856 	 *
857 	 * The node we want here might be locked and the op is in
858 	 * userspace waiting for us to complete ==> deadlock.  Another
859 	 * reason we need to eventually bump locking to userspace, as we
860 	 * will need to lock the node if we wish to do flushes.
861 	 */
862 	rv = puffs_cookie2vnode(pmp, pf->pf_cookie, 0, 0, &vp);
863 	if (rv) {
864 		if (rv == PUFFS_NOSUCHCOOKIE)
865 			rv = ENOENT;
866 		goto out;
867 	}
868 
869 	switch (pf->pf_op) {
870 #if 0
871 	/* not quite ready, yet */
872 	case PUFFS_INVAL_NAMECACHE_NODE:
873 	struct componentname *pf_cn;
874 	char *name;
875 		/* get comfortab^Wcomponentname */
876 		pf_cn = kmem_alloc(componentname);
877 		memset(pf_cn, 0, sizeof(struct componentname));
878 		break;
879 
880 #endif
881 	case PUFFS_INVAL_NAMECACHE_DIR:
882 		if (vp->v_type != VDIR) {
883 			rv = EINVAL;
884 			break;
885 		}
886 		cache_purge1(vp, NULL, PURGE_CHILDREN);
887 		break;
888 
889 	case PUFFS_INVAL_PAGECACHE_NODE_RANGE:
890 		flags = PGO_FREE;
891 		/*FALLTHROUGH*/
892 	case PUFFS_FLUSH_PAGECACHE_NODE_RANGE:
893 		if (flags == 0)
894 			flags = PGO_CLEANIT;
895 
896 		if (pf->pf_end > vp->v_size || vp->v_type != VREG) {
897 			rv = EINVAL;
898 			break;
899 		}
900 
901 		offlo = trunc_page(pf->pf_start);
902 		offhi = round_page(pf->pf_end);
903 		if (offhi != 0 && offlo >= offhi) {
904 			rv = EINVAL;
905 			break;
906 		}
907 
908 		mutex_enter(&vp->v_uobj.vmobjlock);
909 		rv = VOP_PUTPAGES(vp, offlo, offhi, flags);
910 		break;
911 
912 	default:
913 		rv = EINVAL;
914 	}
915 
916 	vrele(vp);
917 
918  out:
919 	puffs_msg_sendresp(pmp, &pf->pf_req, rv);
920 }
921 
922 int
923 puffs_msgif_dispatch(void *this, struct putter_hdr *pth)
924 {
925 	struct puffs_mount *pmp = this;
926 	struct puffs_req *preq = (struct puffs_req *)pth;
927 	struct puffs_sopreq *psopr;
928 
929 	if (pth->pth_framelen < sizeof(struct puffs_req)) {
930 		puffs_msg_sendresp(pmp, preq, EINVAL); /* E2SMALL */
931 		return 0;
932 	}
933 
934 	switch (PUFFSOP_OPCLASS(preq->preq_opclass)) {
935 	case PUFFSOP_VN:
936 	case PUFFSOP_VFS:
937 		DPRINTF(("dispatch: vn/vfs message 0x%x\n", preq->preq_optype));
938 		puffsop_msg(pmp, preq);
939 		break;
940 
941 	case PUFFSOP_FLUSH: /* process in sop thread */
942 	{
943 		struct puffs_flush *pf;
944 
945 		DPRINTF(("dispatch: flush 0x%x\n", preq->preq_optype));
946 
947 		if (preq->preq_pth.pth_framelen != sizeof(struct puffs_flush)) {
948 			puffs_msg_sendresp(pmp, preq, EINVAL); /* E2SMALL */
949 			break;
950 		}
951 		pf = (struct puffs_flush *)preq;
952 
953 		psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
954 		memcpy(&psopr->psopr_pf, pf, sizeof(*pf));
955 		psopr->psopr_sopreq = PUFFS_SOPREQ_FLUSH;
956 
957 		mutex_enter(&pmp->pmp_sopmtx);
958 		if (pmp->pmp_sopthrcount == 0) {
959 			mutex_exit(&pmp->pmp_sopmtx);
960 			kmem_free(psopr, sizeof(*psopr));
961 			puffs_msg_sendresp(pmp, preq, ENXIO);
962 		} else {
963 			TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
964 			    psopr, psopr_entries);
965 			cv_signal(&pmp->pmp_sopcv);
966 			mutex_exit(&pmp->pmp_sopmtx);
967 		}
968 		break;
969 	}
970 
971 	case PUFFSOP_UNMOUNT: /* process in sop thread */
972 	{
973 
974 		DPRINTF(("dispatch: unmount 0x%x\n", preq->preq_optype));
975 
976 		psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
977 		psopr->psopr_preq = *preq;
978 		psopr->psopr_sopreq = PUFFS_SOPREQ_UNMOUNT;
979 
980 		mutex_enter(&pmp->pmp_sopmtx);
981 		if (pmp->pmp_sopthrcount == 0) {
982 			mutex_exit(&pmp->pmp_sopmtx);
983 			kmem_free(psopr, sizeof(*psopr));
984 			puffs_msg_sendresp(pmp, preq, ENXIO);
985 		} else {
986 			TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
987 			    psopr, psopr_entries);
988 			cv_signal(&pmp->pmp_sopcv);
989 			mutex_exit(&pmp->pmp_sopmtx);
990 		}
991 		break;
992 	}
993 
994 	default:
995 		DPRINTF(("dispatch: invalid class 0x%x\n", preq->preq_opclass));
996 		puffs_msg_sendresp(pmp, preq, EOPNOTSUPP);
997 		break;
998 	}
999 
1000 	return 0;
1001 }
1002 
1003 /*
1004  * Work loop for thread processing all ops from server which
1005  * cannot safely be handled in caller context.  This includes
1006  * everything which might need a lock currently "held" by the file
1007  * server, i.e. a long-term kernel lock which will be released only
1008  * once the file server acknowledges a request
1009  */
1010 void
1011 puffs_sop_thread(void *arg)
1012 {
1013 	struct puffs_mount *pmp = arg;
1014 	struct mount *mp = PMPTOMP(pmp);
1015 	struct puffs_sopreq *psopr;
1016 	bool keeprunning;
1017 	bool unmountme = false;
1018 
1019 	mutex_enter(&pmp->pmp_sopmtx);
1020 	for (keeprunning = true; keeprunning; ) {
1021 		while ((psopr = TAILQ_FIRST(&pmp->pmp_sopreqs)) == NULL)
1022 			cv_wait(&pmp->pmp_sopcv, &pmp->pmp_sopmtx);
1023 		TAILQ_REMOVE(&pmp->pmp_sopreqs, psopr, psopr_entries);
1024 		mutex_exit(&pmp->pmp_sopmtx);
1025 
1026 		switch (psopr->psopr_sopreq) {
1027 		case PUFFS_SOPREQSYS_EXIT:
1028 			keeprunning = false;
1029 			break;
1030 		case PUFFS_SOPREQ_FLUSH:
1031 			puffsop_flush(pmp, &psopr->psopr_pf);
1032 			break;
1033 		case PUFFS_SOPREQ_UNMOUNT:
1034 			puffs_msg_sendresp(pmp, &psopr->psopr_preq, 0);
1035 
1036 			unmountme = true;
1037 			keeprunning = false;
1038 
1039 			/*
1040 			 * We know the mountpoint is still alive because
1041 			 * the thread that is us (poetic?) is still alive.
1042 			 */
1043 			atomic_inc_uint((unsigned int*)&mp->mnt_refcnt);
1044 			break;
1045 		}
1046 
1047 		kmem_free(psopr, sizeof(*psopr));
1048 		mutex_enter(&pmp->pmp_sopmtx);
1049 	}
1050 
1051 	/*
1052 	 * Purge remaining ops.
1053 	 */
1054 	while ((psopr = TAILQ_FIRST(&pmp->pmp_sopreqs)) != NULL) {
1055 		TAILQ_REMOVE(&pmp->pmp_sopreqs, psopr, psopr_entries);
1056 		mutex_exit(&pmp->pmp_sopmtx);
1057 		puffs_msg_sendresp(pmp, &psopr->psopr_preq, ENXIO);
1058 		kmem_free(psopr, sizeof(*psopr));
1059 		mutex_enter(&pmp->pmp_sopmtx);
1060 	}
1061 
1062 	pmp->pmp_sopthrcount--;
1063 	cv_broadcast(&pmp->pmp_sopcv);
1064 	mutex_exit(&pmp->pmp_sopmtx); /* not allowed to access fs after this */
1065 
1066 	/*
1067 	 * If unmount was requested, we can now safely do it here, since
1068 	 * our context is dead from the point-of-view of puffs_unmount()
1069 	 * and we are just another thread.  dounmount() makes internally
1070 	 * sure that VFS_UNMOUNT() isn't called reentrantly and that it
1071 	 * is eventually completed.
1072 	 */
1073 	if (unmountme) {
1074 		(void)dounmount(mp, MNT_FORCE, curlwp);
1075 		vfs_destroy(mp);
1076 	}
1077 
1078 	kthread_exit(0);
1079 }
1080 
1081 int
1082 puffs_msgif_close(void *this)
1083 {
1084 	struct puffs_mount *pmp = this;
1085 	struct mount *mp = PMPTOMP(pmp);
1086 
1087 	mutex_enter(&pmp->pmp_lock);
1088 	puffs_mp_reference(pmp);
1089 
1090 	/*
1091 	 * Free the waiting callers before proceeding any further.
1092 	 * The syncer might be jogging around in this file system
1093 	 * currently.  If we allow it to go to the userspace of no
1094 	 * return while trying to get the syncer lock, well ...
1095 	 */
1096 	puffs_userdead(pmp);
1097 
1098 	/*
1099 	 * Make sure someone from puffs_unmount() isn't currently in
1100 	 * userspace.  If we don't take this precautionary step,
1101 	 * they might notice that the mountpoint has disappeared
1102 	 * from under them once they return.  Especially note that we
1103 	 * cannot simply test for an unmounter before calling
1104 	 * dounmount(), since it might be possible that that particular
1105 	 * invocation of unmount was called without MNT_FORCE.  Here we
1106 	 * *must* make sure unmount succeeds.  Also, restart is necessary
1107 	 * since pmp isn't locked.  We might end up with PUTTER_DEAD after
1108 	 * restart and exit from there.
1109 	 */
1110 	if (pmp->pmp_unmounting) {
1111 		cv_wait(&pmp->pmp_unmounting_cv, &pmp->pmp_lock);
1112 		puffs_mp_release(pmp);
1113 		mutex_exit(&pmp->pmp_lock);
1114 		DPRINTF(("puffs_fop_close: unmount was in progress for pmp %p, "
1115 		    "restart\n", pmp));
1116 		return ERESTART;
1117 	}
1118 
1119 	/* Won't access pmp from here anymore */
1120 	atomic_inc_uint((unsigned int*)&mp->mnt_refcnt);
1121 	puffs_mp_release(pmp);
1122 	mutex_exit(&pmp->pmp_lock);
1123 
1124 	/* Detach from VFS. */
1125 	(void)dounmount(mp, MNT_FORCE, curlwp);
1126 	vfs_destroy(mp);
1127 
1128 	return 0;
1129 }
1130 
1131 /*
1132  * We're dead, kaput, RIP, slightly more than merely pining for the
1133  * fjords, belly-up, fallen, lifeless, finished, expired, gone to meet
1134  * our maker, ceased to be, etcetc.  YASD.  It's a dead FS!
1135  *
1136  * Caller must hold puffs mutex.
1137  */
1138 void
1139 puffs_userdead(struct puffs_mount *pmp)
1140 {
1141 	struct puffs_msgpark *park, *park_next;
1142 
1143 	/*
1144 	 * Mark filesystem status as dying so that operations don't
1145 	 * attempt to march to userspace any longer.
1146 	 */
1147 	pmp->pmp_status = PUFFSTAT_DYING;
1148 
1149 	/* signal waiters on REQUEST TO file server queue */
1150 	for (park = TAILQ_FIRST(&pmp->pmp_msg_touser); park; park = park_next) {
1151 		uint8_t opclass;
1152 
1153 		mutex_enter(&park->park_mtx);
1154 		puffs_msgpark_reference(park);
1155 		park_next = TAILQ_NEXT(park, park_entries);
1156 
1157 		KASSERT(park->park_flags & PARKFLAG_ONQUEUE1);
1158 		TAILQ_REMOVE(&pmp->pmp_msg_touser, park, park_entries);
1159 		park->park_flags &= ~PARKFLAG_ONQUEUE1;
1160 		pmp->pmp_msg_touser_count--;
1161 
1162 		/*
1163 		 * Even though waiters on QUEUE1 are removed in touser()
1164 		 * in case of WAITERGONE, it is still possible for us to
1165 		 * get raced here due to having to retake locks in said
1166 		 * touser().  In the race case simply "ignore" the item
1167 		 * on the queue and move on to the next one.
1168 		 */
1169 		if (park->park_flags & PARKFLAG_WAITERGONE) {
1170 			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
1171 			KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
1172 			puffs_msgpark_release(park);
1173 
1174 		} else {
1175 			opclass = park->park_preq->preq_opclass;
1176 			park->park_preq->preq_rv = ENXIO;
1177 
1178 			if (park->park_flags & PARKFLAG_CALL) {
1179 				park->park_done(pmp, park->park_preq,
1180 				    park->park_donearg);
1181 				puffs_msgpark_release1(park, 2);
1182 			} else if ((park->park_flags & PARKFLAG_WANTREPLY)==0) {
1183 				puffs_msgpark_release1(park, 2);
1184 			} else {
1185 				park->park_preq->preq_rv = ENXIO;
1186 				cv_signal(&park->park_cv);
1187 				puffs_msgpark_release(park);
1188 			}
1189 		}
1190 	}
1191 
1192 	/* signal waiters on RESPONSE FROM file server queue */
1193 	for (park=TAILQ_FIRST(&pmp->pmp_msg_replywait); park; park=park_next) {
1194 		mutex_enter(&park->park_mtx);
1195 		puffs_msgpark_reference(park);
1196 		park_next = TAILQ_NEXT(park, park_entries);
1197 
1198 		KASSERT(park->park_flags & PARKFLAG_ONQUEUE2);
1199 		KASSERT(park->park_flags & PARKFLAG_WANTREPLY);
1200 
1201 		TAILQ_REMOVE(&pmp->pmp_msg_replywait, park, park_entries);
1202 		park->park_flags &= ~PARKFLAG_ONQUEUE2;
1203 
1204 		if (park->park_flags & PARKFLAG_WAITERGONE) {
1205 			KASSERT((park->park_flags & PARKFLAG_CALL) == 0);
1206 			puffs_msgpark_release(park);
1207 		} else {
1208 			park->park_preq->preq_rv = ENXIO;
1209 			if (park->park_flags & PARKFLAG_CALL) {
1210 				park->park_done(pmp, park->park_preq,
1211 				    park->park_donearg);
1212 				puffs_msgpark_release1(park, 2);
1213 			} else {
1214 				cv_signal(&park->park_cv);
1215 				puffs_msgpark_release(park);
1216 			}
1217 		}
1218 	}
1219 
1220 	cv_broadcast(&pmp->pmp_msg_waiter_cv);
1221 }
1222