1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains a set of routines used to perform wait based method
28  * reaping.
29  */
30 
31 #include <wait.h>
32 #include <sys/param.h>
33 #include <fcntl.h>
34 #include <libcontract.h>
35 #include <errno.h>
36 #include <libintl.h>
37 #include <unistd.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/resource.h>
41 #include "inetd_impl.h"
42 
/*
 * Open file limit that method_init() imposes on inetd itself; it is used
 * for both the soft and the hard RLIMIT_NOFILE values.
 */
#define	INETD_NOFILE_LIMIT RLIM_INFINITY
45 
46 /* structure used to represent an active method process */
47 typedef struct {
48 	int			fd;	/* fd of process's /proc psinfo file */
49 	/* associated contract id if known, else -1 */
50 	ctid_t			cid;
51 	pid_t			pid;
52 	instance_t		*inst;	/* pointer to associated instance */
53 	instance_method_t	method;	/* the method type running */
54 	uu_list_node_t		link;
55 } method_el_t;
56 
57 
58 static void unregister_method(method_el_t *);
59 
60 
61 /* list of currently executing method processes */
62 static uu_list_pool_t		*method_pool = NULL;
63 static uu_list_t		*method_list = NULL;
64 
65 /*
66  * File limit saved during initialization before modification, so that it can
67  * be reverted back to for inetd's exec'd methods.
68  */
69 static struct rlimit		saved_file_limit;
70 
71 /*
72  * Setup structures used for method termination monitoring.
73  * Returns -1 if an allocation failure occurred, else 0.
74  */
75 int
76 method_init(void)
77 {
78 	struct rlimit rl;
79 
80 	/*
81 	 * Save aside the old file limit and impose one large enough to support
82 	 * all the /proc file handles we could have open.
83 	 */
84 
85 	(void) getrlimit(RLIMIT_NOFILE, &saved_file_limit);
86 
87 	rl.rlim_cur = rl.rlim_max = INETD_NOFILE_LIMIT;
88 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
89 		error_msg("Failed to set file limit: %s", strerror(errno));
90 		return (-1);
91 	}
92 
93 	if ((method_pool = uu_list_pool_create("method_pool",
94 	    sizeof (method_el_t), offsetof(method_el_t, link), NULL,
95 	    UU_LIST_POOL_DEBUG)) == NULL) {
96 		error_msg("%s: %s", gettext("Failed to create method pool"),
97 		    uu_strerror(uu_error()));
98 		return (-1);
99 	}
100 
101 	if ((method_list = uu_list_create(method_pool, NULL, 0)) == NULL) {
102 		error_msg("%s: %s",
103 		    gettext("Failed to create method list"),
104 		    uu_strerror(uu_error()));
105 		/* let method_fini() clean-up */
106 		return (-1);
107 	}
108 
109 	return (0);
110 }
111 
112 /*
113  * Tear-down structures created in method_init().
114  */
115 void
116 method_fini(void)
117 {
118 	if (method_list != NULL) {
119 		method_el_t *me;
120 
121 		while ((me = uu_list_first(method_list)) != NULL)
122 			unregister_method(me);
123 
124 		(void) uu_list_destroy(method_list);
125 		method_list = NULL;
126 	}
127 	if (method_pool != NULL) {
128 		(void) uu_list_pool_destroy(method_pool);
129 		method_pool = NULL;
130 	}
131 
132 	/* revert file limit */
133 	method_preexec();
134 }
135 
136 /*
137  * Revert file limit back to pre-initialization one. This shouldn't fail as
138  * long as its called *after* descriptor cleanup.
139  */
140 void
141 method_preexec(void)
142 {
143 	(void) setrlimit(RLIMIT_NOFILE, &saved_file_limit);
144 }
145 
146 
147 /*
148  * Callback function that handles the timeout of an instance's method.
149  * 'arg' points at the method_el_t representing the method.
150  */
151 /* ARGSUSED0 */
152 static void
153 method_timeout(iu_tq_t *tq, void *arg)
154 {
155 	method_el_t *mp = arg;
156 
157 	error_msg(gettext("The %s method of instance %s timed-out"),
158 	    methods[mp->method].name, mp->inst->fmri);
159 
160 	mp->inst->timer_id = -1;
161 
162 	if (mp->method == IM_START) {
163 		process_start_term(mp->inst);
164 	} else {
165 		process_non_start_term(mp->inst, IMRET_FAILURE);
166 	}
167 
168 	unregister_method(mp);
169 }
170 
171 /*
172  * Registers the attributes of a running method passed as arguments so that
173  * the method's termination is noticed and any further processing of the
174  * associated instance is carried out. The function also sets up any
175  * necessary timers so we can detect hung methods.
176  * Returns -1 if either it failed to open the /proc psinfo file which is used
177  * to monitor the method process, it failed to setup a required timer or
178  * memory allocation failed; else 0.
179  */
180 int
181 register_method(instance_t *ins, pid_t pid, ctid_t cid, instance_method_t mthd)
182 {
183 	char		path[MAXPATHLEN];
184 	int		fd;
185 	method_el_t	*me;
186 
187 	/* open /proc psinfo file of process to listen for POLLHUP events on */
188 	(void) snprintf(path, sizeof (path), "/proc/%u/psinfo", pid);
189 	for (;;) {
190 		if ((fd = open(path, O_RDONLY)) >= 0) {
191 			break;
192 		} else if (errno != EINTR) {
193 			/*
194 			 * Don't output an error for ENOENT; we get this
195 			 * if a method has gone away whilst we were stopped,
196 			 * and we're now trying to re-listen for it.
197 			 */
198 			if (errno != ENOENT) {
199 				error_msg(gettext("Failed to open %s: %s"),
200 				    path, strerror(errno));
201 			}
202 			return (-1);
203 		}
204 	}
205 
206 	/* add method record to in-memory list */
207 	if ((me = calloc(1, sizeof (method_el_t))) == NULL) {
208 		error_msg(strerror(errno));
209 		(void) close(fd);
210 		return (-1);
211 	}
212 	me->fd = fd;
213 	me->inst = (instance_t *)ins;
214 	me->method = mthd;
215 	me->pid = pid;
216 	me->cid = cid;
217 
218 	/* register a timeout for the method, if required */
219 	if (mthd != IM_START) {
220 		method_info_t *mi = ins->config->methods[mthd];
221 
222 		if (mi->timeout > 0) {
223 			assert(ins->timer_id == -1);
224 			ins->timer_id = iu_schedule_timer(timer_queue,
225 			    mi->timeout, method_timeout, me);
226 			if (ins->timer_id == -1) {
227 				error_msg(gettext(
228 				    "Failed to schedule method timeout"));
229 				free(me);
230 				(void) close(fd);
231 				return (-1);
232 			}
233 		}
234 	}
235 
236 	/*
237 	 * Add fd of psinfo file to poll set, but pass 0 for events to
238 	 * poll for, so we should only get a POLLHUP event on the fd.
239 	 */
240 	if (set_pollfd(fd, 0) == -1) {
241 		cancel_inst_timer(ins);
242 		free(me);
243 		(void) close(fd);
244 		return (-1);
245 	}
246 
247 	uu_list_node_init(me, &me->link, method_pool);
248 	(void) uu_list_insert_after(method_list, NULL, me);
249 
250 	return (0);
251 }
252 
253 /*
254  * A counterpart to register_method(), this function stops the monitoring of a
255  * method process for its termination.
256  */
257 static void
258 unregister_method(method_el_t *me)
259 {
260 	/* cancel any timer associated with the method */
261 	if (me->inst->timer_id != -1)
262 		cancel_inst_timer(me->inst);
263 
264 	/* stop polling on the psinfo file fd */
265 	clear_pollfd(me->fd);
266 	(void) close(me->fd);
267 
268 	/* remove method record from list */
269 	uu_list_remove(method_list, me);
270 
271 	free(me);
272 }
273 
274 /*
275  * Unregister all methods associated with instance 'inst'.
276  */
277 void
278 unregister_instance_methods(const instance_t *inst)
279 {
280 	method_el_t *me = uu_list_first(method_list);
281 
282 	while (me != NULL) {
283 		if (me->inst == inst) {
284 			method_el_t *tmp = me;
285 
286 			me = uu_list_next(method_list, me);
287 			unregister_method(tmp);
288 		} else  {
289 			me = uu_list_next(method_list, me);
290 		}
291 	}
292 }
293 
294 /*
295  * Process any terminated methods. For each method determined to have
296  * terminated, the function determines its return value and calls the
297  * appropriate handling function, depending on the type of the method.
298  */
299 void
300 process_terminated_methods(void)
301 {
302 	method_el_t	*me = uu_list_first(method_list);
303 
304 	while (me != NULL) {
305 		struct pollfd	*pfd;
306 		pid_t		pid;
307 		int		status;
308 		int		ret;
309 		method_el_t	*tmp;
310 
311 		pfd = find_pollfd(me->fd);
312 
313 		/*
314 		 * We expect to get a POLLHUP back on the fd of the process's
315 		 * open psinfo file from /proc when the method terminates.
316 		 * A POLLERR could(?) mask a POLLHUP, so handle this
317 		 * also.
318 		 */
319 		if ((pfd->revents & (POLLHUP|POLLERR)) == 0) {
320 			me = uu_list_next(method_list, me);
321 			continue;
322 		}
323 
324 		/* get the method's exit code (no need to loop for EINTR) */
325 		pid = waitpid(me->pid, &status, WNOHANG);
326 
327 		switch (pid) {
328 		case 0:					/* child still around */
329 			/*
330 			 * Either poll() is sending us invalid POLLHUP events
331 			 * or is flagging a POLLERR on the fd. Neither should
332 			 * happen, but in the event they do, ignore this fd
333 			 * this time around and wait out the termination
334 			 * of its associated method. This may result in
335 			 * inetd swiftly looping in event_loop(), but means
336 			 * we don't miss the termination of a method.
337 			 */
338 			me = uu_list_next(method_list, me);
339 			continue;
340 
341 		case -1:				/* non-existent child */
342 			assert(errno == ECHILD);
343 			/*
344 			 * the method must not be owned by inetd due to it
345 			 * persisting over an inetd restart. Let's assume the
346 			 * best, that it was successful.
347 			 */
348 			ret = IMRET_SUCCESS;
349 			break;
350 
351 		default:				/* child terminated */
352 			if (WIFEXITED(status)) {
353 				ret = WEXITSTATUS(status);
354 				debug_msg("process %ld of instance %s returned "
355 				    "%d", pid, me->inst->fmri, ret);
356 			} else if (WIFSIGNALED(status)) {
357 				/*
358 				 * Terminated by signal.  This may be due
359 				 * to a kill that we sent from a disable or
360 				 * offline event. We flag it as a failure, but
361 				 * this flagged failure will only be processed
362 				 * in the case of non-start methods, or when
363 				 * the instance is still enabled.
364 				 */
365 				debug_msg("process %ld of instance %s exited "
366 				    "due to signal %d", pid, me->inst->fmri,
367 				    WTERMSIG(status));
368 				ret = IMRET_FAILURE;
369 			} else {
370 				/*
371 				 * Can we actually get here?  Don't think so.
372 				 * Treat it as a failure, anyway.
373 				 */
374 				debug_msg("waitpid() for %s method of "
375 				    "instance %s returned %d",
376 				    methods[me->method].name, me->inst->fmri,
377 				    status);
378 				ret = IMRET_FAILURE;
379 			}
380 		}
381 
382 		remove_method_ids(me->inst, me->pid, me->cid, me->method);
383 
384 		/* continue state transition processing of the instance */
385 		if (me->method != IM_START) {
386 			process_non_start_term(me->inst, ret);
387 		} else {
388 			process_start_term(me->inst);
389 		}
390 
391 		if (me->cid != -1)
392 			(void) abandon_contract(me->cid);
393 
394 		tmp = me;
395 		me = uu_list_next(method_list, me);
396 		unregister_method(tmp);
397 	}
398 }
399