1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/proc.h>
29 #include <sys/systm.h>
30 #include <sys/param.h>
31 #include <sys/kmem.h>
32 #include <sys/sysmacros.h>
33 #include <sys/types.h>
34 #include <sys/cmn_err.h>
35 #include <sys/user.h>
36 #include <sys/cred.h>
37 #include <sys/vnode.h>
38 #include <sys/file.h>
39 #include <sys/pathname.h>
40 #include <sys/modctl.h>
41 #include <sys/acctctl.h>
42 #include <sys/bitmap.h>
43 #include <sys/exacct.h>
44 #include <sys/policy.h>
45 
46 /*
47  * acctctl(2)
48  *
49  *   acctctl() provides the administrative interface to the extended accounting
50  *   subsystem.  The process and task accounting facilities are configurable:
51  *   resources can be individually specified for recording in the appropriate
52  *   accounting file.
53  *
54  *   The current implementation of acctctl() requires that the process and task
55  *   and flow files be distinct across all zones.
56  *
57  * Locking
58  *   Each accounting species has an ac_info_t which contains a mutex,
59  *   used to protect the ac_info_t's contents, and to serialize access to the
60  *   appropriate file.
61  */
62 
/*
 * List of every zone's struct exacct_globals, linked through its ac_link
 * member.  Entries are appended by exacct_zone_init() and removed by
 * exacct_zone_fini(); ac_file_in_use() walks the list to find out whether a
 * vnode is already an accounting file in any zone.
 */
static list_t exacct_globals_list;
/* Held around every insertion into, removal from and walk of the list. */
static kmutex_t exacct_globals_list_lock;
65 
66 static int
67 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
68 {
69 	int state;
70 
71 	if (buf == NULL || (bufsz != sizeof (int)))
72 		return (EINVAL);
73 
74 	if (copyin(buf, &state, bufsz) != 0)
75 		return (EFAULT);
76 
77 	if (state != AC_ON && state != AC_OFF)
78 		return (EINVAL);
79 
80 	mutex_enter(&info->ac_lock);
81 	info->ac_state = state;
82 	mutex_exit(&info->ac_lock);
83 	return (0);
84 }
85 
86 static int
87 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
88 {
89 	if (buf == NULL || (bufsz != sizeof (int)))
90 		return (EINVAL);
91 
92 	mutex_enter(&info->ac_lock);
93 	if (copyout(&info->ac_state, buf, bufsz) != 0) {
94 		mutex_exit(&info->ac_lock);
95 		return (EFAULT);
96 	}
97 	mutex_exit(&info->ac_lock);
98 	return (0);
99 }
100 
101 static boolean_t
102 ac_file_in_use(vnode_t *vp)
103 {
104 	boolean_t in_use = B_FALSE;
105 	struct exacct_globals *acg;
106 
107 	if (vp == NULL)
108 		return (B_FALSE);
109 	mutex_enter(&exacct_globals_list_lock);
110 	/*
111 	 * Start off by grabbing all locks.
112 	 */
113 	for (acg = list_head(&exacct_globals_list); acg != NULL;
114 	    acg = list_next(&exacct_globals_list, acg)) {
115 		mutex_enter(&acg->ac_proc.ac_lock);
116 		mutex_enter(&acg->ac_task.ac_lock);
117 		mutex_enter(&acg->ac_flow.ac_lock);
118 	}
119 
120 	for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
121 	    acg = list_next(&exacct_globals_list, acg)) {
122 		/*
123 		 * We need to verify that we aren't already using this file for
124 		 * accounting in any zone.
125 		 */
126 		if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
127 		    vn_compare(acg->ac_task.ac_vnode, vp) ||
128 		    vn_compare(acg->ac_flow.ac_vnode, vp))
129 			in_use = B_TRUE;
130 	}
131 
132 	/*
133 	 * Drop all locks.
134 	 */
135 	for (acg = list_head(&exacct_globals_list); acg != NULL;
136 	    acg = list_next(&exacct_globals_list, acg)) {
137 		mutex_exit(&acg->ac_proc.ac_lock);
138 		mutex_exit(&acg->ac_task.ac_lock);
139 		mutex_exit(&acg->ac_flow.ac_lock);
140 	}
141 	mutex_exit(&exacct_globals_list_lock);
142 	return (in_use);
143 }
144 
145 static int
146 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
147 {
148 	int error = 0;
149 	void *kbuf;
150 	void *namebuf;
151 	int namelen;
152 	vnode_t *vp;
153 	void *hdr;
154 	size_t hdrsize;
155 
156 	if (ubuf == NULL) {
157 		mutex_enter(&info->ac_lock);
158 
159 		/*
160 		 * Closing accounting file
161 		 */
162 		if (info->ac_vnode != NULL) {
163 			error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
164 			    CRED(), NULL);
165 			if (error) {
166 				mutex_exit(&info->ac_lock);
167 				return (error);
168 			}
169 			VN_RELE(info->ac_vnode);
170 			info->ac_vnode = NULL;
171 		}
172 		if (info->ac_file != NULL) {
173 			kmem_free(info->ac_file, strlen(info->ac_file) + 1);
174 			info->ac_file = NULL;
175 		}
176 
177 		mutex_exit(&info->ac_lock);
178 		return (error);
179 	}
180 
181 	if (bufsz < 2 || bufsz > MAXPATHLEN)
182 		return (EINVAL);
183 
184 	/*
185 	 * We have to copy in the whole buffer since we can't tell the length
186 	 * of the string in user's address space.
187 	 */
188 	kbuf = kmem_zalloc(bufsz, KM_SLEEP);
189 	if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
190 		kmem_free(kbuf, bufsz);
191 		return (error);
192 	}
193 	if (*((char *)kbuf) != '/') {
194 		kmem_free(kbuf, bufsz);
195 		return (EINVAL);
196 	}
197 
198 	/*
199 	 * Now, allocate the space where we are going to save the
200 	 * name of the accounting file and kmem_free kbuf. We have to do this
201 	 * now because it is not good to sleep in kmem_alloc() while
202 	 * holding ac_info's lock.
203 	 */
204 	namelen = strlen(kbuf) + 1;
205 	namebuf = kmem_alloc(namelen, KM_SLEEP);
206 	(void) strcpy(namebuf, kbuf);
207 	kmem_free(kbuf, bufsz);
208 
209 	/*
210 	 * Check if this file already exists.
211 	 */
212 	error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
213 
214 	/*
215 	 * Check if the file is already in use.
216 	 */
217 	if (!error) {
218 		if (ac_file_in_use(vp)) {
219 			/*
220 			 * If we're already using it then return EBUSY
221 			 */
222 			kmem_free(namebuf, namelen);
223 			VN_RELE(vp);
224 			return (EBUSY);
225 		}
226 		VN_RELE(vp);
227 	}
228 
229 	/*
230 	 * Now, grab info's ac_lock and try to set up everything.
231 	 */
232 	mutex_enter(&info->ac_lock);
233 
234 	if ((error = vn_open(namebuf, UIO_SYSSPACE,
235 	    FCREAT | FWRITE | FTRUNC, 0600, &vp, CRCREAT, 0)) != 0) {
236 		mutex_exit(&info->ac_lock);
237 		kmem_free(namebuf, namelen);
238 		return (error);
239 	}
240 
241 	if (vp->v_type != VREG) {
242 		VN_RELE(vp);
243 		mutex_exit(&info->ac_lock);
244 		kmem_free(namebuf, namelen);
245 		return (EACCES);
246 	}
247 
248 	if (info->ac_vnode != NULL) {
249 		/*
250 		 * Switch from an old file to a new file by swapping
251 		 * their vnode pointers.
252 		 */
253 		vnode_t *oldvp;
254 		oldvp = info->ac_vnode;
255 		info->ac_vnode = vp;
256 		vp = oldvp;
257 	} else {
258 		/*
259 		 * Start writing accounting records to a new file.
260 		 */
261 		info->ac_vnode = vp;
262 		vp = NULL;
263 	}
264 	if (vp) {
265 		/*
266 		 * We still need to close the old file.
267 		 */
268 		if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
269 			VN_RELE(vp);
270 			mutex_exit(&info->ac_lock);
271 			kmem_free(namebuf, namelen);
272 			return (error);
273 		}
274 		VN_RELE(vp);
275 		if (info->ac_file != NULL) {
276 			kmem_free(info->ac_file,
277 			    strlen(info->ac_file) + 1);
278 			info->ac_file = NULL;
279 		}
280 	}
281 	/*
282 	 * Finally, point ac_file to the filename string and release the lock.
283 	 */
284 	info->ac_file = namebuf;
285 	mutex_exit(&info->ac_lock);
286 
287 	/*
288 	 * Create and write an exacct header to the file.
289 	 */
290 	hdr = exacct_create_header(&hdrsize);
291 	error = exacct_write_header(info, hdr, hdrsize);
292 
293 	return (error);
294 }
295 
296 static int
297 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
298 {
299 	int error = 0;
300 	vnode_t *vnode;
301 	char *file;
302 
303 	mutex_enter(&info->ac_lock);
304 	file = info->ac_file;
305 	vnode = info->ac_vnode;
306 
307 	if (file == NULL || vnode == NULL) {
308 		mutex_exit(&info->ac_lock);
309 		return (ENOTACTIVE);
310 	}
311 
312 	if (strlen(file) >= bufsz)
313 		error = ENOMEM;
314 	else
315 		error = copyoutstr(file, buf, MAXPATHLEN, NULL);
316 
317 	mutex_exit(&info->ac_lock);
318 	return (error);
319 }
320 
321 static int
322 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
323 {
324 	ac_res_t *res;
325 	ac_res_t *tmp;
326 	ulong_t *maskp;
327 	int id;
328 	uint_t counter = 0;
329 
330 	/*
331 	 * Validate that a non-zero buffer, sized within limits and to an
332 	 * integral number of ac_res_t's has been specified.
333 	 */
334 	if (bufsz == 0 ||
335 	    bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
336 	    (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
337 		return (EINVAL);
338 
339 	tmp = res = kmem_alloc(bufsz, KM_SLEEP);
340 	if (copyin(buf, res, bufsz) != 0) {
341 		kmem_free(res, bufsz);
342 		return (EFAULT);
343 	}
344 
345 	maskp = (ulong_t *)&info->ac_mask;
346 
347 	mutex_enter(&info->ac_lock);
348 	while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
349 		if (id > maxres || id < 0) {
350 			mutex_exit(&info->ac_lock);
351 			kmem_free(res, bufsz);
352 			return (EINVAL);
353 		}
354 		if (tmp->ar_state == AC_ON) {
355 			BT_SET(maskp, id);
356 		} else if (tmp->ar_state == AC_OFF) {
357 			BT_CLEAR(maskp, id);
358 		} else {
359 			mutex_exit(&info->ac_lock);
360 			kmem_free(res, bufsz);
361 			return (EINVAL);
362 		}
363 		tmp++;
364 		counter++;
365 	}
366 	mutex_exit(&info->ac_lock);
367 	kmem_free(res, bufsz);
368 	return (0);
369 }
370 
371 static int
372 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
373 {
374 	int error = 0;
375 	ac_res_t *res;
376 	ac_res_t *tmp;
377 	size_t ressz = sizeof (ac_res_t) * (maxres + 1);
378 	ulong_t *maskp;
379 	int id;
380 
381 	if (bufsz < ressz)
382 		return (EINVAL);
383 	tmp = res = kmem_alloc(ressz, KM_SLEEP);
384 
385 	mutex_enter(&info->ac_lock);
386 	maskp = (ulong_t *)&info->ac_mask;
387 	for (id = 1; id <= maxres; id++) {
388 		tmp->ar_id = id;
389 		tmp->ar_state = BT_TEST(maskp, id);
390 		tmp++;
391 	}
392 	tmp->ar_id = AC_NONE;
393 	tmp->ar_state = AC_OFF;
394 	mutex_exit(&info->ac_lock);
395 	error = copyout(res, buf, ressz);
396 	kmem_free(res, ressz);
397 	return (error);
398 }
399 
400 /*
401  * acctctl()
402  *
403  * Overview
404  *   acctctl() is the entry point for the acctctl(2) system call.
405  *
406  * Return values
407  *   On successful completion, return 0; otherwise -1 is returned and errno is
408  *   set appropriately.
409  *
410  * Caller's context
411  *   Called from the system call path.
412  */
413 int
414 acctctl(int cmd, void *buf, size_t bufsz)
415 {
416 	int error = 0;
417 	int mode = AC_MODE(cmd);
418 	int option = AC_OPTION(cmd);
419 	int maxres;
420 	ac_info_t *info;
421 	zone_t *zone = curproc->p_zone;
422 	struct exacct_globals *acg;
423 
424 	acg = zone_getspecific(exacct_zone_key, zone);
425 	/*
426 	 * exacct_zone_key and associated per-zone state were initialized when
427 	 * the module was loaded.
428 	 */
429 	ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
430 	ASSERT(acg != NULL);
431 
432 	switch (mode) {	/* sanity check */
433 	case AC_TASK:
434 		info = &acg->ac_task;
435 		maxres = AC_TASK_MAX_RES;
436 		break;
437 	case AC_PROC:
438 		info = &acg->ac_proc;
439 		maxres = AC_PROC_MAX_RES;
440 		break;
441 	case AC_FLOW:
442 		/*
443 		 * Flow accounting isn't currently configurable in non-global
444 		 * zones, but we have this field on a per-zone basis for future
445 		 * expansion as well as the ability to return default "unset"
446 		 * values for the various AC_*_GET queries.  AC_*_SET commands
447 		 * fail with EPERM for AC_FLOW in non-global zones.
448 		 */
449 		info = &acg->ac_flow;
450 		maxres = AC_FLOW_MAX_RES;
451 		break;
452 	default:
453 		return (set_errno(EINVAL));
454 	}
455 
456 	switch (option) {
457 	case AC_STATE_SET:
458 		if ((error = secpolicy_acct(CRED())) != 0)
459 			break;
460 		if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) {
461 			error = EPERM;
462 			break;
463 		}
464 		error = ac_state_set(info, buf, bufsz);
465 		break;
466 	case AC_STATE_GET:
467 		error = ac_state_get(info, buf, bufsz);
468 		break;
469 	case AC_FILE_SET:
470 		if ((error = secpolicy_acct(CRED())) != 0)
471 			break;
472 		if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) {
473 			error = EPERM;
474 			break;
475 		}
476 		error = ac_file_set(info, buf, bufsz);
477 		break;
478 	case AC_FILE_GET:
479 		error = ac_file_get(info, buf, bufsz);
480 		break;
481 	case AC_RES_SET:
482 		if ((error = secpolicy_acct(CRED())) != 0)
483 			break;
484 		if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) {
485 			error = EPERM;
486 			break;
487 		}
488 		error = ac_res_set(info, buf, bufsz, maxres);
489 		break;
490 	case AC_RES_GET:
491 		error = ac_res_get(info, buf, bufsz, maxres);
492 		break;
493 	default:
494 		return (set_errno(EINVAL));
495 	}
496 	if (error)
497 		return (set_errno(error));
498 	return (0);
499 }
500 
/*
 * System call table entry for acctctl().  The leading 3 presumably is the
 * argument count, matching acctctl(cmd, buf, bufsz) -- confirm against
 * struct sysent.
 */
static struct sysent ac_sysent = {
	3,
	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
	acctctl
};
506 
/*
 * Module linkage element that registers ac_sysent as a system call.
 */
static struct modlsys modlsys = {
	&mod_syscallops,
	"acctctl system call",
	&ac_sysent
};
512 
#ifdef _SYSCALL32_IMPL
/*
 * Linkage element for the 32-bit system call; it shares ac_sysent with the
 * native entry above and is only built when _SYSCALL32_IMPL is defined.
 */
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"32-bit acctctl system call",
	&ac_sysent
};
#endif
520 
/*
 * NULL-terminated list of linkage elements, handed to mod_install() by
 * _init() and to mod_info() by _info().
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
529 
530 /* ARGSUSED */
531 static void *
532 exacct_zone_init(zoneid_t zoneid)
533 {
534 	struct exacct_globals *acg;
535 
536 	acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
537 	mutex_enter(&exacct_globals_list_lock);
538 	list_insert_tail(&exacct_globals_list, acg);
539 	mutex_exit(&exacct_globals_list_lock);
540 	return (acg);
541 }
542 
543 static void
544 exacct_free_info(ac_info_t *info)
545 {
546 	mutex_enter(&info->ac_lock);
547 	if (info->ac_vnode) {
548 		(void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
549 		VN_RELE(info->ac_vnode);
550 		kmem_free(info->ac_file, strlen(info->ac_file) + 1);
551 	}
552 	info->ac_state = AC_OFF;
553 	info->ac_vnode = NULL;
554 	info->ac_file = NULL;
555 	mutex_exit(&info->ac_lock);
556 }
557 
558 /* ARGSUSED */
559 static void
560 exacct_zone_shutdown(zoneid_t zoneid, void *data)
561 {
562 	struct exacct_globals *acg = data;
563 
564 	/*
565 	 * The accounting files need to be closed during shutdown rather than
566 	 * destroy, since otherwise the filesystem they reside on may fail to
567 	 * unmount, thus causing the entire zone halt/reboot to fail.
568 	 */
569 	exacct_free_info(&acg->ac_proc);
570 	exacct_free_info(&acg->ac_task);
571 	exacct_free_info(&acg->ac_flow);
572 }
573 
574 /* ARGSUSED */
575 static void
576 exacct_zone_fini(zoneid_t zoneid, void *data)
577 {
578 	struct exacct_globals *acg = data;
579 
580 	mutex_enter(&exacct_globals_list_lock);
581 	list_remove(&exacct_globals_list, acg);
582 	mutex_exit(&exacct_globals_list_lock);
583 
584 	mutex_destroy(&acg->ac_proc.ac_lock);
585 	mutex_destroy(&acg->ac_task.ac_lock);
586 	mutex_destroy(&acg->ac_flow.ac_lock);
587 	kmem_free(acg, sizeof (*acg));
588 }
589 
590 int
591 _init()
592 {
593 	int error;
594 
595 	mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
596 	list_create(&exacct_globals_list, sizeof (struct exacct_globals),
597 	    offsetof(struct exacct_globals, ac_link));
598 	zone_key_create(&exacct_zone_key, exacct_zone_init,
599 	    exacct_zone_shutdown, exacct_zone_fini);
600 
601 	if ((error = mod_install(&modlinkage)) != 0) {
602 		(void) zone_key_delete(exacct_zone_key);
603 		exacct_zone_key = ZONE_KEY_UNINITIALIZED;
604 		mutex_destroy(&exacct_globals_list_lock);
605 		list_destroy(&exacct_globals_list);
606 	}
607 	return (error);
608 }
609 
610 int
611 _info(struct modinfo *modinfop)
612 {
613 	return (mod_info(&modlinkage, modinfop));
614 }
615 
/*
 * _fini()
 *
 * Overview
 *   Module unload entry point.  Unloading is always refused.
 *
 * Return values
 *   Always EBUSY.
 */
int
_fini()
{
	int rv = EBUSY;

	return (rv);
}
621