xref: /minix/minix/servers/mib/proc.c (revision e3b8d4bb)
1 /* MIB service - proc.c - functionality based on service process tables */
2 /* Eventually, the CTL_PROC subtree might end up here as well. */
3 
4 #include "mib.h"
5 
6 #include <sys/exec.h>
7 #include <minix/sysinfo.h>
8 
9 #include <machine/archtypes.h>
10 #include "kernel/proc.h"
11 #include "servers/pm/mproc.h"
12 #include "servers/vfs/const.h"
13 #include "servers/vfs/fproc.h"
14 
/* Local aliases for the kernel and PM process table entry types. */
typedef struct proc ixfer_proc_t;
typedef struct mproc ixfer_mproc_t;

/*
 * Local copies of the kernel process table, the PM process table, and an
 * extract of the VFS process table.  All three are (re)filled by
 * update_tables().  The kernel table has NR_TASKS extra leading slots for the
 * kernel tasks, so PM/VFS slot 'n' corresponds to kernel slot NR_TASKS + n.
 */
static ixfer_proc_t proc_tab[NR_TASKS + NR_PROCS];
static ixfer_mproc_t mproc_tab[NR_PROCS];
static struct fproc_light fproc_tab[NR_PROCS];

/*
 * The number of processes added to the current number of processes when doing
 * a size estimation, so that the actual data retrieval does not end up with
 * too little space if new processes have forked between the two calls.  We do
 * a process table update only once per clock tick, which means that typically
 * no update will take place between the user process's size estimation request
 * and its subsequent data retrieval request.  On the other hand, if we do
 * update process tables in between, quite a bit might have changed.
 */
#define EXTRA_PROCS	8

/*
 * PID lookup hash table.  hash_tab[] holds, per bucket (PID modulo
 * HASH_SLOTS), the PM slot number at the head of a chain; hnext_tab[] holds,
 * per PM slot, the next PM slot in the same chain.  NO_SLOT marks an empty
 * bucket and the end of a chain.
 */
#define HASH_SLOTS 	(NR_PROCS / 4)	/* expected nr. of processes in use */
#define NO_SLOT		(-1)
static int hash_tab[HASH_SLOTS];	/* hash table mapping from PID.. */
static int hnext_tab[NR_PROCS];		/* ..to PM process slot */

/* State of the local table copies, maintained by update_tables(). */
static clock_t tabs_updated = 0;	/* when the tables were last updated */
static int tabs_valid = TRUE;		/* FALSE if obtaining tables failed */
40 
41 /*
42  * Update the process tables by pulling in new copies from the kernel, PM, and
43  * VFS, but only every so often and only if it has not failed before.  Return
44  * TRUE iff the tables are now valid.
45  */
46 static int
update_tables(void)47 update_tables(void)
48 {
49 	clock_t now;
50 	pid_t pid;
51 	int r, kslot, mslot, hslot;
52 
53 	/*
54 	 * If retrieving the tables failed at some point, do not keep trying
55 	 * all the time.  Such a failure is very unlikely to be transient.
56 	 */
57 	if (tabs_valid == FALSE)
58 		return FALSE;
59 
60 	/*
61 	 * Update the tables once per clock tick at most.  The update operation
62 	 * is rather heavy, transferring several hundreds of kilobytes between
63 	 * servers.  Userland should be able to live with information that is
64 	 * outdated by at most one clock tick.
65 	 */
66 	now = getticks();
67 
68 	if (tabs_updated != 0 && tabs_updated == now)
69 		return TRUE;
70 
71 	/* Perform an actual update now. */
72 	tabs_valid = FALSE;
73 
74 	/* Retrieve and check the kernel process table. */
75 	if ((r = sys_getproctab(proc_tab)) != OK) {
76 		printf("MIB: unable to obtain kernel process table (%d)\n", r);
77 
78 		return FALSE;
79 	}
80 
81 	for (kslot = 0; kslot < NR_TASKS + NR_PROCS; kslot++) {
82 		if (proc_tab[kslot].p_magic != PMAGIC) {
83 			printf("MIB: kernel process table mismatch\n");
84 
85 			return FALSE;
86 		}
87 	}
88 
89 	/* Retrieve and check the PM process table. */
90 	r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc_tab, sizeof(mproc_tab));
91 	if (r != OK) {
92 		printf("MIB: unable to obtain PM process table (%d)\n", r);
93 
94 		return FALSE;
95 	}
96 
97 	for (mslot = 0; mslot < NR_PROCS; mslot++) {
98 		if (mproc_tab[mslot].mp_magic != MP_MAGIC) {
99 			printf("MIB: PM process table mismatch\n");
100 
101 			return FALSE;
102 		}
103 	}
104 
105 	/* Retrieve an extract of the VFS process table. */
106 	r = getsysinfo(VFS_PROC_NR, SI_PROCLIGHT_TAB, fproc_tab,
107 	    sizeof(fproc_tab));
108 	if (r != OK) {
109 		printf("MIB: unable to obtain VFS process table (%d)\n", r);
110 
111 		return FALSE;
112 	}
113 
114 	tabs_valid = TRUE;
115 	tabs_updated = now;
116 
117 	/*
118 	 * Build a hash table mapping from process IDs to slot numbers, for
119 	 * fast access.  TODO: decide if this is better done on demand only.
120 	 */
121 	for (hslot = 0; hslot < HASH_SLOTS; hslot++)
122 		hash_tab[hslot] = NO_SLOT;
123 
124 	for (mslot = 0; mslot < NR_PROCS; mslot++) {
125 		if (mproc_tab[mslot].mp_flags & IN_USE) {
126 			if ((pid = mproc_tab[mslot].mp_pid) <= 0)
127 				continue;
128 
129 			hslot = mproc_tab[mslot].mp_pid % HASH_SLOTS;
130 
131 			hnext_tab[mslot] = hash_tab[hslot];
132 			hash_tab[hslot] = mslot;
133 		}
134 	}
135 
136 	return TRUE;
137 }
138 
139 /*
140  * Return the PM slot number for the given PID, or NO_SLOT if the PID is not in
141  * use by a process.
142  */
143 static int
get_mslot(pid_t pid)144 get_mslot(pid_t pid)
145 {
146 	int mslot;
147 
148 	/* PID 0 identifies the kernel; checking this is up to the caller. */
149 	if (pid <= 0)
150 		return NO_SLOT;
151 
152 	for (mslot = hash_tab[pid % HASH_SLOTS]; mslot != NO_SLOT;
153 	    mslot = hnext_tab[mslot])
154 		if (mproc_tab[mslot].mp_pid == pid)
155 			break;
156 
157 	return mslot;
158 }
159 
/*
 * Convert the given number of clock ticks to seconds and microseconds, using
 * the clock frequency reported by sys_hz(), and store the result in 'tv'.
 */
static void
ticks_to_timeval(struct timeval * tv, clock_t ticks)
{
	clock_t freq, whole, rest;

	freq = sys_hz();

	whole = ticks / freq;	/* full seconds */
	rest = ticks % freq;	/* leftover ticks, less than one second */

	tv->tv_sec = whole;
	/* Scale in 64-bit arithmetic before dividing by the frequency. */
	tv->tv_usec = (long)(rest * 1000000ULL / freq);
}
173 
174 /*
175  * Generate a wchan message text for the cases that the process is blocked on
176  * IPC with another process, of which the endpoint is given as 'endpt' here.
177  * The name of the other process is to be stored in 'wmesg', which is a buffer
178  * of size 'wmsz'.  The result should be null terminated.  If 'ipc' is set, the
179  * process is blocked on a direct IPC call, in which case the name of the other
180  * process is enclosed in parentheses.  If 'ipc' is not set, the call is made
181  * indirectly through VFS, and the name of the other process should not be
182  * enclosed in parentheses.  If no name can be obtained, we use the endpoint of
183  * the other process instead.
184  */
185 static void
fill_wmesg(char * wmesg,size_t wmsz,endpoint_t endpt,int ipc)186 fill_wmesg(char * wmesg, size_t wmsz, endpoint_t endpt, int ipc)
187 {
188 	const char *name;
189 	int mslot;
190 
191 	switch (endpt) {
192 	case ANY:
193 		name = "any";
194 		break;
195 	case SELF:
196 		name = "self";
197 		break;
198 	case NONE:
199 		name = "none";
200 		break;
201 	default:
202 		mslot = _ENDPOINT_P(endpt);
203 		if (mslot >= -NR_TASKS && mslot < NR_PROCS &&
204 		    (mslot < 0 || (mproc_tab[mslot].mp_flags & IN_USE)))
205 			name = proc_tab[NR_TASKS + mslot].p_name;
206 		else
207 			name = NULL;
208 	}
209 
210 	if (name != NULL)
211 		snprintf(wmesg, wmsz, "%s%s%s",
212 		    ipc ? "(" : "", name, ipc ? ")" : "");
213 	else
214 		snprintf(wmesg, wmsz, "%s%d%s",
215 		    ipc ? "(" : "", endpt, ipc ? ")" : "");
216 }
217 
218 /*
219  * Return the LWP status of a process, along with additional information in
220  * case the process is sleeping (LSSLEEP): a wchan value and text to indicate
221  * what the process is sleeping on, and possibly a flag field modification to
222  * indicate that the sleep is interruptible.
223  */
224 static int
get_lwp_stat(int mslot,uint64_t * wcptr,char * wmptr,size_t wmsz,int32_t * flag)225 get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
226 	int32_t * flag)
227 {
228 	struct mproc *mp;
229 	struct fproc_light *fp;
230 	struct proc *kp;
231 	const char *wmesg;
232 	uint64_t wchan;
233 	endpoint_t endpt;
234 
235 	mp = &mproc_tab[mslot];
236 	fp = &fproc_tab[mslot];
237 	kp = &proc_tab[NR_TASKS + mslot];
238 
239 	/*
240 	 * First cover all the cases that the process is not sleeping.  In
241 	 * those cases, we need not return additional sleep information either.
242 	 */
243 	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
244 		return LSZOMB;
245 
246 	if (mp->mp_flags & EXITING)
247 		return LSDEAD;
248 
249 	if ((mp->mp_flags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
250 		return LSSTOP;
251 
252 	if (proc_is_runnable(kp))
253 		return LSRUN;
254 
255 	/*
256 	 * The process is sleeping.  In that case, we must also figure out why,
257 	 * and return an appropriate wchan value and human-readable wmesg text.
258 	 *
259 	 * The process can be blocked on either a known sleep state in PM or
260 	 * VFS, or otherwise on IPC communication with another process, or
261 	 * otherwise on a kernel RTS flag.  In each case, decide what to use as
262 	 * wchan value and wmesg text, and whether the sleep is interruptible.
263 	 *
264 	 * The wchan value should be unique for the sleep reason.  We use its
265 	 * lower eight bits to indicate a class:
266 	 *   0x00 = kernel task
267 	 *   0x01 = kerel RTS block
268 	 *   0x02 = PM call
269 	 *   0x03 = VFS call
270 	 *   0x04 = MIB call
271 	 *   0xff = blocked on process
272 	 * The upper bits are used for class-specific information.  The actual
273 	 * value does not really matter, as long as it is nonzero and there is
274 	 * no overlap between the different values.
275 	 */
276 	wchan = 0;
277 	wmesg = NULL;
278 
279 	/*
280 	 * First see if the process is marked as blocked in the tables of PM or
281 	 * VFS.  Such a block reason is always an interruptible sleep.  Note
282 	 * that we do not use the kernel table at all in this case: each of the
283 	 * three tables is consistent within itself, but not necessarily
284 	 * consistent with any of the other tables, so we avoid internal
285 	 * mismatches if we can.
286 	 */
287 	if (mp->mp_flags & WAITING) {
288 		wchan = 0x102;
289 		wmesg = "wait";
290 	} else if (mp->mp_flags & SIGSUSPENDED) {
291 		wchan = 0x202;
292 		wmesg = "pause";
293 	} else if (fp->fpl_blocked_on != FP_BLOCKED_ON_NONE) {
294 		wchan = (fp->fpl_blocked_on << 8) | 0x03;
295 		switch (fp->fpl_blocked_on) {
296 		case FP_BLOCKED_ON_PIPE:
297 			wmesg = "pipe";
298 			break;
299 		case FP_BLOCKED_ON_FLOCK:
300 			wmesg = "flock";
301 			break;
302 		case FP_BLOCKED_ON_POPEN:
303 			wmesg = "popen";
304 			break;
305 		case FP_BLOCKED_ON_SELECT:
306 			wmesg = "select";
307 			break;
308 		case FP_BLOCKED_ON_CDEV:
309 		case FP_BLOCKED_ON_SDEV:
310 			/*
311 			 * Add the task (= character or socket driver) endpoint
312 			 * to the wchan value, and use the driver's process
313 			 * name, without parentheses, as wmesg text.
314 			 */
315 			wchan |= (uint64_t)fp->fpl_task << 16;
316 			fill_wmesg(wmptr, wmsz, fp->fpl_task, FALSE /*ipc*/);
317 			break;
318 		default:
319 			/* A newly added flag we don't yet know about? */
320 			wmesg = "???";
321 			break;
322 		}
323 	}
324 	if (wchan != 0) {
325 		*wcptr = wchan;
326 		if (wmesg != NULL) /* NULL means "already set" here */
327 			strlcpy(wmptr, wmesg, wmsz);
328 		*flag |= L_SINTR;
329 	}
330 
331 	/*
332 	 * See if the process is blocked on sending or receiving.  If not, then
333 	 * use one of the kernel RTS flags as reason.
334 	 */
335 	endpt = P_BLOCKEDON(kp);
336 
337 	switch (endpt) {
338 	case MIB_PROC_NR:
339 		/* This is really just aesthetics. */
340 		wchan = 0x04;
341 		wmesg = "sysctl";
342 		break;
343 	case NONE:
344 		/*
345 		 * The process is not running, but also not blocked on IPC with
346 		 * another process.  This means it must be stopped on a kernel
347 		 * RTS flag.
348 		 */
349 		wchan = ((uint64_t)kp->p_rts_flags << 8) | 0x01;
350 		if (RTS_ISSET(kp, RTS_PROC_STOP))
351 			wmesg = "kstop";
352 		else if (RTS_ISSET(kp, RTS_SIGNALED) ||
353 		    RTS_ISSET(kp, RTS_SIGNALED))
354 			wmesg = "ksignal";
355 		else if (RTS_ISSET(kp, RTS_NO_PRIV))
356 			wmesg = "knopriv";
357 		else if (RTS_ISSET(kp, RTS_PAGEFAULT) ||
358 		    RTS_ISSET(kp, RTS_VMREQTARGET))
359 			wmesg = "fault";
360 		else if (RTS_ISSET(kp, RTS_NO_QUANTUM))
361 			wmesg = "sched";
362 		else
363 			wmesg = "kflag";
364 		break;
365 	case ANY:
366 		/*
367 		 * If the process is blocked receiving from ANY, mark it as
368 		 * being in an interruptible sleep.  This looks nicer, even
369 		 * though "interruptible" is not applicable to services at all.
370 		 */
371 		*flag |= L_SINTR;
372 		break;
373 	}
374 
375 	/*
376 	 * If at this point wchan is still zero, the process is blocked sending
377 	 * or receiving.  Use a wchan value based on the target endpoint, and
378 	 * use "(procname)" as wmesg text.
379 	 */
380 	if (wchan == 0) {
381 		*wcptr = ((uint64_t)endpt << 8) | 0xff;
382 		fill_wmesg(wmptr, wmsz, endpt, TRUE /*ipc*/);
383 	} else {
384 		*wcptr = wchan;
385 		if (wmesg != NULL) /* NULL means "already set" here */
386 			strlcpy(wmptr, wmesg, wmsz);
387 	}
388 
389 	return LSSLEEP;
390 }
391 
392 
393 /*
394  * Fill the part of a LWP structure that is common between kernel tasks and
395  * user processes.  Also return a CPU estimate in 'estcpu', because we generate
396  * the value as a side effect here, and the LWP structure has no estcpu field.
397  */
398 static void
fill_lwp_common(struct kinfo_lwp * l,int kslot,uint32_t * estcpu)399 fill_lwp_common(struct kinfo_lwp * l, int kslot, uint32_t * estcpu)
400 {
401 	struct proc *kp;
402 	struct timeval tv;
403 	clock_t uptime;
404 	uint32_t hz;
405 
406 	kp = &proc_tab[kslot];
407 
408 	uptime = getticks();
409 	hz = sys_hz();
410 
411 	/*
412 	 * We use the process endpoint as the LWP ID.  Not only does this allow
413 	 * users to obtain process endpoints with "ps -s" (thus replacing the
414 	 * MINIX3 ps(1)'s "ps -E"), but if we ever do implement kernel threads,
415 	 * this is probably still going to be accurate.
416 	 */
417 	l->l_lid = kp->p_endpoint;
418 
419 	/*
420 	 * The time during which the process has not been swapped in or out is
421 	 * not applicable for us, and thus, we set it to the time the process
422 	 * has been running (in seconds).  This value is relevant mostly for
423 	 * ps(1)'s CPU usage correction for processes that have just started.
424 	 */
425 	if (kslot >= NR_TASKS)
426 		l->l_swtime = uptime - mproc_tab[kslot - NR_TASKS].mp_started;
427 	else
428 		l->l_swtime = uptime;
429 	l->l_swtime /= hz;
430 
431 	/*
432 	 * Sleep (dequeue) times are not maintained for kernel tasks, so
433 	 * pretend they are never asleep (which is pretty accurate).
434 	 */
435 	if (kslot < NR_TASKS)
436 		l->l_slptime = 0;
437 	else
438 		l->l_slptime = (uptime - kp->p_dequeued) / hz;
439 
440 	l->l_priority = kp->p_priority;
441 	l->l_usrpri = kp->p_priority;
442 	l->l_cpuid = kp->p_cpu;
443 	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
444 	l->l_rtime_sec = tv.tv_sec;
445 	l->l_rtime_usec = tv.tv_usec;
446 
447 	/*
448 	 * Obtain CPU usage percentages and estimates through library code
449 	 * shared between the kernel and this service; see its source for
450 	 * details.  We note that the produced estcpu value is rather different
451 	 * from the one produced by NetBSD, but this should not be a problem.
452 	 */
453 	l->l_pctcpu = cpuavg_getstats(&kp->p_cpuavg, &l->l_cpticks, estcpu,
454 	    uptime, hz);
455 }
456 
457 /*
458  * Fill a LWP structure for a kernel task.  Each kernel task has its own LWP,
459  * and all of them have negative PIDs.
460  */
461 static void
fill_lwp_kern(struct kinfo_lwp * l,int kslot)462 fill_lwp_kern(struct kinfo_lwp * l, int kslot)
463 {
464 	uint32_t estcpu;
465 
466 	memset(l, 0, sizeof(*l));
467 
468 	l->l_flag = L_INMEM | L_SINTR | L_SYSTEM;
469 	l->l_stat = LSSLEEP;
470 	l->l_pid = kslot - NR_TASKS;
471 
472 	/*
473 	 * When showing LWP entries, ps(1) uses the process name rather than
474 	 * the LWP name.  All kernel tasks are therefore shown as "[kernel]"
475 	 * anyway.  We use the wmesg field to show the actual kernel task name.
476 	 */
477 	l->l_wchan = ((uint64_t)(l->l_pid) << 8) | 0x00;
478 	strlcpy(l->l_wmesg, proc_tab[kslot].p_name, sizeof(l->l_wmesg));
479 	strlcpy(l->l_name, "kernel", sizeof(l->l_name));
480 
481 	fill_lwp_common(l, kslot, &estcpu);
482 }
483 
484 /*
485  * Fill a LWP structure for a user process.
486  */
487 static void
fill_lwp_user(struct kinfo_lwp * l,int mslot)488 fill_lwp_user(struct kinfo_lwp * l, int mslot)
489 {
490 	struct mproc *mp;
491 	uint32_t estcpu;
492 
493 	memset(l, 0, sizeof(*l));
494 
495 	mp = &mproc_tab[mslot];
496 
497 	l->l_flag = L_INMEM;
498 	l->l_stat = get_lwp_stat(mslot, &l->l_wchan, l->l_wmesg,
499 	    sizeof(l->l_wmesg), &l->l_flag);
500 	l->l_pid = mp->mp_pid;
501 	strlcpy(l->l_name, mp->mp_name, sizeof(l->l_name));
502 
503 	fill_lwp_common(l, NR_TASKS + mslot, &estcpu);
504 }
505 
506 /*
507  * Implementation of CTL_KERN KERN_LWP.
508  */
509 ssize_t
mib_kern_lwp(struct mib_call * call,struct mib_node * node __unused,struct mib_oldp * oldp,struct mib_newp * newp __unused)510 mib_kern_lwp(struct mib_call * call, struct mib_node * node __unused,
511 	struct mib_oldp * oldp, struct mib_newp * newp __unused)
512 {
513 	struct kinfo_lwp lwp;
514 	struct mproc *mp;
515 	size_t copysz;
516 	ssize_t off;
517 	pid_t pid;
518 	int r, elsz, elmax, kslot, mslot, last_mslot;
519 
520 	if (call->call_namelen != 3)
521 		return EINVAL;
522 
523 	pid = (pid_t)call->call_name[0];
524 	elsz = call->call_name[1];
525 	elmax = call->call_name[2]; /* redundant with the given oldlen.. */
526 
527 	if (pid < -1 || elsz <= 0 || elmax < 0)
528 		return EINVAL;
529 
530 	if (!update_tables())
531 		return EINVAL;
532 
533 	off = 0;
534 	copysz = MIN((size_t)elsz, sizeof(lwp));
535 
536 	/*
537 	 * We model kernel tasks as LWP threads of the kernel (with PID 0).
538 	 * Modeling the kernel tasks as processes with negative PIDs, like
539 	 * ProcFS does, conflicts with the KERN_LWP API here: a PID of -1
540 	 * indicates that the caller wants a full listing of LWPs.
541 	 */
542 	if (pid <= 0) {
543 		for (kslot = 0; kslot < NR_TASKS; kslot++) {
544 			if (mib_inrange(oldp, off) && elmax > 0) {
545 				fill_lwp_kern(&lwp, kslot);
546 				if ((r = mib_copyout(oldp, off, &lwp,
547 				    copysz)) < 0)
548 					return r;
549 				elmax--;
550 			}
551 			off += elsz;
552 		}
553 
554 		/* No need to add extra space here: NR_TASKS is static. */
555 		if (pid == 0)
556 			return off;
557 	}
558 
559 	/*
560 	 * With PID 0 out of the way: the user requested the LWP for either a
561 	 * specific user process (pid > 0), or for all processes (pid < 0).
562 	 */
563 	if (pid > 0) {
564 		if ((mslot = get_mslot(pid)) == NO_SLOT ||
565 		    (mproc_tab[mslot].mp_flags & (TRACE_ZOMBIE | ZOMBIE)))
566 			return ESRCH;
567 		last_mslot = mslot;
568 	} else {
569 		mslot = 0;
570 		last_mslot = NR_PROCS - 1;
571 	}
572 
573 	for (; mslot <= last_mslot; mslot++) {
574 		mp = &mproc_tab[mslot];
575 
576 		if ((mp->mp_flags & (IN_USE | TRACE_ZOMBIE | ZOMBIE)) !=
577 		    IN_USE)
578 			continue;
579 
580 		if (mib_inrange(oldp, off) && elmax > 0) {
581 			fill_lwp_user(&lwp, mslot);
582 			if ((r = mib_copyout(oldp, off, &lwp, copysz)) < 0)
583 				return r;
584 			elmax--;
585 		}
586 		off += elsz;
587 	}
588 
589 	if (oldp == NULL && pid < 0)
590 		off += EXTRA_PROCS * elsz;
591 
592 	return off;
593 }
594 
595 
596 /*
597  * Fill the part of a process structure that is common between kernel tasks and
598  * user processes.
599  */
600 static void
fill_proc2_common(struct kinfo_proc2 * p,int kslot)601 fill_proc2_common(struct kinfo_proc2 * p, int kslot)
602 {
603 	struct vm_usage_info vui;
604 	struct timeval tv;
605 	struct proc *kp;
606 	struct kinfo_lwp l;
607 
608 	kp = &proc_tab[kslot];
609 
610 	/*
611 	 * Much of the information in the LWP structure also ends up in the
612 	 * process structure.  In order to avoid duplication of some important
613 	 * code, first generate LWP values and then copy it them into the
614 	 * process structure.
615 	 */
616 	memset(&l, 0, sizeof(l));
617 	fill_lwp_common(&l, kslot, &p->p_estcpu);
618 
619 	/* Obtain memory usage information from VM.  Ignore failures. */
620 	memset(&vui, 0, sizeof(vui));
621 	(void)vm_info_usage(kp->p_endpoint, &vui);
622 
623 	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
624 	p->p_rtime_sec = l.l_rtime_sec;
625 	p->p_rtime_usec = l.l_rtime_usec;
626 	p->p_cpticks = l.l_cpticks;
627 	p->p_pctcpu = l.l_pctcpu;
628 	p->p_swtime = l.l_swtime;
629 	p->p_slptime = l.l_slptime;
630 	p->p_uticks = kp->p_user_time;
631 	p->p_sticks = kp->p_sys_time;
632 	/* TODO: p->p_iticks */
633 	ticks_to_timeval(&tv, kp->p_user_time);
634 	p->p_uutime_sec = tv.tv_sec;
635 	p->p_uutime_usec = tv.tv_usec;
636 	ticks_to_timeval(&tv, kp->p_sys_time);
637 	p->p_ustime_sec = tv.tv_sec;
638 	p->p_ustime_usec = tv.tv_usec;
639 
640 	p->p_priority = l.l_priority;
641 	p->p_usrpri = l.l_usrpri;
642 
643 	p->p_vm_rssize = howmany(vui.vui_total, PAGE_SIZE);
644 	p->p_vm_vsize = howmany(vui.vui_virtual, PAGE_SIZE);
645 	p->p_vm_msize = howmany(vui.vui_mvirtual, PAGE_SIZE);
646 
647 	p->p_uru_maxrss = vui.vui_maxrss;
648 	p->p_uru_minflt = vui.vui_minflt;
649 	p->p_uru_majflt = vui.vui_majflt;
650 
651 	p->p_cpuid = l.l_cpuid;
652 }
653 
654 /*
655  * Fill a process structure for the kernel pseudo-process (with PID 0).
656  */
657 static void
fill_proc2_kern(struct kinfo_proc2 * p)658 fill_proc2_kern(struct kinfo_proc2 * p)
659 {
660 
661 	memset(p, 0, sizeof(*p));
662 
663 	p->p_flag = L_INMEM | L_SYSTEM | L_SINTR;
664 	p->p_pid = 0;
665 	p->p_stat = LSSLEEP;
666 	p->p_nice = NZERO;
667 
668 	/* Use the KERNEL task wchan, for consistency between ps and top. */
669 	p->p_wchan = ((uint64_t)KERNEL << 8) | 0x00;
670 	strlcpy(p->p_wmesg, "kernel", sizeof(p->p_wmesg));
671 
672 	strlcpy(p->p_comm, "kernel", sizeof(p->p_comm));
673 	p->p_realflag = P_INMEM | P_SYSTEM | P_SINTR;
674 	p->p_realstat = SACTIVE;
675 	p->p_nlwps = NR_TASKS;
676 
677 	/*
678 	 * By using the KERNEL slot here, the kernel process will get a proper
679 	 * CPU usage average.
680 	 */
681 	fill_proc2_common(p, KERNEL + NR_TASKS);
682 }
683 
684 /*
685  * Fill a process structure for a user process.
686  */
687 static void
fill_proc2_user(struct kinfo_proc2 * p,int mslot)688 fill_proc2_user(struct kinfo_proc2 * p, int mslot)
689 {
690 	struct mproc *mp;
691 	struct fproc_light *fp;
692 	time_t boottime;
693 	dev_t tty;
694 	struct timeval tv;
695 	int i, r, kslot, zombie;
696 
697 	memset(p, 0, sizeof(*p));
698 
699 	if ((r = getuptime(NULL, NULL, &boottime)) != OK)
700 		panic("getuptime failed: %d", r);
701 
702 	kslot = NR_TASKS + mslot;
703 	mp = &mproc_tab[mslot];
704 	fp = &fproc_tab[mslot];
705 
706 	zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
707 	tty = (!zombie) ? fp->fpl_tty : NO_DEV;
708 
709 	p->p_eflag = 0;
710 	if (tty != NO_DEV)
711 		p->p_eflag |= EPROC_CTTY;
712 	if (mp->mp_pid == mp->mp_procgrp) /* TODO: job control support */
713 		p->p_eflag |= EPROC_SLEADER;
714 
715 	p->p_exitsig = SIGCHLD; /* TODO */
716 
717 	p->p_flag = P_INMEM;
718 	if (mp->mp_flags & TAINTED)
719 		p->p_flag |= P_SUGID;
720 	if (mp->mp_tracer != NO_TRACER)
721 		p->p_flag |= P_TRACED;
722 	if (tty != NO_DEV)
723 		p->p_flag |= P_CONTROLT;
724 	p->p_pid = mp->mp_pid;
725 	if (mp->mp_parent >= 0 && mp->mp_parent < NR_PROCS)
726 		p->p_ppid = mproc_tab[mp->mp_parent].mp_pid;
727 	p->p_sid = mp->mp_procgrp; /* TODO: job control supported */
728 	p->p__pgid = mp->mp_procgrp;
729 	p->p_tpgid = (tty != NO_DEV) ? mp->mp_procgrp : 0;
730 	p->p_uid = mp->mp_effuid;
731 	p->p_ruid = mp->mp_realuid;
732 	p->p_gid = mp->mp_effgid;
733 	p->p_rgid = mp->mp_realgid;
734 	p->p_ngroups = MIN(mp->mp_ngroups, KI_NGROUPS);
735 	for (i = 0; i < p->p_ngroups; i++)
736 		p->p_groups[i] = mp->mp_sgroups[i];
737 	p->p_tdev = tty;
738 	memcpy(&p->p_siglist, &mp->mp_sigpending, sizeof(p->p_siglist));
739 	memcpy(&p->p_sigmask, &mp->mp_sigmask, sizeof(p->p_sigmask));
740 	memcpy(&p->p_sigcatch, &mp->mp_catch, sizeof(p->p_sigcatch));
741 	memcpy(&p->p_sigignore, &mp->mp_ignore, sizeof(p->p_sigignore));
742 	p->p_nice = mp->mp_nice + NZERO;
743 	strlcpy(p->p_comm, mp->mp_name, sizeof(p->p_comm));
744 	p->p_uvalid = 1;
745 	ticks_to_timeval(&tv, mp->mp_started);
746 	p->p_ustart_sec = boottime + tv.tv_sec;
747 	p->p_ustart_usec = tv.tv_usec;
748 	/* TODO: other rusage fields */
749 	ticks_to_timeval(&tv, mp->mp_child_utime + mp->mp_child_stime);
750 	p->p_uctime_sec = tv.tv_sec;
751 	p->p_uctime_usec = tv.tv_usec;
752 	p->p_realflag = p->p_flag;
753 	p->p_nlwps = (zombie) ? 0 : 1;
754 	p->p_svuid = mp->mp_svuid;
755 	p->p_svgid = mp->mp_svgid;
756 
757 	p->p_stat = get_lwp_stat(mslot, &p->p_wchan, p->p_wmesg,
758 	    sizeof(p->p_wmesg), &p->p_flag);
759 
760 	switch (p->p_stat) {
761 	case LSRUN:
762 		p->p_realstat = SACTIVE;
763 		p->p_nrlwps = 1;
764 		break;
765 	case LSSLEEP:
766 		p->p_realstat = SACTIVE;
767 		if (p->p_flag & L_SINTR)
768 			p->p_realflag |= P_SINTR;
769 		break;
770 	case LSSTOP:
771 		p->p_realstat = SSTOP;
772 		break;
773 	case LSZOMB:
774 		p->p_realstat = SZOMB;
775 		break;
776 	case LSDEAD:
777 		p->p_stat = LSZOMB; /* ps(1) STAT does not know LSDEAD */
778 		p->p_realstat = SDEAD;
779 		break;
780 	default:
781 		assert(0);
782 	}
783 
784 	if (!zombie)
785 		fill_proc2_common(p, kslot);
786 }
787 
788 /*
789  * Implementation of CTL_KERN KERN_PROC2.
790  */
791 ssize_t
mib_kern_proc2(struct mib_call * call,struct mib_node * node __unused,struct mib_oldp * oldp,struct mib_newp * newp __unused)792 mib_kern_proc2(struct mib_call * call, struct mib_node * node __unused,
793 	struct mib_oldp * oldp, struct mib_newp * newp __unused)
794 {
795 	struct kinfo_proc2 proc2;
796 	struct mproc *mp;
797 	size_t copysz;
798 	ssize_t off;
799 	dev_t tty;
800 	int r, req, arg, elsz, elmax, kmatch, zombie, mslot;
801 
802 	if (call->call_namelen != 4)
803 		return EINVAL;
804 
805 	req = call->call_name[0];
806 	arg = call->call_name[1];
807 	elsz = call->call_name[2];
808 	elmax = call->call_name[3]; /* redundant with the given oldlen.. */
809 
810 	/*
811 	 * The kernel is special, in that it does not have a slot in the PM or
812 	 * VFS tables.  As such, it is dealt with separately.  While checking
813 	 * arguments, we might as well check whether the kernel is matched.
814 	 */
815 	switch (req) {
816 	case KERN_PROC_ALL:
817 		kmatch = TRUE;
818 		break;
819 	case KERN_PROC_PID:
820 	case KERN_PROC_SESSION:
821 	case KERN_PROC_PGRP:
822 	case KERN_PROC_UID:
823 	case KERN_PROC_RUID:
824 	case KERN_PROC_GID:
825 	case KERN_PROC_RGID:
826 		kmatch = (arg == 0);
827 		break;
828 	case KERN_PROC_TTY:
829 		kmatch = ((dev_t)arg == KERN_PROC_TTY_NODEV);
830 		break;
831 	default:
832 		return EINVAL;
833 	}
834 
835 	if (elsz <= 0 || elmax < 0)
836 		return EINVAL;
837 
838 	if (!update_tables())
839 		return EINVAL;
840 
841 	off = 0;
842 	copysz = MIN((size_t)elsz, sizeof(proc2));
843 
844 	if (kmatch) {
845 		if (mib_inrange(oldp, off) && elmax > 0) {
846 			fill_proc2_kern(&proc2);
847 			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
848 				return r;
849 			elmax--;
850 		}
851 		off += elsz;
852 	}
853 
854 	for (mslot = 0; mslot < NR_PROCS; mslot++) {
855 		mp = &mproc_tab[mslot];
856 
857 		if (!(mp->mp_flags & IN_USE))
858 			continue;
859 
860 		switch (req) {
861 		case KERN_PROC_PID:
862 			if ((pid_t)arg != mp->mp_pid)
863 				continue;
864 			break;
865 		case KERN_PROC_SESSION: /* TODO: job control support */
866 		case KERN_PROC_PGRP:
867 			if ((pid_t)arg != mp->mp_procgrp)
868 				continue;
869 			break;
870 		case KERN_PROC_TTY:
871 			if ((dev_t)arg == KERN_PROC_TTY_REVOKE)
872 				continue; /* TODO: revoke(2) support */
873 			/* Do not access the fproc_tab slot of zombies. */
874 			zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
875 			tty = (zombie) ? fproc_tab[mslot].fpl_tty : NO_DEV;
876 			if ((dev_t)arg == KERN_PROC_TTY_NODEV) {
877 				if (tty != NO_DEV)
878 					continue;
879 			} else if ((dev_t)arg == NO_DEV || (dev_t)arg != tty)
880 				continue;
881 			break;
882 		case KERN_PROC_UID:
883 			if ((uid_t)arg != mp->mp_effuid)
884 				continue;
885 			break;
886 		case KERN_PROC_RUID:
887 			if ((uid_t)arg != mp->mp_realuid)
888 				continue;
889 			break;
890 		case KERN_PROC_GID:
891 			if ((gid_t)arg != mp->mp_effgid)
892 				continue;
893 			break;
894 		case KERN_PROC_RGID:
895 			if ((gid_t)arg != mp->mp_realgid)
896 				continue;
897 			break;
898 		}
899 
900 		if (mib_inrange(oldp, off) && elmax > 0) {
901 			fill_proc2_user(&proc2, mslot);
902 			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
903 				return r;
904 			elmax--;
905 		}
906 		off += elsz;
907 	}
908 
909 	if (oldp == NULL && req != KERN_PROC_PID)
910 		off += EXTRA_PROCS * elsz;
911 
912 	return off;
913 }
914 
915 /*
916  * Implementation of CTL_KERN KERN_PROC_ARGS.
917  */
918 ssize_t
mib_kern_proc_args(struct mib_call * call,struct mib_node * node __unused,struct mib_oldp * oldp,struct mib_newp * newp __unused)919 mib_kern_proc_args(struct mib_call * call, struct mib_node * node __unused,
920 	struct mib_oldp * oldp, struct mib_newp * newp __unused)
921 {
922 	char vbuf[PAGE_SIZE], sbuf[PAGE_SIZE], obuf[PAGE_SIZE];
923 	struct ps_strings pss;
924 	struct mproc *mp;
925 	char *buf, *p, *q, *pptr;
926 	vir_bytes vaddr, vpage, spage, paddr, ppage;
927 	size_t max, off, olen, oleft, oldlen, bytes, pleft;
928 	unsigned int copybudget;
929 	pid_t pid;
930 	int req, mslot, count, aborted, ended;
931 	ssize_t r;
932 
933 	if (call->call_namelen != 2)
934 		return EINVAL;
935 
936 	pid = call->call_name[0];
937 	req = call->call_name[1];
938 
939 	switch (req) {
940 	case KERN_PROC_ARGV:
941 	case KERN_PROC_ENV:
942 	case KERN_PROC_NARGV:
943 	case KERN_PROC_NENV:
944 		break;
945 	default:
946 		return EOPNOTSUPP;
947 	}
948 
949 	if (!update_tables())
950 		return EINVAL;
951 
952 	if ((mslot = get_mslot(pid)) == NO_SLOT)
953 		return ESRCH;
954 	mp = &mproc_tab[mslot];
955 	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
956 		return ESRCH;
957 
958 	/* We can return the count field size without copying in any data. */
959 	if (oldp == NULL && (req == KERN_PROC_NARGV || req == KERN_PROC_NENV))
960 		return sizeof(count);
961 
962 	if (sys_datacopy(mp->mp_endpoint,
963 	    mp->mp_frame_addr + mp->mp_frame_len - sizeof(pss),
964 	    SELF, (vir_bytes)&pss, sizeof(pss)) != OK)
965 		return EINVAL;
966 
967 	/*
968 	 * Determine the upper size limit of the requested data.  Not only may
969 	 * the size never exceed ARG_MAX, it may also not exceed the frame
970 	 * length as given in its original exec call.  In fact, the frame
971 	 * length should be substantially larger: all strings for both the
972 	 * arguments and the environment are in there, along with other stuff,
973 	 * and there must be no overlap between strings.  It is possible that
974 	 * the application called setproctitle(3), in which case the ps_strings
975 	 * pointers refer to data outside the frame altogether.  However, this
976 	 * data should not exceed 2048 bytes, and we cover this by rounding up
977 	 * the frame length to a multiple of the page size.  Anyhow, NetBSD
978 	 * blindly returns ARG_MAX when asked for a size estimate, so with this
979 	 * maximum we are already quite a bit more accurate.
980 	 */
981 	max = roundup(MIN(mp->mp_frame_len, ARG_MAX), PAGE_SIZE);
982 
983 	switch (req) {
984 	case KERN_PROC_NARGV:
985 		count = pss.ps_nargvstr;
986 		return mib_copyout(oldp, 0, &count, sizeof(count));
987 	case KERN_PROC_NENV:
988 		count = pss.ps_nenvstr;
989 		return mib_copyout(oldp, 0, &count, sizeof(count));
990 	case KERN_PROC_ARGV:
991 		if (oldp == NULL)
992 			return max;
993 		vaddr = (vir_bytes)pss.ps_argvstr;
994 		count = pss.ps_nargvstr;
995 		break;
996 	case KERN_PROC_ENV:
997 		if (oldp == NULL)
998 			return max;
999 		vaddr = (vir_bytes)pss.ps_envstr;
1000 		count = pss.ps_nenvstr;
1001 		break;
1002 	}
1003 
1004 	/*
1005 	 * Go through the strings.  Copy in entire, machine-aligned pages at
1006 	 * once, in the hope that all data is stored consecutively, which it
1007 	 * should be: we expect that the vector is followed by the strings, and
1008 	 * that the strings are stored in order of vector reference.  We keep
1009 	 * up to two pages with copied-in data: one for the vector, and
1010 	 * optionally one for string data.  In addition, we keep one page with
1011 	 * data to be copied out, so that we do not cause a lot of copy
1012 	 * overhead for short strings.
1013 	 *
1014 	 * We stop whenever any of the following conditions are met:
1015 	 * - copying in data from the target process fails for any reason;
1016 	 * - we have processed the last index ('count') into the vector;
1017 	 * - the current vector element is a NULL pointer;
1018 	 * - the requested number of output bytes ('oldlen') has been reached;
1019 	 * - the maximum number of output bytes ('max') has been reached;
1020 	 * - the number of page copy-ins exceeds an estimated threshold;
1021 	 * - copying out data fails for any reason (we then return the error).
1022 	 *
1023 	 * We limit the number of page copy-ins because otherwise a rogue
1024 	 * process could create an argument vector consisting of only two-byte
1025 	 * strings that all span two pages, causing us to copy up to 1GB of
1026 	 * data with the current ARG_MAX value of 256K.  No reasonable vector
1027 	 * should cause more than (ARG_MAX / PAGE_SIZE) page copies for
1028 	 * strings; we are nice enough to allow twice that.  Vector copies do
1029 	 * not count, as they are linear anyway.
1030 	 *
1031 	 * Unlike every other sysctl(2) call, we are supposed to truncate the
1032 	 * resulting size (the returned 'oldlen') to the requested size (the
1033 	 * given 'oldlen') *and* return the resulting size, rather than ENOMEM
1034 	 * and the real size.  Unfortunately, libkvm actually relies on this.
1035 	 *
1036 	 * Generally speaking, upon failure we just return a truncated result.
1037 	 * In case of truncation, the data we copy out need not be null
1038 	 * terminated.  It is up to userland to process the data correctly.
1039 	 */
1040 	if (trunc_page(vaddr) == 0 || vaddr % sizeof(char *) != 0)
1041 		return 0;
1042 
1043 	off = 0;
1044 	olen = 0;
1045 	aborted = FALSE;
1046 
1047 	oldlen = mib_getoldlen(oldp);
1048 	if (oldlen > max)
1049 		oldlen = max;
1050 
1051 	copybudget = (ARG_MAX / PAGE_SIZE) * 2;
1052 
1053 	vpage = 0;
1054 	spage = 0;
1055 
1056 	while (count > 0 && off + olen < oldlen && !aborted) {
1057 		/*
1058 		 * Start by fetching the page containing the current vector
1059 		 * element, if needed.  We could limit the fetch to the vector
1060 		 * size, but our hope is that for the simple cases, the strings
1061 		 * are on the remainder of the same page, so we save a copy
1062 		 * call.  TODO: since the strings should follow the vector, we
1063 		 * could start the copy at the base of the vector.
1064 		 */
1065 		if (trunc_page(vaddr) != vpage) {
1066 			vpage = trunc_page(vaddr);
1067 			if (sys_datacopy(mp->mp_endpoint, vpage, SELF,
1068 			    (vir_bytes)vbuf, PAGE_SIZE) != OK)
1069 				break;
1070 		}
1071 
1072 		/* Get the current vector element, pointing to a string. */
1073 		memcpy(&pptr, &vbuf[vaddr - vpage], sizeof(pptr));
1074 		paddr = (vir_bytes)pptr;
1075 		ppage = trunc_page(paddr);
1076 		if (ppage == 0)
1077 			break;
1078 
1079 		/* Fetch the string itself, one page at a time at most. */
1080 		do {
1081 			/*
1082 			 * See if the string pointer falls inside either the
1083 			 * vector page or the previously fetched string page
1084 			 * (if any).  If not, fetch a string page.
1085 			 */
1086 			if (ppage == vpage) {
1087 				buf = vbuf;
1088 			} else if (ppage == spage) {
1089 				buf = sbuf;
1090 			} else {
1091 				if (--copybudget == 0) {
1092 					aborted = TRUE;
1093 					break;
1094 				}
1095 				spage = ppage;
1096 				if (sys_datacopy(mp->mp_endpoint, spage, SELF,
1097 				    (vir_bytes)sbuf, PAGE_SIZE) != OK) {
1098 					aborted = TRUE;
1099 					break;
1100 				}
1101 				buf = sbuf;
1102 			}
1103 
1104 			/*
1105 			 * We now have a string fragment in a buffer.  See if
1106 			 * the string is null terminated.  If not, all the data
1107 			 * up to the buffer end is part of the string, and the
1108 			 * string continues on the next page.
1109 			 */
1110 			p = &buf[paddr - ppage];
1111 			pleft = PAGE_SIZE - (paddr - ppage);
1112 			assert(pleft > 0);
1113 
1114 			if ((q = memchr(p, '\0', pleft)) != NULL) {
1115 				bytes = (size_t)(q - p + 1);
1116 				assert(bytes <= pleft);
1117 				ended = TRUE;
1118 			} else {
1119 				bytes = pleft;
1120 				ended = FALSE;
1121 			}
1122 
1123 			/* Limit the result to the requested length. */
1124 			if (off + olen + bytes > oldlen)
1125 				bytes = oldlen - off - olen;
1126 
1127 			/*
1128 			 * Add 'bytes' bytes from string pointer 'p' to the
1129 			 * output buffer, copying out its contents to userland
1130 			 * if it has filled up.
1131 			 */
1132 			if (olen + bytes > sizeof(obuf)) {
1133 				oleft = sizeof(obuf) - olen;
1134 				memcpy(&obuf[olen], p, oleft);
1135 
1136 				if ((r = mib_copyout(oldp, off, obuf,
1137 				    sizeof(obuf))) < 0)
1138 					return r;
1139 				off += sizeof(obuf);
1140 				olen = 0;
1141 
1142 				p += oleft;
1143 				bytes -= oleft;
1144 			}
1145 			if (bytes > 0) {
1146 				memcpy(&obuf[olen], p, bytes);
1147 				olen += bytes;
1148 			}
1149 
1150 			/*
1151 			 * Continue as long as we have not yet found the string
1152 			 * end, and we have not yet filled the output buffer.
1153 			 */
1154 			paddr += pleft;
1155 			assert(trunc_page(paddr) == paddr);
1156 			ppage = paddr;
1157 		} while (!ended && off + olen < oldlen);
1158 
1159 		vaddr += sizeof(char *);
1160 		count--;
1161 	}
1162 
1163 	/* Copy out any remainder of the output buffer. */
1164 	if (olen > 0) {
1165 		if ((r = mib_copyout(oldp, off, obuf, olen)) < 0)
1166 			return r;
1167 		off += olen;
1168 	}
1169 
1170 	assert(off <= oldlen);
1171 	return off;
1172 }
1173 
1174 /*
1175  * Implementation of CTL_MINIX MINIX_PROC PROC_LIST.
1176  */
1177 ssize_t
mib_minix_proc_list(struct mib_call * call __unused,struct mib_node * node __unused,struct mib_oldp * oldp,struct mib_newp * newp __unused)1178 mib_minix_proc_list(struct mib_call * call __unused,
1179 	struct mib_node * node __unused, struct mib_oldp * oldp,
1180 	struct mib_newp * newp __unused)
1181 {
1182 	struct minix_proc_list mpl[NR_PROCS];
1183 	struct minix_proc_list *mplp;
1184 	struct mproc *mp;
1185 	unsigned int mslot;
1186 
1187 	if (oldp == NULL)
1188 		return sizeof(mpl);
1189 
1190 	if (!update_tables())
1191 		return EINVAL;
1192 
1193 	memset(&mpl, 0, sizeof(mpl));
1194 
1195 	mplp = mpl;
1196 	mp = mproc_tab;
1197 
1198 	for (mslot = 0; mslot < NR_PROCS; mslot++, mplp++, mp++) {
1199 		if (!(mp->mp_flags & IN_USE) || mp->mp_pid <= 0)
1200 			continue;
1201 
1202 		mplp->mpl_flags = MPLF_IN_USE;
1203 		if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
1204 			mplp->mpl_flags |= MPLF_ZOMBIE;
1205 		mplp->mpl_pid = mp->mp_pid;
1206 		mplp->mpl_uid = mp->mp_effuid;
1207 		mplp->mpl_gid = mp->mp_effgid;
1208 	}
1209 
1210 	return mib_copyout(oldp, 0, &mpl, sizeof(mpl));
1211 }
1212 
1213 /*
1214  * Implementation of CTL_MINIX MINIX_PROC PROC_DATA.
1215  */
1216 ssize_t
mib_minix_proc_data(struct mib_call * call,struct mib_node * node __unused,struct mib_oldp * oldp,struct mib_newp * newp __unused)1217 mib_minix_proc_data(struct mib_call * call, struct mib_node * node __unused,
1218 	struct mib_oldp * oldp, struct mib_newp * newp __unused)
1219 {
1220 	struct minix_proc_data mpd;
1221 	struct proc *kp;
1222 	int kslot, mslot = 0;
1223 	unsigned int mflags;
1224 	pid_t pid;
1225 
1226 	/*
1227 	 * It is currently only possible to retrieve the process data for a
1228 	 * particular PID, which must be given as the last name component.
1229 	 */
1230 	if (call->call_namelen != 1)
1231 		return EINVAL;
1232 
1233 	pid = (pid_t)call->call_name[0];
1234 
1235 	if (!update_tables())
1236 		return EINVAL;
1237 
1238 	/*
1239 	 * Unlike the CTL_KERN nodes, we use the ProcFS semantics here: if the
1240 	 * given PID is negative, it is a kernel task; otherwise, it identifies
1241 	 * a user process.  A request for PID 0 will result in ESRCH.
1242 	 */
1243 	if (pid < 0) {
1244 		if (pid < -NR_TASKS)
1245 			return ESRCH;
1246 
1247 		kslot = pid + NR_TASKS;
1248 		assert(kslot < NR_TASKS);
1249 	} else {
1250 		if ((mslot = get_mslot(pid)) == NO_SLOT)
1251 			return ESRCH;
1252 
1253 		kslot = NR_TASKS + mslot;
1254 	}
1255 
1256 	if (oldp == NULL)
1257 		return sizeof(mpd);
1258 
1259 	kp = &proc_tab[kslot];
1260 
1261 	mflags = (pid > 0) ? mproc_tab[mslot].mp_flags : 0;
1262 
1263 	memset(&mpd, 0, sizeof(mpd));
1264 	mpd.mpd_endpoint = kp->p_endpoint;
1265 	if (mflags & PRIV_PROC)
1266 		mpd.mpd_flags |= MPDF_SYSTEM;
1267 	if (mflags & (TRACE_ZOMBIE | ZOMBIE))
1268 		mpd.mpd_flags |= MPDF_ZOMBIE;
1269 	else if ((mflags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
1270 		mpd.mpd_flags |= MPDF_STOPPED;
1271 	else if (proc_is_runnable(kp))
1272 		mpd.mpd_flags |= MPDF_RUNNABLE;
1273 	mpd.mpd_blocked_on = P_BLOCKEDON(kp);
1274 	mpd.mpd_priority = kp->p_priority;
1275 	mpd.mpd_user_time = kp->p_user_time;
1276 	mpd.mpd_sys_time = kp->p_sys_time;
1277 	mpd.mpd_cycles = kp->p_cycles;
1278 	mpd.mpd_kipc_cycles = kp->p_kipc_cycles;
1279 	mpd.mpd_kcall_cycles = kp->p_kcall_cycles;
1280 	if (kslot >= NR_TASKS) {
1281 		mpd.mpd_nice = mproc_tab[mslot].mp_nice;
1282 		strlcpy(mpd.mpd_name, mproc_tab[mslot].mp_name,
1283 		    sizeof(mpd.mpd_name));
1284 	} else
1285 		strlcpy(mpd.mpd_name, kp->p_name, sizeof(mpd.mpd_name));
1286 
1287 	return mib_copyout(oldp, 0, &mpd, sizeof(mpd));
1288 }
1289