xref: /minix/external/bsd/top/dist/machine/m_linux.c (revision e1cdaee1)
1 /*
2  * Copyright (c) 1984 through 2008, William LeFebvre
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *
11  *     * Redistributions in binary form must reproduce the above
12  * copyright notice, this list of conditions and the following disclaimer
13  * in the documentation and/or other materials provided with the
14  * distribution.
15  *
16  *     * Neither the name of William LeFebvre nor the names of other
17  * contributors may be used to endorse or promote products derived from
18  * this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * top - a top users display for Unix
35  *
36  * SYNOPSIS:  Linux 1.2.x, 1.3.x, 2.x, using the /proc filesystem
37  *
38  * DESCRIPTION:
39  * This is the machine-dependent module for Linux 1.2.x, 1.3.x or 2.x.
40  *
41  * LIBS:
42  *
43  * CFLAGS: -DHAVE_GETOPT -DHAVE_STRERROR -DORDER
44  *
45  * TERMCAP: -lcurses
46  *
47  * AUTHOR: Richard Henderson <rth@tamu.edu>
48  * Order support added by Alexey Klimkin <kad@klon.tme.mcst.ru>
49  * Ported to 2.4 by William LeFebvre
50  * Additions for 2.6 by William LeFebvre
51  */
52 
53 #include "config.h"
54 
55 #include <sys/types.h>
56 #include <time.h>
57 #include <stdio.h>
58 #include <fcntl.h>
59 #include <unistd.h>
60 #include <stdlib.h>
61 #include <errno.h>
62 #include <dirent.h>
63 #include <string.h>
64 #include <math.h>
65 #include <ctype.h>
66 #include <sys/time.h>
67 #include <sys/stat.h>
68 #include <sys/vfs.h>
69 
70 #include <sys/param.h>		/* for HZ */
71 #include <asm/page.h>		/* for PAGE_SHIFT */
72 
73 #if 0
74 #include <linux/proc_fs.h>	/* for PROC_SUPER_MAGIC */
75 #else
76 #define PROC_SUPER_MAGIC 0x9fa0
77 #endif
78 
79 #include "top.h"
80 #include "hash.h"
81 #include "machine.h"
82 #include "utils.h"
83 #include "username.h"
84 
85 #define PROCFS "/proc"
86 extern char *myname;
87 
88 /*=PROCESS INFORMATION==================================================*/
89 
/*
 * One record per process (or per thread, for entries kept in the task
 * table).  Records persist between updates so cpu usage can be computed
 * from the change in "time".
 */
struct top_proc
{
    pid_t pid;			/* process id, or thread id for task entries */
    uid_t uid;			/* owner, as reported by proc_owner() */
    char *name;			/* heap-allocated command name/line, or NULL */
    int pri;			/* priority field of the stat file */
    int nice;			/* nice field of the stat file */
    int threads;		/* thread count field of the stat file */
    unsigned long size;		/* in k */
    unsigned long rss;		/* in k */
    unsigned long shared;	/* in k */
    int state;			/* index into state_abbrev; 0 = not seen */
    unsigned long time;		/* utime + stime, in ticks */
    unsigned long start_time;	/* start_time field of the stat file */
    double pcpu;		/* cpu fraction used over the last interval */
    struct top_proc *next;	/* freelist link, or next thread of a process */
};
103 
104 
105 /*=STATE IDENT STRINGS==================================================*/
106 
/* number of process state slots; slot 0 is the "no state" placeholder */
#define NPROCSTATES 7

/* short labels for the STATE column, indexed by top_proc.state */
static char *state_abbrev[NPROCSTATES+1] =
{
    "",			/* 0 */
    "run",		/* 1 */
    "sleep",		/* 2 */
    "disk",		/* 3 */
    "zomb",		/* 4 */
    "stop",		/* 5 */
    "swap",		/* 6 */
    NULL
};

/* labels for the process state summary line, same indexing as above */
static char *procstatenames[NPROCSTATES+1] =
{
    "",			/* 0 */
    " running, ",	/* 1 */
    " sleeping, ",	/* 2 */
    " uninterruptable, ",	/* 3 */
    " zombie, ",	/* 4 */
    " stopped, ",	/* 5 */
    " swapping, ",	/* 6 */
    NULL
};
120 
/* number of cpu states tracked; the last one (iowait) is optional */
#define NCPUSTATES 5

/* labels for the cpu state line, in the order the counters are read */
static char *cpustatenames[NCPUSTATES+1] =
{
    "user",
    "nice",
    "system",
    "idle",
    "iowait",
    NULL
};

/* set non-zero by machine_init when the stat file carries an iowait column */
static int show_iowait = 0;
128 
/* indices into kernel_stats / kernelnames */
#define KERNELCTXT    0
#define KERNELFLT     1
#define KERNELINTR    2
#define KERNELNEWPROC 3
#define NKERNELSTATS  4

/* labels for the kernel statistics line, indexed by the KERNEL* values */
static char *kernelnames[NKERNELSTATS+1] =
{
    " ctxsw, ",		/* KERNELCTXT */
    " flt, ",		/* KERNELFLT */
    " intr, ",		/* KERNELINTR */
    " newproc",		/* KERNELNEWPROC */
    NULL
};
139 
/* indices into memory_stats / memorynames */
#define MEMUSED    0
#define MEMFREE    1
#define MEMSHARED  2
#define MEMBUFFERS 3
#define MEMCACHED  4
#define NMEMSTATS  5

/* labels for the memory line, indexed by the MEM* values */
static char *memorynames[NMEMSTATS+1] =
{
    "K used, ",		/* MEMUSED */
    "K free, ",		/* MEMFREE */
    "K shared, ",	/* MEMSHARED */
    "K buffers, ",	/* MEMBUFFERS */
    "K cached",		/* MEMCACHED */
    NULL
};
151 
/* indices into swap_stats / swapnames */
#define SWAPUSED   0
#define SWAPFREE   1
#define SWAPCACHED 2
#define NSWAPSTATS 3

/* labels for the swap line, indexed by the SWAP* values */
static char *swapnames[NSWAPSTATS+1] =
{
    "K used, ",		/* SWAPUSED */
    "K free, ",		/* SWAPFREE */
    "K cached",		/* SWAPCACHED */
    NULL
};
161 
/* header template used by format_header; the 'X' marks the user name field */
static char fmt_header[] = "  PID X         THR PRI NICE  SIZE   RES STATE   TIME    CPU COMMAND";

/* printf template for the process header when a thread-count column is shown */
static char proc_header_thr[] = "  PID %-9s THR PRI NICE  SIZE   RES   SHR STATE   TIME    CPU COMMAND";

/* printf template for the process header without the thread-count column */
static char proc_header_nothr[] = "  PID %-9s PRI NICE  SIZE   RES   SHR STATE   TIME    CPU COMMAND";
170 
/* names of the supported sort orders; the first entry is the default */
char *ordernames[] =
{
    "cpu",
    "size",
    "res",
    "time",
    "command",
    NULL
};

/* comparison functions, defined further down in this file */
int compare_cpu();
int compare_size();
int compare_res();
int compare_time();
int compare_cmd();

/* comparison functions in the same order as ordernames */
int (*proc_compares[])() =
{
    compare_cpu,
    compare_size,
    compare_res,
    compare_time,
    compare_cmd,
    NULL
};
189 
190 /*=SYSTEM STATE INFO====================================================*/
191 
192 /* these are for calculating cpu state percentages */
193 
194 static long cp_time[NCPUSTATES];
195 static long cp_old[NCPUSTATES];
196 static long cp_diff[NCPUSTATES];
197 
198 /* for calculating the exponential average */
199 
200 static struct timeval lasttime = { 0, 0 };
201 static struct timeval timediff = { 0, 0 };
202 static long elapsed_msecs;
203 
204 /* these are for keeping track of processes and tasks */
205 
206 #define HASH_SIZE	     (1003)
207 #define INITIAL_ACTIVE_SIZE  (256)
208 #define PROCBLOCK_SIZE       (32)
209 static hash_table *ptable;
210 static hash_table *tasktable;
211 static struct top_proc **pactive;
212 static struct top_proc **nextactive;
213 static unsigned int activesize = 0;
214 static time_t boottime = -1;
215 static int have_task = 0;
216 
217 /* these are counters that need to be track */
218 static unsigned long last_ctxt = 0;
219 static unsigned long last_intr = 0;
220 static unsigned long last_newproc = 0;
221 static unsigned long last_flt = 0;
222 
223 /* these are for passing data back to the machine independant portion */
224 
225 static int cpu_states[NCPUSTATES];
226 static int process_states[NPROCSTATES];
227 static int kernel_stats[NKERNELSTATS];
228 static long memory_stats[NMEMSTATS];
229 static long swap_stats[NSWAPSTATS];
230 
/* unit conversion and hashing helpers */
#define bytetok(x)	((512 + (x)) >> 10)		/* bytes to k, rounded */
#define pagetok(x)	((x) << (PAGE_SHIFT - 10))	/* pages to k */
#define HASH(x)		((x) * 1686629713U % HASH_SIZE)

/* turn a count seen over "msec" milliseconds into a per-second rate */
#define per_second(n, msec)   ((n) * 1000 / (msec))
238 
239 /*======================================================================*/
240 
/*
 * skip_ws(p) - return a pointer to the first character at or after "p"
 * that is not whitespace.  Stops at the terminating NUL.
 */
static inline char *
skip_ws(const char *p)
{
    /* ctype functions are only defined for unsigned char values and EOF */
    while (isspace((unsigned char)*p))
    {
	p++;
    }
    return (char *)p;
}
247 
/*
 * skip_token(p) - skip leading whitespace and then one run of
 * non-whitespace characters.  Returns a pointer to the character that
 * follows the token (whitespace or the terminating NUL).
 */
static inline char *
skip_token(const char *p)
{
    /* ctype functions are only defined for unsigned char values and EOF */
    while (isspace((unsigned char)*p))
    {
	p++;
    }
    while (*p != '\0' && !isspace((unsigned char)*p))
    {
	p++;
    }
    return (char *)p;
}
255 
/*
 * xfrm_cmdline(p, len) - turn the NUL separators found in the first
 * len-1 bytes of "p" into spaces.  The last byte is never touched.
 */
static void
xfrm_cmdline(char *p, int len)
{
    int i;

    for (i = 0; i < len - 1; i++)
    {
	if (p[i] == '\0')
	{
	    p[i] = ' ';
	}
    }
}
268 
269 static void
270 update_procname(struct top_proc *proc, char *cmd)
271 
272 {
273     printable(cmd);
274 
275     if (proc->name == NULL)
276     {
277 	proc->name = strdup(cmd);
278     }
279     else if (strcmp(proc->name, cmd) != 0)
280     {
281 	free(proc->name);
282 	proc->name = strdup(cmd);
283     }
284 }
285 
286 /*
287  * Process structures are allocated and freed as needed.  Here we
288  * keep big pools of them, adding more pool as needed.  When a
289  * top_proc structure is freed, it is added to a freelist and reused.
290  */
291 
292 static struct top_proc *freelist = NULL;
293 static struct top_proc *procblock = NULL;
294 static struct top_proc *procmax = NULL;
295 
296 static struct top_proc *
297 new_proc()
298 {
299     struct top_proc *p;
300 
301     if (freelist)
302     {
303 	p = freelist;
304 	freelist = freelist->next;
305     }
306     else if (procblock)
307     {
308 	p = procblock;
309 	if (++procblock >= procmax)
310 	{
311 	    procblock = NULL;
312 	}
313     }
314     else
315     {
316 	p = procblock = (struct top_proc *)calloc(PROCBLOCK_SIZE,
317 						  sizeof(struct top_proc));
318 	procmax = procblock++ + PROCBLOCK_SIZE;
319     }
320 
321     /* initialization */
322     if (p->name != NULL)
323     {
324 	free(p->name);
325 	p->name = NULL;
326     }
327 
328     return p;
329 }
330 
331 static void
332 free_proc(struct top_proc *proc)
333 {
334     proc->next = freelist;
335     freelist = proc;
336 }
337 
338 
339 int
340 machine_init(struct statics *statics)
341 
342 {
343     /* make sure the proc filesystem is mounted */
344     {
345 	struct statfs sb;
346 	if (statfs(PROCFS, &sb) < 0 || sb.f_type != PROC_SUPER_MAGIC)
347 	{
348 	    fprintf(stderr, "%s: proc filesystem not mounted on " PROCFS "\n",
349 		    myname);
350 	    return -1;
351 	}
352     }
353 
354     /* chdir to the proc filesystem to make things easier */
355     chdir(PROCFS);
356 
357     /* a few preliminary checks */
358     {
359 	int fd;
360 	char buff[128];
361 	char *p;
362 	int cnt = 0;
363 	unsigned long uptime;
364 	struct timeval tv;
365 	struct stat st;
366 
367 	/* get a boottime */
368 	if ((fd = open("uptime", 0)) != -1)
369 	{
370 	    if (read(fd, buff, sizeof(buff)) > 0)
371 	    {
372 		uptime = strtoul(buff, &p, 10);
373 		gettimeofday(&tv, 0);
374 		boottime = tv.tv_sec - uptime;
375 	    }
376 	    close(fd);
377 	}
378 
379 	/* see how many states we get from stat */
380 	if ((fd = open("stat", 0)) != -1)
381 	{
382 	    if (read(fd, buff, sizeof(buff)) > 0)
383 	    {
384 		if ((p = strchr(buff, '\n')) != NULL)
385 		{
386 		    *p = '\0';
387 		    p = buff;
388 		    while (*p != '\0')
389 		    {
390 			if (*p++ == ' ')
391 			{
392 			    cnt++;
393 			}
394 		    }
395 		}
396 	    }
397 
398 	    close(fd);
399 	}
400 	if (cnt > 5)
401 	{
402 	    /* we have iowait */
403 	    show_iowait = 1;
404 	}
405 
406 	/* see if we have task subdirs */
407 	if (stat("self/task", &st) != -1 && S_ISDIR(st.st_mode))
408 	{
409 	    dprintf("we have task directories\n");
410 	    have_task = 1;
411 	}
412     }
413 
414     /* if we aren't showing iowait, then we have to tweak cpustatenames */
415     if (!show_iowait)
416     {
417 	cpustatenames[4] = NULL;
418     }
419 
420     /* fill in the statics information */
421     statics->procstate_names = procstatenames;
422     statics->cpustate_names = cpustatenames;
423     statics->kernel_names = kernelnames;
424     statics->memory_names = memorynames;
425     statics->swap_names = swapnames;
426     statics->order_names = ordernames;
427     statics->boottime = boottime;
428     statics->flags.fullcmds = 1;
429     statics->flags.warmup = 1;
430     statics->flags.threads = 1;
431 
432     /* allocate needed space */
433     pactive = (struct top_proc **)malloc(sizeof(struct top_proc *) * INITIAL_ACTIVE_SIZE);
434     activesize = INITIAL_ACTIVE_SIZE;
435 
436     /* create process and task hashes */
437     ptable = hash_create(HASH_SIZE);
438     tasktable = hash_create(HASH_SIZE);
439 
440     /* all done! */
441     return 0;
442 }
443 
444 
445 void
446 get_system_info(struct system_info *info)
447 
448 {
449     char buffer[4096+1];
450     int fd, len;
451     char *p;
452     struct timeval thistime;
453     unsigned long intr = 0;
454     unsigned long ctxt = 0;
455     unsigned long newproc = 0;
456     unsigned long flt = 0;
457 
458     /* timestamp and time difference */
459     gettimeofday(&thistime, 0);
460     timersub(&thistime, &lasttime, &timediff);
461     elapsed_msecs = timediff.tv_sec * 1000 + timediff.tv_usec / 1000;
462     lasttime = thistime;
463 
464     /* get load averages */
465     if ((fd = open("loadavg", O_RDONLY)) != -1)
466     {
467 	if ((len = read(fd, buffer, sizeof(buffer)-1)) > 0)
468 	{
469 	    buffer[len] = '\0';
470 	    info->load_avg[0] = strtod(buffer, &p);
471 	    info->load_avg[1] = strtod(p, &p);
472 	    info->load_avg[2] = strtod(p, &p);
473 	    p = skip_token(p);			/* skip running/tasks */
474 	    p = skip_ws(p);
475 	    if (*p)
476 	    {
477 		info->last_pid = atoi(p);
478 	    }
479 	    else
480 	    {
481 		info->last_pid = -1;
482 	    }
483 	}
484 	close(fd);
485     }
486 
487     /* get the cpu time info */
488     if ((fd = open("stat", O_RDONLY)) != -1)
489     {
490 	if ((len = read(fd, buffer, sizeof(buffer)-1)) > 0)
491 	{
492 	    buffer[len] = '\0';
493 	    p = skip_token(buffer);			/* "cpu" */
494 	    cp_time[0] = strtoul(p, &p, 0);
495 	    cp_time[1] = strtoul(p, &p, 0);
496 	    cp_time[2] = strtoul(p, &p, 0);
497 	    cp_time[3] = strtoul(p, &p, 0);
498 	    if (show_iowait)
499 	    {
500 		cp_time[4] = strtoul(p, &p, 0);
501 	    }
502 
503 	    /* convert cp_time counts to percentages */
504 	    percentages(NCPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
505 
506 	    /* get the rest of it */
507 	    p = strchr(p, '\n');
508 	    while (p != NULL)
509 	    {
510 		p++;
511 		if (strncmp(p, "intr ", 5) == 0)
512 		{
513 		    p = skip_token(p);
514 		    intr = strtoul(p, &p, 10);
515 		}
516 		else if (strncmp(p, "ctxt ", 5) == 0)
517 		{
518 		    p = skip_token(p);
519 		    ctxt = strtoul(p, &p, 10);
520 		}
521 		else if (strncmp(p, "processes ", 10) == 0)
522 		{
523 		    p = skip_token(p);
524 		    newproc = strtoul(p, &p, 10);
525 		}
526 
527 		p = strchr(p, '\n');
528 	    }
529 
530 	    kernel_stats[KERNELINTR] = per_second(intr - last_intr, elapsed_msecs);
531 	    kernel_stats[KERNELCTXT] = per_second(ctxt - last_ctxt, elapsed_msecs);
532 	    kernel_stats[KERNELNEWPROC] = per_second(newproc - last_newproc, elapsed_msecs);
533 	    last_intr = intr;
534 	    last_ctxt = ctxt;
535 	    last_newproc = newproc;
536 	}
537 	close(fd);
538     }
539 
540     /* get system wide memory usage */
541     if ((fd = open("meminfo", O_RDONLY)) != -1)
542     {
543 	char *p;
544 	int mem = 0;
545 	int swap = 0;
546 	unsigned long memtotal = 0;
547 	unsigned long memfree = 0;
548 	unsigned long swaptotal = 0;
549 
550 	if ((len = read(fd, buffer, sizeof(buffer)-1)) > 0)
551 	{
552 	    buffer[len] = '\0';
553 	    p = buffer-1;
554 
555 	    /* iterate thru the lines */
556 	    while (p != NULL)
557 	    {
558 		p++;
559 		if (p[0] == ' ' || p[0] == '\t')
560 		{
561 		    /* skip */
562 		}
563 		else if (strncmp(p, "Mem:", 4) == 0)
564 		{
565 		    p = skip_token(p);			/* "Mem:" */
566 		    p = skip_token(p);			/* total memory */
567 		    memory_stats[MEMUSED] = strtoul(p, &p, 10);
568 		    memory_stats[MEMFREE] = strtoul(p, &p, 10);
569 		    memory_stats[MEMSHARED] = strtoul(p, &p, 10);
570 		    memory_stats[MEMBUFFERS] = strtoul(p, &p, 10);
571 		    memory_stats[MEMCACHED] = strtoul(p, &p, 10);
572 		    memory_stats[MEMUSED] = bytetok(memory_stats[MEMUSED]);
573 		    memory_stats[MEMFREE] = bytetok(memory_stats[MEMFREE]);
574 		    memory_stats[MEMSHARED] = bytetok(memory_stats[MEMSHARED]);
575 		    memory_stats[MEMBUFFERS] = bytetok(memory_stats[MEMBUFFERS]);
576 		    memory_stats[MEMCACHED] = bytetok(memory_stats[MEMCACHED]);
577 		    mem = 1;
578 		}
579 		else if (strncmp(p, "Swap:", 5) == 0)
580 		{
581 		    p = skip_token(p);			/* "Swap:" */
582 		    p = skip_token(p);			/* total swap */
583 		    swap_stats[SWAPUSED] = strtoul(p, &p, 10);
584 		    swap_stats[SWAPFREE] = strtoul(p, &p, 10);
585 		    swap_stats[SWAPUSED] = bytetok(swap_stats[SWAPUSED]);
586 		    swap_stats[SWAPFREE] = bytetok(swap_stats[SWAPFREE]);
587 		    swap = 1;
588 		}
589 		else if (!mem && strncmp(p, "MemTotal:", 9) == 0)
590 		{
591 		    p = skip_token(p);
592 		    memtotal = strtoul(p, &p, 10);
593 		}
594 		else if (!mem && memtotal > 0 && strncmp(p, "MemFree:", 8) == 0)
595 		{
596 		    p = skip_token(p);
597 		    memfree = strtoul(p, &p, 10);
598 		    memory_stats[MEMUSED] = memtotal - memfree;
599 		    memory_stats[MEMFREE] = memfree;
600 		}
601 		else if (!mem && strncmp(p, "MemShared:", 10) == 0)
602 		{
603 		    p = skip_token(p);
604 		    memory_stats[MEMSHARED] = strtoul(p, &p, 10);
605 		}
606 		else if (!mem && strncmp(p, "Buffers:", 8) == 0)
607 		{
608 		    p = skip_token(p);
609 		    memory_stats[MEMBUFFERS] = strtoul(p, &p, 10);
610 		}
611 		else if (!mem && strncmp(p, "Cached:", 7) == 0)
612 		{
613 		    p = skip_token(p);
614 		    memory_stats[MEMCACHED] = strtoul(p, &p, 10);
615 		}
616 		else if (!swap && strncmp(p, "SwapTotal:", 10) == 0)
617 		{
618 		    p = skip_token(p);
619 		    swaptotal = strtoul(p, &p, 10);
620 		}
621 		else if (!swap && swaptotal > 0 && strncmp(p, "SwapFree:", 9) == 0)
622 		{
623 		    p = skip_token(p);
624 		    memfree = strtoul(p, &p, 10);
625 		    swap_stats[SWAPUSED] = swaptotal - memfree;
626 		    swap_stats[SWAPFREE] = memfree;
627 		}
628 		else if (!mem && strncmp(p, "SwapCached:", 11) == 0)
629 		{
630 		    p = skip_token(p);
631 		    swap_stats[SWAPCACHED] = strtoul(p, &p, 10);
632 		}
633 
634 		/* move to the next line */
635 		p = strchr(p, '\n');
636 	    }
637 	}
638 	close(fd);
639     }
640 
641     /* get vm related stuff */
642     if ((fd = open("vmstat", O_RDONLY)) != -1)
643     {
644 	char *p;
645 
646 	if ((len = read(fd, buffer, sizeof(buffer)-1)) > 0)
647 	{
648 	    buffer[len] = '\0';
649 	    p = buffer;
650 
651 	    /* iterate thru the lines */
652 	    while (p != NULL)
653 	    {
654 		if (strncmp(p, "pgmajfault ", 11) == 0)
655 		{
656 		    p = skip_token(p);
657 		    flt = strtoul(p, &p, 10);
658 		    kernel_stats[KERNELFLT] = per_second(flt - last_flt, elapsed_msecs);
659 		    last_flt = flt;
660 		    break;
661 		}
662 
663 		/* move to the next line */
664 		p = strchr(p, '\n');
665 		p++;
666 	    }
667 	}
668 	close(fd);
669     }
670 
671     /* set arrays and strings */
672     info->cpustates = cpu_states;
673     info->memory = memory_stats;
674     info->swap = swap_stats;
675     info->kernel = kernel_stats;
676 }
677 
678 static void
679 read_one_proc_stat(pid_t pid, pid_t taskpid, struct top_proc *proc, struct process_select *sel)
680 {
681     char buffer[4096], *p, *q;
682     int fd, len;
683     int fullcmd;
684 
685     dprintf("reading proc %d - %d\n", pid, taskpid);
686 
687     /* if anything goes wrong, we return with proc->state == 0 */
688     proc->state = 0;
689 
690     /* full cmd handling */
691     fullcmd = sel->fullcmd;
692     if (fullcmd)
693     {
694 	if (taskpid == -1)
695 	{
696 	    sprintf(buffer, "%d/cmdline", pid);
697 	}
698 	else
699 	{
700 	    sprintf(buffer, "%d/task/%d/cmdline", pid, taskpid);
701 	}
702 	if ((fd = open(buffer, O_RDONLY)) != -1)
703 	{
704 	    /* read command line data */
705 	    /* (theres no sense in reading more than we can fit) */
706 	    if ((len = read(fd, buffer, MAX_COLS)) > 1)
707 	    {
708 		buffer[len] = '\0';
709 		xfrm_cmdline(buffer, len);
710 		update_procname(proc, buffer);
711 	    }
712 	    else
713 	    {
714 		fullcmd = 0;
715 	    }
716 	    close(fd);
717 	}
718 	else
719 	{
720 	    fullcmd = 0;
721 	}
722     }
723 
724     /* grab the shared memory size */
725     sprintf(buffer, "%d/statm", pid);
726     fd = open(buffer, O_RDONLY);
727     len = read(fd, buffer, sizeof(buffer)-1);
728     close(fd);
729     buffer[len] = '\0';
730     p = buffer;
731     p = skip_token(p);		/* skip size */
732     p = skip_token(p);		/* skip resident */
733     proc->shared = pagetok(strtoul(p, &p, 10));
734 
735     /* grab the proc stat info in one go */
736     if (taskpid == -1)
737     {
738 	sprintf(buffer, "%d/stat", pid);
739     }
740     else
741     {
742 	sprintf(buffer, "%d/task/%d/stat", pid, taskpid);
743     }
744 
745     fd = open(buffer, O_RDONLY);
746     len = read(fd, buffer, sizeof(buffer)-1);
747     close(fd);
748 
749     buffer[len] = '\0';
750 
751     proc->uid = (uid_t)proc_owner((int)pid);
752 
753     /* parse out the status */
754 
755     /* skip pid and locate command, which is in parentheses */
756     if ((p = strchr(buffer, '(')) == NULL)
757     {
758 	return;
759     }
760     if ((q = strrchr(++p, ')')) == NULL)
761     {
762 	return;
763     }
764 
765     /* set the procname */
766     *q = '\0';
767     if (!fullcmd)
768     {
769 	update_procname(proc, p);
770     }
771 
772     /* scan the rest of the line */
773     p = q+1;
774     p = skip_ws(p);
775     switch (*p++)				/* state */
776     {
777     case 'R': proc->state = 1; break;
778     case 'S': proc->state = 2; break;
779     case 'D': proc->state = 3; break;
780     case 'Z': proc->state = 4; break;
781     case 'T': proc->state = 5; break;
782     case 'W': proc->state = 6; break;
783     case '\0': return;
784     }
785 
786     p = skip_token(p);				/* skip ppid */
787     p = skip_token(p);				/* skip pgrp */
788     p = skip_token(p);				/* skip session */
789     p = skip_token(p);				/* skip tty */
790     p = skip_token(p);				/* skip tty pgrp */
791     p = skip_token(p);				/* skip flags */
792     p = skip_token(p);				/* skip min flt */
793     p = skip_token(p);				/* skip cmin flt */
794     p = skip_token(p);				/* skip maj flt */
795     p = skip_token(p);				/* skip cmaj flt */
796 
797     proc->time = strtoul(p, &p, 10);		/* utime */
798     proc->time += strtoul(p, &p, 10);		/* stime */
799 
800     p = skip_token(p);				/* skip cutime */
801     p = skip_token(p);				/* skip cstime */
802 
803     proc->pri = strtol(p, &p, 10);		/* priority */
804     proc->nice = strtol(p, &p, 10);		/* nice */
805     proc->threads = strtol(p, &p, 10);		/* threads */
806 
807     p = skip_token(p);				/* skip it_real_val */
808     proc->start_time = strtoul(p, &p, 10);	/* start_time */
809 
810     proc->size = bytetok(strtoul(p, &p, 10));	/* vsize */
811     proc->rss = pagetok(strtoul(p, &p, 10));	/* rss */
812 
813 #if 0
814     /* for the record, here are the rest of the fields */
815     p = skip_token(p);				/* skip rlim */
816     p = skip_token(p);				/* skip start_code */
817     p = skip_token(p);				/* skip end_code */
818     p = skip_token(p);				/* skip start_stack */
819     p = skip_token(p);				/* skip sp */
820     p = skip_token(p);				/* skip pc */
821     p = skip_token(p);				/* skip signal */
822     p = skip_token(p);				/* skip sigblocked */
823     p = skip_token(p);				/* skip sigignore */
824     p = skip_token(p);				/* skip sigcatch */
825     p = skip_token(p);				/* skip wchan */
826 #endif
827 
828 }
829 
830 static int show_usernames;
831 static int show_threads;
832 
833 
834 caddr_t
835 get_process_info(struct system_info *si,
836 		 struct process_select *sel,
837 		 int compare_index)
838 {
839     struct top_proc *proc;
840     struct top_proc *taskproc;
841     pid_t pid;
842     pid_t taskpid;
843     unsigned long now;
844     unsigned long elapsed;
845     hash_item_pid *hi;
846     hash_pos pos;
847 
848     /* round current time to a second */
849     now = (unsigned long)lasttime.tv_sec;
850     if (lasttime.tv_usec >= 500000)
851     {
852 	now++;
853     }
854 
855     /* calculate number of ticks since our last check */
856     elapsed = timediff.tv_sec * HZ + (timediff.tv_usec * HZ) / 1000000;
857     if (elapsed <= 0)
858     {
859 	elapsed = 1;
860     }
861     dprintf("get_process_info: elapsed %d ticks\n", elapsed);
862 
863     /* mark all hash table entries as not seen */
864     hi = hash_first_pid(ptable, &pos);
865     while (hi != NULL)
866     {
867 	((struct top_proc *)(hi->value))->state = 0;
868 	hi = hash_next_pid(&pos);
869     }
870     /* mark all hash table entries as not seen */
871     hi = hash_first_pid(tasktable, &pos);
872     while (hi != NULL)
873     {
874 	((struct top_proc *)(hi->value))->state = 0;
875 	hi = hash_next_pid(&pos);
876     }
877 
878     /* read the process information */
879     {
880 	DIR *dir = opendir(".");
881 	DIR *taskdir;
882 	struct dirent *ent;
883 	struct dirent *taskent;
884 	int total_procs = 0;
885 	struct top_proc **active;
886 	hash_item_pid *hi;
887 	hash_pos pos;
888 	char buffer[64];
889 
890 	int show_idle = sel->idle;
891 	int show_uid = sel->uid != -1;
892 	char *show_command = sel->command;
893 
894 	show_usernames = sel->usernames;
895 	show_threads = sel->threads && have_task;
896 
897 	memset(process_states, 0, sizeof(process_states));
898 
899 	taskdir = NULL;
900 	taskent = NULL;
901 	taskpid = -1;
902 
903 	while ((ent = readdir(dir)) != NULL)
904 	{
905 	    unsigned long otime;
906 
907 	    if (!isdigit(ent->d_name[0]))
908 		continue;
909 
910 	    pid = atoi(ent->d_name);
911 
912 	    /* look up hash table entry */
913 	    proc = hash_lookup_pid(ptable, pid);
914 
915 	    /* if we came up empty, create a new entry */
916 	    if (proc == NULL)
917 	    {
918 		proc = new_proc();
919 		proc->pid = pid;
920 		proc->time = 0;
921 		hash_add_pid(ptable, pid, (void *)proc);
922 	    }
923 
924 	    /* remember the previous cpu time */
925 	    otime = proc->time;
926 
927 	    /* get current data */
928 	    read_one_proc_stat(pid, -1, proc, sel);
929 
930 	    /* continue on if this isn't really a process */
931 	    if (proc->state == 0)
932 		continue;
933 
934 	    /* reset linked list (for threads) */
935 	    proc->next = NULL;
936 
937 	    /* accumulate process state data */
938 	    total_procs++;
939 	    process_states[proc->state]++;
940 
941 	    /* calculate pcpu */
942 	    if ((proc->pcpu = (proc->time - otime) / (double)elapsed) < 0.0001)
943 	    {
944 		proc->pcpu = 0;
945 	    }
946 
947 	    /* if we have task subdirs and this process has more than
948 	       one thread, collect data on each thread */
949 	    if (have_task && proc->threads > 1)
950 	    {
951 		snprintf(buffer, sizeof(buffer), "%d/task", pid);
952 		if ((taskdir = opendir(buffer)) != NULL)
953 		{
954 		    while ((taskent = readdir(taskdir)) != NULL)
955 		    {
956 			if (!isdigit(taskent->d_name[0]))
957 			    continue;
958 
959 			/* lookup entry in tasktable */
960 			taskpid = atoi(taskent->d_name);
961 			taskproc = hash_lookup_pid(tasktable, taskpid);
962 
963 			/* if we came up empty, create a new entry */
964 			if (taskproc == NULL)
965 			{
966 			    taskproc = new_proc();
967 			    taskproc->pid = taskpid;
968 			    taskproc->time = 0;
969 			    hash_add_pid(tasktable, taskpid, (void *)taskproc);
970 			}
971 
972 			/* remember the previous cpu time */
973 			otime = taskproc->time;
974 
975 			/* get current data */
976 			read_one_proc_stat(pid, taskpid, taskproc, sel);
977 
978 			/* ignore if it isnt real */
979 			if (taskproc->state == 0)
980 			    continue;
981 
982 			/* when showing threads, add this to the accumulated
983 			   process state data, but remember that the first
984 			   thread is already accounted for */
985 			if (show_threads && pid != taskpid)
986 			{
987 			    total_procs++;
988 			    process_states[taskproc->state]++;
989 			}
990 
991 			/* calculate pcpu */
992 			if ((taskproc->pcpu = (taskproc->time - otime) /
993 			     (double)elapsed) < 0.0)
994 			{
995 			    taskproc->pcpu = 0;
996 			}
997 
998 			/* link this in to the proc's list */
999 			taskproc->next = proc->next;
1000 			proc->next = taskproc;
1001 		    }
1002 		    closedir(taskdir);
1003 		}
1004 	    }
1005 	}
1006 	closedir(dir);
1007 
1008 	/* make sure we have enough slots for the active procs */
1009 	if (activesize < total_procs)
1010 	{
1011 	    pactive = (struct top_proc **)realloc(pactive,
1012 				  sizeof(struct top_proc *) * total_procs);
1013 	    activesize = total_procs;
1014 	}
1015 
1016 	/* set up the active procs and flush dead entries */
1017 	active = pactive;
1018 	hi = hash_first_pid(ptable, &pos);
1019 	while (hi != NULL)
1020 	{
1021 	    proc = (struct top_proc *)(hi->value);
1022 	    if (proc->state == 0)
1023 	    {
1024 		/* dead entry */
1025 		hash_remove_pos_pid(&pos);
1026 		free_proc(proc);
1027 	    }
1028 	    else
1029 	    {
1030 		/* check to see if it qualifies as active */
1031 		if ((show_idle || proc->state == 1 || proc->pcpu) &&
1032 		    (!show_uid || proc->uid == sel->uid) &&
1033 		    (show_command == NULL ||
1034 		     strstr(proc->name, show_command) != NULL))
1035 		{
1036 		    /* are we showing threads and does this proc have any? */
1037 		    if (show_threads && proc->threads > 1 && proc->next != NULL)
1038 		    {
1039 			/* then add just the thread info -- the main process
1040 			   info is included in the list */
1041 			proc = proc->next;
1042 			while (proc != NULL)
1043 			{
1044 			    *active++ = proc;
1045 			    proc = proc->next;
1046 			}
1047 		    }
1048 		    else
1049 		    {
1050 			/* add the process */
1051 			*active++ = proc;
1052 		    }
1053 		}
1054 	    }
1055 
1056 	    hi = hash_next_pid(&pos);
1057 	}
1058 
1059 	si->p_active = active - pactive;
1060 	si->p_total = total_procs;
1061 	si->procstates = process_states;
1062     }
1063 
1064     /* if requested, sort the "active" procs */
1065     if (si->p_active)
1066 	qsort(pactive, si->p_active, sizeof(struct top_proc *),
1067 	      proc_compares[compare_index]);
1068 
1069     /* don't even pretend that the return value thing here isn't bogus */
1070     nextactive = pactive;
1071     return (caddr_t)0;
1072 }
1073 
1074 
1075 char *
1076 format_header(char *uname_field)
1077 
1078 {
1079     int uname_len = strlen(uname_field);
1080     if (uname_len > 8)
1081 	uname_len = 8;
1082 
1083     memcpy(strchr(fmt_header, 'X'), uname_field, uname_len);
1084 
1085     return fmt_header;
1086 }
1087 
1088 static char p_header[MAX_COLS];
1089 
1090 char *
1091 format_process_header(struct process_select *sel, caddr_t handle, int count)
1092 
1093 {
1094     char *h;
1095 
1096     h = sel->threads ? proc_header_nothr : proc_header_thr;
1097 
1098     snprintf(p_header, MAX_COLS, h, sel->usernames ? "USERNAME" : "UID");
1099 
1100     return p_header;
1101 }
1102 
1103 
1104 char *
1105 format_next_process(caddr_t handle, char *(*get_userid)(int))
1106 
1107 {
1108     static char fmt[MAX_COLS];	/* static area where result is built */
1109     struct top_proc *p = *nextactive++;
1110     char *userbuf;
1111 
1112     userbuf = show_usernames ? username(p->uid) : itoa_w(p->uid, 7);
1113 
1114     if (show_threads)
1115     {
1116 	snprintf(fmt, sizeof(fmt),
1117 		 "%5d %-8.8s  %3d %4d %5s %5s %5s %-5s %6s %5s%% %s",
1118 		 p->pid,
1119 		 userbuf,
1120 		 p->pri < -99 ? -99 : p->pri,
1121 		 p->nice,
1122 		 format_k(p->size),
1123 		 format_k(p->rss),
1124 		 format_k(p->shared),
1125 		 state_abbrev[p->state],
1126 		 format_time(p->time / HZ),
1127 		 format_percent(p->pcpu * 100.0),
1128 		 p->name);
1129     }
1130     else
1131     {
1132 	snprintf(fmt, sizeof(fmt),
1133 		 "%5d %-8.8s %4d %3d %4d %5s %5s %5s %-5s %6s %5s%% %s",
1134 		 p->pid,
1135 		 userbuf,
1136 		 p->threads <= 9999 ? p->threads : 9999,
1137 		 p->pri < -99 ? -99 : p->pri,
1138 		 p->nice,
1139 		 format_k(p->size),
1140 		 format_k(p->rss),
1141 		 format_k(p->shared),
1142 		 state_abbrev[p->state],
1143 		 format_time(p->time / HZ),
1144 		 format_percent(p->pcpu * 100.0),
1145 		 p->name);
1146     }
1147 
1148     /* return the result */
1149     return (fmt);
1150 }
1151 
1152 /* comparison routines for qsort */
1153 
/*
 * There are currently five possible comparison routines.  main selects
 * one of these by indexing into the array proc_compares.
 *
 * Possible keys are defined as macros below.  Currently these keys are
 * defined:  percent cpu, cpu ticks, process state, priority, resident
 * set size, total virtual memory usage, and command name.  The process
 * states are ordered as follows (from least to most important):  empty,
 * zombie, stop, sleep, swap, disk wait, run.
 * The array declaration below maps a process state index into a number
 * that reflects this ordering.
 */
1165 
/* First, the possible comparison keys.  These are defined in such a way
   that they can be merely listed in the source code to define the actual
   desired ordering.

   Each macro expands to an "if" with no statement of its own, so whatever
   follows it (the next key, or the terminating ';') becomes its body.  A
   later key is therefore evaluated only when every earlier key compared
   equal.  The macros refer to the locals p1, p2 and result by name, and
   ORDERKEY_PCTCPU additionally needs a double named dresult.
 */

/* cpu percentage: the floating point difference is reduced to -1, 0 or 1 */
#define ORDERKEY_PCTCPU  if (dresult = p2->pcpu - p1->pcpu,\
			 (result = dresult > 0.0 ? 1 : dresult < 0.0 ? -1 : 0) == 0)
/* the remaining numeric keys store p2's value minus p1's value in result */
#define ORDERKEY_CPTICKS if ((result = (long)p2->time - (long)p1->time) == 0)
#define ORDERKEY_STATE   if ((result = (sort_state[p2->state] - \
			 sort_state[p1->state])) == 0)
#define ORDERKEY_PRIO    if ((result = p2->pri - p1->pri) == 0)
#define ORDERKEY_RSSIZE  if ((result = p2->rss - p1->rss) == 0)
#define ORDERKEY_MEM     if ((result = p2->size - p1->size) == 0)
/* command name: compared with strcmp, p1 first (unlike the keys above) */
#define ORDERKEY_NAME    if ((result = strcmp(p1->name, p2->name)) == 0)
1180 
/*
 * Weight of each process state, indexed by the state number.
 * ORDERKEY_STATE compares these weights rather than the raw state
 * numbers.
 */

unsigned char sort_state[] =
{
    /* empty     */ 0,
    /* run       */ 6,
    /* sleep     */ 3,
    /* disk wait */ 5,
    /* zombie    */ 1,
    /* stop      */ 2,
    /* swap      */ 4
};
1193 
1194 
1195 /* compare_cpu - the comparison function for sorting by cpu percentage */
1196 
1197 int
1198 compare_cpu (
1199 	       struct top_proc **pp1,
1200 	       struct top_proc **pp2)
1201   {
1202     register struct top_proc *p1;
1203     register struct top_proc *p2;
1204     register long result;
1205     double dresult;
1206 
1207     /* remove one level of indirection */
1208     p1 = *pp1;
1209     p2 = *pp2;
1210 
1211     ORDERKEY_PCTCPU
1212     ORDERKEY_CPTICKS
1213     ORDERKEY_STATE
1214     ORDERKEY_PRIO
1215     ORDERKEY_RSSIZE
1216     ORDERKEY_MEM
1217     ;
1218 
1219     return result == 0 ? 0 : result < 0 ? -1 : 1;
1220   }
1221 
1222 /* compare_size - the comparison function for sorting by total memory usage */
1223 
1224 int
1225 compare_size (
1226 	       struct top_proc **pp1,
1227 	       struct top_proc **pp2)
1228   {
1229     register struct top_proc *p1;
1230     register struct top_proc *p2;
1231     register long result;
1232     double dresult;
1233 
1234     /* remove one level of indirection */
1235     p1 = *pp1;
1236     p2 = *pp2;
1237 
1238     ORDERKEY_MEM
1239     ORDERKEY_RSSIZE
1240     ORDERKEY_PCTCPU
1241     ORDERKEY_CPTICKS
1242     ORDERKEY_STATE
1243     ORDERKEY_PRIO
1244     ;
1245 
1246     return result == 0 ? 0 : result < 0 ? -1 : 1;
1247   }
1248 
1249 /* compare_res - the comparison function for sorting by resident set size */
1250 
1251 int
1252 compare_res (
1253 	       struct top_proc **pp1,
1254 	       struct top_proc **pp2)
1255   {
1256     register struct top_proc *p1;
1257     register struct top_proc *p2;
1258     register long result;
1259     double dresult;
1260 
1261     /* remove one level of indirection */
1262     p1 = *pp1;
1263     p2 = *pp2;
1264 
1265     ORDERKEY_RSSIZE
1266     ORDERKEY_MEM
1267     ORDERKEY_PCTCPU
1268     ORDERKEY_CPTICKS
1269     ORDERKEY_STATE
1270     ORDERKEY_PRIO
1271     ;
1272 
1273     return result == 0 ? 0 : result < 0 ? -1 : 1;
1274   }
1275 
1276 /* compare_time - the comparison function for sorting by total cpu time */
1277 
1278 int
1279 compare_time (
1280 	       struct top_proc **pp1,
1281 	       struct top_proc **pp2)
1282   {
1283     register struct top_proc *p1;
1284     register struct top_proc *p2;
1285     register long result;
1286     double dresult;
1287 
1288     /* remove one level of indirection */
1289     p1 = *pp1;
1290     p2 = *pp2;
1291 
1292     ORDERKEY_CPTICKS
1293     ORDERKEY_PCTCPU
1294     ORDERKEY_STATE
1295     ORDERKEY_PRIO
1296     ORDERKEY_MEM
1297     ORDERKEY_RSSIZE
1298     ;
1299 
1300     return result == 0 ? 0 : result < 0 ? -1 : 1;
1301   }
1302 
1303 
1304 /* compare_cmd - the comparison function for sorting by command name */
1305 
1306 int
1307 compare_cmd (
1308 	       struct top_proc **pp1,
1309 	       struct top_proc **pp2)
1310   {
1311     register struct top_proc *p1;
1312     register struct top_proc *p2;
1313     register long result;
1314     double dresult;
1315 
1316     /* remove one level of indirection */
1317     p1 = *pp1;
1318     p2 = *pp2;
1319 
1320     ORDERKEY_NAME
1321     ORDERKEY_PCTCPU
1322     ORDERKEY_CPTICKS
1323     ORDERKEY_STATE
1324     ORDERKEY_PRIO
1325     ORDERKEY_RSSIZE
1326     ORDERKEY_MEM
1327     ;
1328 
1329     return result == 0 ? 0 : result < 0 ? -1 : 1;
1330   }
1331 
1332 
/*
 * proc_owner(pid) - returns the uid that owns process "pid", or -1 if
 *              the process does not exist.
 *              It is EXTREMELY IMPORTANT that this function work correctly.
 *              If top runs setuid root (as in SVR4), then this function
 *              is the only thing that stands in the way of a serious
 *              security problem.  It validates requests for the "kill"
 *              and "renice" commands.
 *
 *              The lookup stats a path made of nothing but the decimal
 *              pid, so it is resolved against the current working
 *              directory (presumably /proc at this point -- confirm
 *              against the module's initialization code).
 */

int
proc_owner(int pid)

{
    char path[32];
    struct stat st;

    snprintf(path, sizeof(path), "%d", pid);

    return stat(path, &st) < 0 ? -1 : (int)st.st_uid;
}
1356