1 /* $Id: dtop.c,v 1.5 2008/02/27 18:50:34 garbled Exp $ */
2 /*
3 * Copyright (c) 1998, 1999, 2000, 2007
4 * Tim Rightnour. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by Tim Rightnour.
17 * 4. The name of Tim Rightnour may not be used to endorse or promote
18 * products derived from this software without specific prior written
19 * permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY TIM RIGHTNOUR ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL TIM RIGHTNOUR BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include <sys/time.h>
35 #include <sys/types.h>
36 #include <sys/resource.h>
37 #include <sys/wait.h>
38
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <signal.h>
42 #include <poll.h>
43 #include <libgen.h>
44 #include <curses.h>
45
46 #include "../common/common.h"
47
48 #if !defined(lint) && defined(__NetBSD__)
49 __COPYRIGHT(
50 "@(#) Copyright (c) 1998, 1999, 2000\n\
51 Tim Rightnour. All rights reserved\n");
52 __RCSID("$Id: dtop.c,v 1.5 2008/02/27 18:50:34 garbled Exp $");
53 #endif /* not lint */
54
/*
 * Debug trace: behaves like printf() when the global `debug` flag is set and
 * does nothing otherwise.  The do/while(0) wrapper makes the expansion a
 * single statement, so a DPRINTF() used as the body of an `if` cannot
 * capture the `else` that follows it.
 */
#define DPRINTF(...) do { if (debug) printf(__VA_ARGS__); } while (0)
56
57 typedef struct procdata {
58 node_t *node;
59 int pid;
60 char username[9];
61 int size;
62 int res;
63 float cpu;
64 char command[30];
65 } procdata_t;
66
67 typedef struct nodedata {
68 node_t *node;
69 long long int actmem;
70 long long int inactmem;
71 long long int wiredmem;
72 long long int execmem;
73 long long int filemem;
74 long long int freemem;
75 long long int swap;
76 long long int swapfree;
77 int procs;
78 float load1, load5, load15;
79 } nodedata_t;
80
81 void do_command(int fanout, char *username);
82 void sig_handler(int i);
83
/* Command run on every node through the remote shell. */
#define TOP_COMMAND "/bin/sh -c \"if [ `uname` = \"Linux\" ]; then top -bn 1; else top -Sb 2000; fi\""

/* Values of the global sortmethod. */
enum {
	SORT_CPU = 0,
	SORT_SIZE = 1,
	SORT_RES = 2,
	SORT_CPU_ONENODE = 3,
	SORT_MEM_ONENODE = 4,
	SORT_HOSTNAME = 5,
	SORT_LOADAVG = 6,
	SORT_ACTIVE = 7,
	SORT_INACTIVE = 8,
	SORT_FILE = 9,
	SORT_FREE = 10,
	SORT_SWAP = 11
};

/* Values of the global displaymode. */
enum {
	DISPLAY_PROC = 1,
	DISPLAY_LOAD = 2
};

/* Values of the global topmode, chosen by parse_top() from the output seen. */
enum {
	TOP_NORMAL = 0,
	TOP_PROCPS = 1,
	TOP_NORMAL_THR = 2
};
104
105 /* globals */
106 int debug, exclusion, grouping, interval;
107 int testflag, rshport, porttimeout, batchflag;
108 int showprocs;
109 node_t *nodelink;
110 group_t *grouplist;
111 char **rungroup;
112 int nrofrungroups;
113 char **lumplist;
114 pid_t currentchild;
115 char *progname;
116 volatile sig_atomic_t alarmtime, exitflag;
117 nodedata_t *nodedata, *sortnode;
118 procdata_t **procdata, **sortedprocs;
119 int totalpids, sortmethod=SORT_CPU;
120 int displaymode=DISPLAY_PROC;
121 int topmode=0;
122 size_t maxnodelen;
123 WINDOW *stdscr;
124
125 /*
126 * Dtop is program that runs top across an entire cluster, and displays
127 * the biggest hogs on each node.
128 */
129
130 int
main(int argc,char * argv[])131 main(int argc, char *argv[])
132 {
133 extern char *optarg;
134 extern int optind;
135 extern char *version;
136
137 int someflag, ch, fanout, showflag, fanflag;
138 char *p, *group, *nodename, *username;
139 char **exclude;
140 struct rlimit limit;
141 node_t *nodeptr;
142
143 someflag = showflag = fanflag = 0;
144 exclusion = debug = batchflag = 0;
145 testflag = rshport = nrofrungroups = 0;
146 showprocs = 20; /* nrof procs to show by default */
147 interval = 5; /* 5 second delay between node reads */
148 porttimeout = 5; /* 5 seconds to port timeout */
149 grouping = 0;
150 fanout = DEFAULT_FANOUT;
151 nodename = NULL;
152 username = NULL;
153 group = NULL;
154 nodeptr = NULL;
155 nodelink = NULL;
156 exclude = NULL;
157
158 rungroup = calloc(GROUP_MALLOC, sizeof(char **));
159 if (rungroup == NULL)
160 bailout();
161
162 progname = strdup(basename(argv[0]));
163
164 #if defined(__linux__)
165 while ((ch = getopt(argc, argv, "+?bdqti:f:g:l:m:o:p:s:vw:x:")) != -1)
166 #else
167 while ((ch = getopt(argc, argv, "?bdqti:f:g:l:m:o:p:s:vw:x:")) != -1)
168 #endif
169 switch (ch) {
170 case 'b':
171 batchflag = 1; /* we want batch mode */
172 break;
173 case 'd': /* we want to debug dsh (hidden)*/
174 debug = 1;
175 break;
176 case 'i': /* interval between node reads */
177 interval = atoi(optarg);
178 break;
179 case 'l': /* invoke me as some other user */
180 username = strdup(optarg);
181 break;
182 case 'q': /* just show me some info and quit */
183 showflag = 1;
184 break;
185 case 't': /* test the nodes before connecting */
186 testflag = 1;
187 break;
188 case 'p': /* what is the rsh port number? */
189 rshport = atoi(optarg);
190 break;
191 case 'o': /* set the test timeout in seconds */
192 porttimeout = atoi(optarg);
193 break;
194 case 'm': /* set the display mode */
195 if (strncasecmp(optarg, "proc", 4) == 0)
196 displaymode = DISPLAY_PROC;
197 else if (strncasecmp(optarg, "load", 4) == 0)
198 displaymode = DISPLAY_LOAD;
199 break;
200 case 'f': /* set the fanout size */
201 fanout = atoi(optarg);
202 fanflag = 1;
203 break;
204 case 'g': /* pick a group to run on */
205 grouping = 1;
206 nrofrungroups = parse_gopt(optarg);
207 break;
208 case 'v':
209 (void)printf("%s: %s\n", progname, version);
210 exit(EXIT_SUCCESS);
211 break;
212 case 'x': /* exclude nodes, w overrides this */
213 exclusion = 1;
214 exclude = parse_xopt(optarg);
215 break;
216 case 'w': /* perform operation on these nodes */
217 someflag = 1;
218 for (p = optarg; p != NULL; ) {
219 nodename = (char *)strsep(&p, ",");
220 if (nodename != NULL)
221 (void)nodealloc(nodename);
222 }
223 break;
224 case '?': /* you blew it */
225 (void)fprintf(stderr,
226 "usage:\n%s [-bqtv] [-f fanout] [-p portnum] [-o timeout] "
227 "[-g rungroup1,...,rungroupN]\n"
228 " [-l username] [-x node1,...,nodeN] [-w node1,..,nodeN]"
229 " [-m proc|load] [-i interval]\n", progname);
230 return(EXIT_FAILURE);
231 /*NOTREACHED*/
232 break;
233 default:
234 break;
235 }
236
237 /* Check for various environment variables and use them if they exist */
238 if (!fanflag && getenv("FANOUT"))
239 fanout = atoi(getenv("FANOUT"));
240 if (!rshport && getenv("RCMD_PORT"))
241 rshport = atoi(getenv("RCMD_PORT"));
242 if (!testflag && getenv("RCMD_TEST"))
243 testflag = 1;
244 if (porttimeout == 5 && getenv("RCMD_TEST_TIMEOUT"))
245 porttimeout = atoi(getenv("RCMD_TEST_TIMEOUT"));
246 if (username == NULL && getenv("RCMD_USER"))
247 username = strdup(getenv("RCMD_USER"));
248
249 rshport = get_rshport(testflag, rshport, "RCMD_CMD");
250
251 if (!someflag)
252 parse_cluster(exclude);
253
254 argc -= optind;
255 argv += optind;
256 if (showflag) {
257 do_showcluster(fanout);
258 return(EXIT_SUCCESS);
259 }
260
261 /*
262 * set per-process limits for max descriptors, this avoids running
263 * out of descriptors and the odd errors that come with that.
264 */
265 if (getrlimit(RLIMIT_NOFILE, &limit) != 0)
266 bailout();
267 if (limit.rlim_cur < fanout * 5) {
268 limit.rlim_cur = fanout * 5;
269 if (setrlimit(RLIMIT_NOFILE, &limit) != 0)
270 bailout();
271 }
272
273 do_command(fanout, username);
274 return(EXIT_SUCCESS);
275 }
276
277 #ifdef __DragonFly__
278 /* XXX conflicting proto from <util.h> */
279 #define dehumanize_number dehumanize_number_local
280 #endif
281
/*
 * Convert a size string such as "1234", "512K" or "64M" into a number.
 *
 * The string is scanned for the first 'K'/'k' or 'M'/'m'; that character is
 * overwritten with '\0' in num and the digits before it are multiplied by
 * 1024 or 1024*1024 respectively.  A string without such a suffix is
 * converted as a plain number.  At most the first 30 characters are
 * examined, and the scan never goes past the end of the string.
 *
 * num must be writable.  Returns 0 for a NULL pointer.
 */
long long int
dehumanize_number(char *num)
{
	int i;
	long long int val;

	if (num == NULL)
		return 0;

	/* stop at the terminator so bytes beyond the string are never read */
	for (i = 0; i < 30 && num[i] != '\0'; i++) {
		switch (num[i]) {
		case 'M':
		case 'm':
			num[i] = '\0';
			val = atoll(num) * 1024 * 1024;
			return val;
		case 'K':
		case 'k':
			num[i] = '\0';
			val = atoll(num) * 1024;
			DPRINTF("DE: %s, val=%lld num=%lld\n", num, val, atoll(num));
			return val;
		default:
			break;
		}
	}
	/* atoll, not atoi: the result type is long long */
	return atoll(num);
}
307
308 /* some of these fields may or may not appear, so we do it all stupid like
309 * which refers to 0 for Memory 1 for Swap.
310 */
311
312 void
parse_mem(char * c,nodedata_t * nd,int which)313 parse_mem(char *c, nodedata_t *nd, int which)
314 {
315 char *p, *q, *r, *n, buf[30];
316
317 n = q = strdup(c);
318 q = strchr(q, ':');
319 q++;
320 p = strsep(&q, ",");
321 while (p != NULL) {
322 DPRINTF("got %s\n", p);
323 if (strstr(p, "inactive") != NULL) {
324 sscanf(p, "%s inactive", buf);
325 nd->inactmem = dehumanize_number(buf);
326 DPRINTF(" inactive final %lld\n", nd->inactmem);
327 } else if (strstr(p, "swap free") != NULL) {
328 sscanf(p, "%s swap free", buf);
329 nd->swapfree = dehumanize_number(buf);
330 nd->swap += nd->swapfree;
331 } else if (strstr(p, "swap in use") != NULL) {
332 sscanf(p, "%s swap in use", buf);
333 nd->swap = dehumanize_number(buf);
334 } else if (strstr(p, "active") != NULL) {
335 sscanf(p, "%s active", buf);
336 nd->actmem = dehumanize_number(buf);
337 DPRINTF(" active final %lld\n", nd->actmem);
338 } else if (strstr(p, "used") != NULL && !which) {
339 sscanf(p, "%s used", buf);
340 nd->wiredmem = dehumanize_number(buf);
341 DPRINTF(" wired final %lld\n", nd->wiredmem);
342 } else if (strstr(p, "shared") != NULL) {
343 sscanf(p, "%s shared", buf);
344 nd->execmem = dehumanize_number(buf);
345 DPRINTF(" exec final %lld\n", nd->execmem);
346 } else if (strstr(p, "free") != NULL && !which) {
347 sscanf(p, "%s free", buf);
348 nd->freemem = dehumanize_number(buf);
349 DPRINTF(" memfree final %lld\n", nd->freemem);
350 } else if (strstr(p, "cached") != NULL) {
351 r = strstr(p, "free");
352 sscanf(p, "%s free", buf);
353 if (which) {
354 nd->swapfree = dehumanize_number(buf);
355 DPRINTF(" swapfree final %lld\n", nd->swapfree);
356 }
357 DPRINTF("FOO: %s %s\n", r, p);
358 if (r != NULL) {
359 while (!isdigit(*r))
360 r++;
361 sscanf(r, "%s cached", buf);
362 } else
363 sscanf(p, "%s cached", buf);
364 nd->filemem = dehumanize_number(buf);
365 DPRINTF(" cached final %lld\n", nd->filemem);
366 } else if (strstr(p, "av") != NULL && which) {
367 sscanf(p, "%s av", buf);
368 nd->swap = dehumanize_number(buf);
369 DPRINTF(" swaptotal final %lld\n", nd->swap);
370 } else if (strstr(p, "Act") != NULL) {
371 sscanf(p, "%s Act", buf);
372 nd->actmem = dehumanize_number(buf);
373 DPRINTF(" final %lld\n", nd->actmem);
374 } else if (strstr(p, "Inact") != NULL) {
375 sscanf(p, "%s Inact", buf);
376 nd->inactmem = dehumanize_number(buf);
377 DPRINTF(" final %lld\n", nd->inactmem);
378 } else if (strstr(p, "Wired") != NULL) {
379 sscanf(p, "%s Wired", buf);
380 nd->wiredmem = dehumanize_number(buf);
381 DPRINTF(" final %lld\n", nd->wiredmem);
382 } else if (strstr(p, "Exec") != NULL) {
383 sscanf(p, "%s Exec", buf);
384 nd->execmem = dehumanize_number(buf);
385 DPRINTF(" final %lld\n", nd->execmem);
386 } else if (strstr(p, "File") != NULL) {
387 sscanf(p, "%s File", buf);
388 nd->filemem = dehumanize_number(buf);
389 DPRINTF(" final %lld\n", nd->filemem);
390 } else if (strstr(p, "Free") != NULL) {
391 sscanf(p, "%s Free", buf);
392 if (which) {
393 nd->swapfree = dehumanize_number(buf);
394 DPRINTF(" final %lld\n", nd->swapfree);
395 } else {
396 nd->freemem = dehumanize_number(buf);
397 DPRINTF(" final %lld\n", nd->freemem);
398 }
399 } else if (strstr(p, "Total") != NULL) {
400 sscanf(p, "%s Total", buf);
401 nd->swap = dehumanize_number(buf);
402 DPRINTF(" final %lld\n", nd->swap);
403 }
404 p = strsep(&q, ",");
405 }
406 free(n);
407 }
408
409 void
parse_pid(char * c,node_t * node,int nn,int pid)410 parse_pid(char *c, node_t *node, int nn, int pid)
411 {
412 char *p, *q, *n, buf[80];
413 size_t s;
414 int i, col=0;
415
416 procdata[nn][pid].node = node;
417 n = q = strdup(c);
418 s = strspn(q, " ");
419 q += s;
420 p = strsep(&q, " ");
421 while (p != NULL) {
422 DPRINTF("got data column %d %s\n", col, p);
423 switch (col) {
424 case 0: /* pid */
425 procdata[nn][pid].pid = atoi(p);
426 DPRINTF("pid = %d\n", procdata[nn][pid].pid);
427 break;
428 case 1:
429 strncpy(procdata[nn][pid].username, p, 9);
430 DPRINTF("user=%s\n", procdata[nn][pid].username);
431 break;
432 case 4:
433 if (topmode == TOP_NORMAL_THR)
434 break;
435 procdata[nn][pid].size = dehumanize_number(p);
436 if (topmode == TOP_PROCPS)
437 procdata[nn][pid].size *= 1024;
438 DPRINTF("size=%d\n", procdata[nn][pid].size);
439 break;
440 case 5:
441 if (topmode == TOP_NORMAL_THR)
442 procdata[nn][pid].size = dehumanize_number(p);
443 else
444 procdata[nn][pid].res = dehumanize_number(p);
445 if (topmode == TOP_PROCPS)
446 procdata[nn][pid].res *= 1024;
447 DPRINTF("res=%d\n", procdata[nn][pid].res);
448 DPRINTF("size=%d\n", procdata[nn][pid].size);
449 break;
450 case 6:
451 if (topmode == TOP_NORMAL_THR)
452 procdata[nn][pid].res = dehumanize_number(p);
453 DPRINTF("res=%d\n", procdata[nn][pid].res);
454 DPRINTF("size=%d\n", procdata[nn][pid].size);
455 break;
456 case 8:
457 if (topmode == TOP_PROCPS) {
458 sscanf(p, "%f%%", &procdata[nn][pid].cpu);
459 DPRINTF("cpu=%f\n", procdata[nn][pid].cpu);
460 }
461 case 9:
462 if (topmode != TOP_PROCPS) {
463 sscanf(p, "%f%%", &procdata[nn][pid].cpu);
464 DPRINTF("cpu=%f\n", procdata[nn][pid].cpu);
465 }
466 break;
467 case 10:
468 if (topmode != TOP_PROCPS) {
469 strncpy(procdata[nn][pid].command, p, 30);
470 for (i=0; i < 30; i++)
471 if (procdata[nn][pid].command[i] == '\n')
472 procdata[nn][pid].command[i] = '\0';
473 DPRINTF("comm=%s\n", procdata[nn][pid].command);
474 }
475 break;
476 case 12:
477 if (topmode == TOP_PROCPS) {
478 strncpy(procdata[nn][pid].command, p, 30);
479 for (i=0; i < 30; i++)
480 if (procdata[nn][pid].command[i] == '\n')
481 procdata[nn][pid].command[i] = '\0';
482 DPRINTF("comm=%s\n", procdata[nn][pid].command);
483 }
484 break;
485 default:
486 break;
487 }
488
489 if (q != NULL) {
490 s = strspn(q, " ");
491 q += s;
492 }
493 p = strsep(&q, " ");
494 col++;
495 }
496 free(n);
497 }
498
499 int
parse_top(char * cd,node_t * node,int nn,int pid)500 parse_top(char *cd, node_t *node, int nn, int pid)
501 {
502 char *c, *d;
503 int ret;
504
505 ret = 0;
506 c = cd;
507 DPRINTF("enter pzarse_top for %d %s\n", nn, node->name);
508 DPRINTF("LINE %s\n", cd);
509 while (c != NULL) {
510 if (strstr(c, "load average:") != NULL) {
511 topmode = TOP_PROCPS;
512 d = strstr(c, "load average:");
513 nodedata[nn].node = node;
514 sscanf(d, "load average: %f, %f, %f",
515 &nodedata[nn].load1, &nodedata[nn].load5,
516 &nodedata[nn].load15);
517 DPRINTF("parse_top: Xfloats %f %f %f\n",nodedata[nn].load1,nodedata[nn].load5,nodedata[nn].load15 );
518 } else if (strstr(c, "load averages") != NULL) {
519 nodedata[nn].node = node;
520 sscanf(c, "load averages: %f, %f, %f", &nodedata[nn].load1,
521 &nodedata[nn].load5, &nodedata[nn].load15);
522 DPRINTF("parse_top: floats %f %f %f\n",nodedata[nn].load1,nodedata[nn].load5,nodedata[nn].load15 );
523 } else if (strstr(c, "processes:") != NULL) {
524 sscanf(c, "%d processes:", &nodedata[nn].procs);
525 DPRINTF("parse_top: procs %d\n", nodedata[nn].procs);
526 if (nodedata[nn].procs > 0) {
527 if (procdata[nn] != NULL)
528 free(procdata[nn]);
529 procdata[nn] = calloc(nodedata[nn].procs,
530 sizeof(procdata_t));
531 DPRINTF("parse_top: allocated prodata array for %s\n", node->name);
532 }
533 } else if (strstr(c, "Memory:") != NULL) {
534 parse_mem(c, &nodedata[nn], 0);
535 } else if (strstr(c, "inactive") != NULL) {
536 c[0] = ':';
537 parse_mem(c, &nodedata[nn], 0);
538 } else if (strstr(c, "Mem:") != NULL) {
539 parse_mem(c, &nodedata[nn], 0);
540 } else if (strstr(c, "Swap:") != NULL) {
541 parse_mem(c, &nodedata[nn], 1);
542 } else if (strstr(c, " PID") != NULL) {
543 if (strstr(c, " THR ") != NULL)
544 topmode = TOP_NORMAL_THR;
545 /* ignore this line */
546 } else if (c[0] == '\n') {
547 ; /* ignore this line */
548 } else if (isdigit(c[4])) {
549 parse_pid(c, node, nn, pid);
550 DPRINTF("PIDLINE\n");
551 ret = 1;
552 }
553
554
555
556 DPRINTF("data = %s\n", c);
557 c = strchr(c, '\n');
558 c++;
559 c = strstr(c, "\n");
560 }
561 return ret;
562 }
563
564 int
compare_node(const void * a,const void * b)565 compare_node(const void *a, const void *b)
566 {
567 const nodedata_t *one = a;
568 const nodedata_t *two = b;
569 float fone, ftwo;
570
571 switch (sortmethod) {
572 case SORT_HOSTNAME:
573 return(strcmp(one->node->name, two->node->name));
574 break;
575 case SORT_LOADAVG:
576 if (one->load1 > two->load1)
577 return -1;
578 if (one->load1 < two->load1)
579 return 1;
580 return 0;
581 break;
582 case SORT_ACTIVE:
583 if (one->actmem > two->actmem)
584 return -1;
585 if (one->actmem < two->actmem)
586 return 1;
587 return 0;
588 break;
589 case SORT_INACTIVE:
590 if (one->inactmem > two->inactmem)
591 return -1;
592 if (one->inactmem < two->inactmem)
593 return 1;
594 return 0;
595 break;
596 case SORT_FILE:
597 if (one->filemem > two->filemem)
598 return -1;
599 if (one->filemem < two->filemem)
600 return 1;
601 return 0;
602 break;
603 case SORT_FREE:
604 if (one->freemem > two->freemem)
605 return 1;
606 if (one->freemem < two->freemem)
607 return -1;
608 return 0;
609 break;
610 case SORT_SWAP:
611 fone = 1.0 - ((float)one->swapfree / (float)one->swap);
612 ftwo = 1.0 - ((float)two->swapfree / (float)two->swap);
613 if (fone > ftwo)
614 return -1;
615 if (fone < ftwo)
616 return 1;
617 return 0;
618 break;
619 default:
620 return 0;
621 }
622 }
623
624 int
compare_proc(const void * a,const void * b)625 compare_proc(const void *a, const void *b)
626 {
627 procdata_t * const *one = a;
628 procdata_t * const *two = b;
629
630 switch (sortmethod) {
631 case SORT_SIZE:
632 if ((*one)->size > (*two)->size)
633 return -1;
634 if ((*one)->size < (*two)->size)
635 return 1;
636 return 0;
637 break;
638 case SORT_RES:
639 if ((*one)->res > (*two)->res)
640 return -1;
641 if ((*one)->res < (*two)->res)
642 return 1;
643 return 0;
644 break;
645 case SORT_CPU:
646 default:
647 if ((*one)->cpu > (*two)->cpu)
648 return -1;
649 if ((*one)->cpu < (*two)->cpu)
650 return 1;
651 return 0;
652 break;
653 }
654 }
655
656 /*
657 * Generate a PID line. idx is an index into the sortedprocs array.
658 */
659
660 void
print_pid(char * obuf,int idx)661 print_pid(char *obuf, int idx)
662 {
663 int i;
664 char buf[8], xbuf[80];
665 const char *die = "";
666
667 if (sortedprocs[idx]->node == NULL)
668 return;
669 sprintf(obuf, "%*s", maxnodelen, sortedprocs[idx]->node->name);
670 sprintf(xbuf, " %6d", sortedprocs[idx]->pid);
671 strcat(obuf, xbuf);
672 sprintf(xbuf, " %8s", sortedprocs[idx]->username);
673 strcat(obuf, xbuf);
674 i = humanize_number(buf, 6, sortedprocs[idx]->size, die, HN_AUTOSCALE,
675 HN_NOSPACE|HN_DECIMAL);
676 if (i == -1)
677 sprintf(xbuf, " UNK");
678 else
679 sprintf(xbuf, " %6s", buf);
680 strcat(obuf, xbuf);
681 i = humanize_number(buf, 6, sortedprocs[idx]->res, "", HN_AUTOSCALE,
682 HN_NOSPACE);
683 if (i == -1)
684 sprintf(xbuf, " UNK");
685 else
686 sprintf(xbuf, " %6s", buf);
687 strcat(obuf, xbuf);
688 sprintf(xbuf, " %5.2f%%", sortedprocs[idx]->cpu);
689 strcat(obuf, xbuf);
690 sprintf(xbuf, " %-30s", sortedprocs[idx]->command);
691 strcat(obuf, xbuf);
692 }
693
694 /*
695 * Generate a nodedata line, idx is nodenum
696 */
697
698 void
print_nodedata(char * obuf,int idx)699 print_nodedata(char *obuf, int idx)
700 {
701 int i;
702 float f;
703 char buf[8], xbuf[80];
704
705 sprintf(obuf, "%*s %6d %6.2f %6.2f %6.2f", maxnodelen,
706 sortnode[idx].node->name, sortnode[idx].procs,
707 sortnode[idx].load1, sortnode[idx].load5,
708 sortnode[idx].load15);
709 i = humanize_number(buf, 6, sortnode[idx].actmem, "", HN_AUTOSCALE,
710 HN_NOSPACE);
711 if (i == -1)
712 sprintf(xbuf, " UNK");
713 else
714 sprintf(xbuf, " %6s", buf);
715 strcat(obuf, xbuf);
716
717 i = humanize_number(buf, 6, sortnode[idx].inactmem, "", HN_AUTOSCALE,
718 HN_NOSPACE);
719 if (i == -1)
720 sprintf(xbuf, " UNK");
721 else
722 sprintf(xbuf, " %6s", buf);
723 strcat(obuf, xbuf);
724
725 i = humanize_number(buf, 6, sortnode[idx].filemem, "", HN_AUTOSCALE,
726 HN_NOSPACE);
727 if (i == -1)
728 sprintf(xbuf, " UNK");
729 else
730 sprintf(xbuf, " %6s", buf);
731 strcat(obuf, xbuf);
732
733 i = humanize_number(buf, 6, sortnode[idx].freemem, "", HN_AUTOSCALE,
734 HN_NOSPACE);
735 if (i == -1)
736 sprintf(xbuf, " UNK");
737 else
738 sprintf(xbuf, " %6s", buf);
739 strcat(obuf, xbuf);
740
741 i = humanize_number(buf, 6, sortnode[idx].swapfree, "", HN_AUTOSCALE,
742 HN_NOSPACE);
743 if (i == -1)
744 sprintf(xbuf, " UNK");
745 else
746 sprintf(xbuf, " %6s", buf);
747 strcat(obuf, xbuf);
748
749 f = 1.0 - ((float)sortnode[idx].swapfree / (float)sortnode[idx].swap);
750 f *= 100.0;
751 sprintf(xbuf, " %5.2f%%", f);
752 strcat(obuf, xbuf);
753 }
754
755 /*
756 * Simple ascii print of top, for use in batch mode, etc
757 */
758
759 void
batch_print(int tprocs,int tnodes)760 batch_print(int tprocs, int tnodes)
761 {
762 int i, vnodes;
763 char buf[80];
764
765 switch (displaymode) {
766 case DISPLAY_LOAD:
767 memcpy(sortnode, nodedata, sizeof(nodedata_t) * tnodes);
768 for (i=0, vnodes=0; i < tnodes; i++)
769 if (nodedata[i].node != NULL)
770 vnodes++;
771 qsort(sortnode, vnodes, sizeof(nodedata_t), compare_node);
772 printf("%*s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s\n",
773 maxnodelen, "HOSTNAME", "PROCS", "LOAD1", "LOAD5",
774 "LOAD15", "ACTIVE", "INACT", "FILE", "FREE",
775 "SWPFRE", "SWUSED");
776 for (i=0; i < vnodes; i++) {
777 print_nodedata(buf, i);
778 printf("%s\n", buf);
779 }
780 break;
781 case DISPLAY_PROC:
782 default:
783 printf("%*s %6s %8s %6s %6s %6s %s\n", maxnodelen, "HOSTNAME",
784 "PID", "USERNAME", "SIZE", "RES", "CPU", "COMMAND");
785 printf("tprocs==%d showprocs==%d\n", tprocs, showprocs);
786 for (i=0; i < tprocs && i < showprocs; i++) {
787 print_pid(buf, i);
788 printf("%s\n", buf);
789 }
790 break;
791 }
792 }
793
794 void
curses_header(void)795 curses_header(void)
796 {
797 char buf[80];
798
799 clear();
800 move(1, 0);
801 switch (displaymode) {
802 case DISPLAY_LOAD:
803 sprintf(buf, "%*s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s",
804 maxnodelen, "HOSTNAME", "PROCS", "LOAD1", "LOAD5",
805 "LOAD15", "ACTIVE", "INACT", "FILE", "FREE",
806 "SWPFRE", "SWUSED");
807 break;
808 case DISPLAY_PROC:
809 default:
810 sprintf(buf, "%*s %6s %8s %6s %6s %6s %s", maxnodelen,
811 "HOSTNAME", "PID", "USERNAME", "SIZE", "RES", "CPU",
812 "COMMAND");
813 break;
814 }
815 addstr(buf);
816 refresh();
817 }
818
/*
 * Show the key help screen, wait (without timeout) for a key press and
 * then redraw the column header.
 */
void
curses_help(void)
{
	static const char *const helptext[] = {
		" p : Switch to proc view, sorted by CPU\n",
		" m : Switch to proc view, sorted by MEM\n",
		" l : Switch to load view, sorted by HOSTNAME\n",
		" v : Switch to load view, sorted by loadavg\n",
		" a : Switch to load view, sorted by active\n",
		" i : Switch to load view, sorted by inact\n",
		" r : Switch to load view, sorted by file\n",
		" f : Switch to load view, sorted by free\n",
		" s : Switch to load view, sorted by swapused\n",
		" ? : Display this help\n",
		" q : quit\n",
		"\n -- Hit any key --\n"
	};
	int i;

	clear();
	move(1, 0);
	for (i = 0; i < (int)(sizeof(helptext) / sizeof(helptext[0])); i++)
		addstr(helptext[i]);
	timeout(-1);		/* block until a key arrives */
	(void)getch();
	curses_header();
}
842
843 void
curses_setup(void)844 curses_setup(void)
845 {
846 char buf[80];
847
848 if (debug)
849 return;
850 stdscr = initscr();
851 if (stdscr == NULL) {
852 fprintf(stderr, "Could not initialize curses\n");
853 bailout();
854 }
855 timeout(1000*interval);
856 curses_header();
857 }
858
859 void
curses_print(int tprocs,int tnodes)860 curses_print(int tprocs, int tnodes)
861 {
862 int i, vnodes;
863 char buf[80];
864
865 if (debug)
866 return;
867 move(2, 0);
868 clrtobot();
869 refresh();
870 switch (displaymode) {
871 case DISPLAY_LOAD:
872 memcpy(sortnode, nodedata, sizeof(nodedata_t) * tnodes);
873 for (i=0, vnodes=0; i < tnodes; i++)
874 if (nodedata[i].node != NULL)
875 vnodes++;
876 qsort(sortnode, vnodes, sizeof(nodedata_t), compare_node);
877 for (i=0; i < vnodes && i < (LINES-2); i++) {
878 print_nodedata(buf, i);
879 move(i+2, 0);
880 addstr(buf);
881 }
882 break;
883 case DISPLAY_PROC:
884 default:
885 for (i=0; i < tprocs && i < (LINES-2); i++) {
886 print_pid(buf, i);
887 move(i+2, 0);
888 addstr(buf);
889 }
890 break;
891 }
892 move(0, 0);
893 refresh();
894 }
895
896 void
curses_getkey(void)897 curses_getkey(void)
898 {
899 int key;
900
901 timeout(1000 * interval);
902 key = getch();
903 move(0,0); printw("key=%d", key);
904 switch (key) {
905 case 'p':
906 displaymode = DISPLAY_PROC;
907 sortmethod = SORT_CPU;
908 break;
909 case 'm':
910 displaymode = DISPLAY_PROC;
911 sortmethod = SORT_SIZE;
912 break;
913 case 'l':
914 displaymode = DISPLAY_LOAD;
915 sortmethod = SORT_HOSTNAME;
916 break;
917 case 'h':
918 displaymode = DISPLAY_LOAD;
919 sortmethod = SORT_HOSTNAME;
920 break;
921 case 'v':
922 displaymode = DISPLAY_LOAD;
923 sortmethod = SORT_LOADAVG;
924 break;
925 case 'a':
926 displaymode = DISPLAY_LOAD;
927 sortmethod = SORT_ACTIVE;
928 break;
929 case 'i':
930 displaymode = DISPLAY_LOAD;
931 sortmethod = SORT_INACTIVE;
932 break;
933 case 'r':
934 displaymode = DISPLAY_LOAD;
935 sortmethod = SORT_FILE;
936 break;
937 case 'f':
938 displaymode = DISPLAY_LOAD;
939 sortmethod = SORT_FREE;
940 break;
941 case 's':
942 displaymode = DISPLAY_LOAD;
943 sortmethod = SORT_SWAP;
944 break;
945 case 'q':
946 exitflag = 1;
947 break;
948 case ERR:
949 break;
950 case '?':
951 default:
952 curses_help();
953 break;
954 }
955 if (key != ERR)
956 curses_header();
957 }
958
959
960 /*
961 * Do the actual dirty work of the program, now that the arguments
962 * have all been parsed out.
963 */
964
965 void
do_command(int fanout,char * username)966 do_command(int fanout, char *username)
967 {
968 struct sigaction signaler;
969 FILE *fd, *fda, *in;
970 char buf[MAXBUF], cbuf[MAXBUF], pipebuf[2048];
971 char *command = TOP_COMMAND;
972 int status, i, j, n, g, pollret, nrofargs, arg, slen, fdf;
973 int tnodes, k, l;
974 char *p, **q, **rsh, *cd, **cmd, *rshstring;
975 char *scriptbase, *scriptdir;
976 node_t *nodeptr, *nodehold;
977 struct pollfd fds[2];
978
979 maxnodelen = 0;
980 j = i = 0;
981 in = NULL;
982 cd = pipebuf;
983 exitflag = 0;
984 sortedprocs = NULL;
985
986 if (debug) {
987 if (username != NULL)
988 (void)printf("As User: %s\n", username);
989 (void)printf("On nodes:\n");
990 }
991 for (nodeptr = nodelink; nodeptr; nodeptr = nodeptr->next) {
992 if (strlen(nodeptr->name) > maxnodelen)
993 maxnodelen = strlen(nodeptr->name);
994 if (debug) {
995 if (!(j % 4) && j > 0)
996 (void)printf("\n");
997 (void)printf("%s\t", nodeptr->name);
998 }
999 j++;
1000 }
1001 if (maxnodelen < 8)
1002 maxnodelen = 8;
1003
1004 i = j; /* side effect of above */
1005 tnodes = i;
1006 j = i / fanout;
1007 if (i % fanout)
1008 j++; /* compute the # of rungroups */
1009
1010 nodedata = calloc(tnodes, sizeof(nodedata_t));
1011 sortnode = calloc(tnodes, sizeof(nodedata_t));
1012 if (nodedata == NULL || sortnode == NULL)
1013 bailout();
1014
1015 /* the per-node array of procs */
1016 procdata = calloc(tnodes, sizeof(procdata_t *));
1017 if (procdata == NULL)
1018 bailout();
1019
1020 if (debug) {
1021 (void)printf("\nDo Command: %s\n", command);
1022 (void)printf("Fanout: %d Groups:%d\n", fanout, j);
1023 }
1024
1025 #if 0
1026 /* XXX this is going to be a problem */
1027 close(STDIN_FILENO); /* DAMN this bug took awhile to find */
1028 if (open("/dev/null", O_RDONLY, NULL) != 0)
1029 bailout();
1030 #endif
1031 signaler.sa_handler = sig_handler;
1032 signaler.sa_flags = 0;
1033 sigaction(SIGTERM, &signaler, NULL);
1034 sigaction(SIGINT, &signaler, NULL);
1035
1036 rsh = parse_rcmd("RCMD_CMD", "RCMD_CMD_ARGS", &nrofargs);
1037 rshstring = build_rshstring(rsh, nrofargs);
1038
1039 if (!batchflag)
1040 curses_setup();
1041
1042 /* begin the processing loop */
1043 while (!exitflag) {
1044 g = 0;
1045 nodeptr = nodelink;
1046 for (n=0; n <= j; n++) { /* fanout group */
1047 if (exitflag)
1048 goto out;
1049 nodehold = nodeptr;
1050 for (i=0; (i < fanout && nodeptr != NULL); i++) {
1051 g++;
1052 if (exitflag)
1053 goto out;
1054 if (debug)
1055 (void)printf("Working node: %d, fangroup %d,"
1056 " fanout part: %d\n", g, n, i);
1057 /*
1058 * we set up pipes for each node, to prepare for the oncoming barrage of data.
1059 * Close on exec must be set here, otherwise children spawned after other
1060 * children, inherit the open file descriptors, and cause the pipes to remain
1061 * open forever.
1062 */
1063 if (pipe(nodeptr->out.fds) != 0)
1064 bailout();
1065 if (pipe(nodeptr->err.fds) != 0)
1066 bailout();
1067 if (fcntl(nodeptr->out.fds[0], F_SETFD, 1) == -1)
1068 bailout();
1069 if (fcntl(nodeptr->out.fds[1], F_SETFD, 1) == -1)
1070 bailout();
1071 if (fcntl(nodeptr->err.fds[0], F_SETFD, 1) == -1)
1072 bailout();
1073 if (fcntl(nodeptr->err.fds[1], F_SETFD, 1) == -1)
1074 bailout();
1075 nodeptr->childpid = fork();
1076 switch (nodeptr->childpid) {
1077 /* its the ol fork and switch routine eh? */
1078 case -1:
1079 bailout();
1080 break;
1081 case 0:
1082 /* remove from parent group to avoid kernel
1083 * passing signals to children.
1084 */
1085 (void)setsid();
1086 if (dup2(nodeptr->out.fds[1], STDOUT_FILENO)
1087 != STDOUT_FILENO)
1088 bailout();
1089 if (dup2(nodeptr->err.fds[1], STDERR_FILENO)
1090 != STDERR_FILENO)
1091 bailout();
1092 if (close(nodeptr->out.fds[0]) != 0)
1093 bailout();
1094 if (close(nodeptr->err.fds[0]) != 0)
1095 bailout();
1096 /* stdin & stderr non-blocking */
1097 fdf = fcntl(nodeptr->out.fds[0], F_GETFL);
1098 fcntl(nodeptr->out.fds[0], F_SETFL, fdf|O_NONBLOCK);
1099 fdf = fcntl(nodeptr->err.fds[0], F_GETFL);
1100 fcntl(nodeptr->err.fds[0], F_SETFL, fdf|O_NONBLOCK);
1101
1102 if (username != NULL)
1103 (void)snprintf(buf, MAXBUF, "%s@%s", username,
1104 nodeptr->name);
1105 else
1106 (void)snprintf(buf, MAXBUF, "%s", nodeptr->name);
1107 if (debug)
1108 (void)printf("%s %s %s\n", rshstring,
1109 buf, command);
1110 if (testflag && rshport > 0 && porttimeout > 0)
1111 if (!test_node_connection(rshport, porttimeout,
1112 nodeptr))
1113 _exit(EXIT_SUCCESS);
1114 cmd = calloc(nrofargs+1, sizeof(char *));
1115 arg = 0;
1116 while (rsh[arg] != NULL) {
1117 cmd[arg] = rsh[arg];
1118 arg++;
1119 }
1120 cmd[arg++] = buf;
1121 cmd[arg++] = command;
1122 cmd[arg] = (char *)0;
1123 execvp(rsh[0], cmd);
1124 bailout();
1125 break;
1126 default:
1127 break;
1128 } /* switch */
1129 nodeptr = nodeptr->next;
1130 } /* for i */
1131 nodeptr = nodehold;
1132 for (i=0; (i < fanout && nodeptr != NULL); i++) {
1133 int pid=0;
1134
1135 if (exitflag)
1136 goto out;
1137 if (debug)
1138 (void)printf("Printing node: %d, fangroup %d,"
1139 " fanout part: %d\n", ((n) ? g-fanout+i : i),
1140 n, i);
1141 currentchild = nodeptr->childpid;
1142 /* now close off the useless stuff, and read the goodies */
1143 if (close(nodeptr->out.fds[1]) != 0)
1144 bailout();
1145 if (close(nodeptr->err.fds[1]) != 0)
1146 bailout();
1147 fda = fdopen(nodeptr->out.fds[0], "r"); /* stdout */
1148 if (fda == NULL)
1149 bailout();
1150 fd = fdopen(nodeptr->err.fds[0], "r"); /* stderr */
1151 if (fd == NULL)
1152 bailout();
1153 fds[0].fd = nodeptr->out.fds[0];
1154 fds[1].fd = nodeptr->err.fds[0];
1155 fds[0].events = POLLIN|POLLPRI;
1156 fds[1].events = POLLIN|POLLPRI;
1157 pollret = 1;
1158
1159 // DPRINTF("HELLO: n=%d %d %d - %d %d\n", n, g-fanout+i, i, g, fanout);
1160 DPRINTF("HELLO: %d\n", n*fanout + i);
1161 while (pollret >= 0) {
1162 int gotdata;
1163
1164 pollret = poll(fds, 2, 5);
1165 gotdata = 0;
1166 if ((fds[0].revents&POLLIN) == POLLIN ||
1167 (fds[0].revents&POLLHUP) == POLLHUP ||
1168 (fds[0].revents&POLLPRI) == POLLPRI) {
1169 #ifdef __linux__
1170 cd = fgets(pipebuf, sizeof(pipebuf), fda);
1171 if (cd != NULL) {
1172 #else
1173 while ((cd = fgets(pipebuf, sizeof(pipebuf), fda))) {
1174 #endif
1175 pid += parse_top(cd, nodeptr,
1176 n*fanout + i, pid);
1177 gotdata++;
1178 }
1179 }
1180 if ((fds[1].revents&POLLIN) == POLLIN ||
1181 (fds[1].revents&POLLHUP) == POLLHUP ||
1182 (fds[1].revents&POLLPRI) == POLLPRI) {
1183 #ifdef __linux__
1184 cd = fgets(pipebuf, sizeof(pipebuf), fd);
1185 if (cd != NULL) {
1186 #else
1187 while ((cd = fgets(pipebuf, sizeof(pipebuf), fd))) {
1188 #endif
1189 gotdata++;
1190 }
1191 }
1192 if (!gotdata)
1193 if (((fds[0].revents&POLLHUP) == POLLHUP ||
1194 (fds[0].revents&POLLERR) == POLLERR ||
1195 (fds[0].revents&POLLNVAL) == POLLNVAL) &&
1196 ((fds[1].revents&POLLHUP) == POLLHUP ||
1197 (fds[1].revents&POLLERR) == POLLERR ||
1198 (fds[1].revents&POLLNVAL) == POLLNVAL))
1199 break;
1200 }
1201 fclose(fda);
1202 fclose(fd);
1203 (void)wait(&status);
1204 nodeptr = nodeptr->next;
1205 } /* for pipe read */
1206 totalpids = 0;
1207 for (k=0; k < tnodes; k++) {
1208 totalpids += nodedata[k].procs;
1209 DPRINTF("k=%d procs=%d\n", k, nodedata[k].procs);
1210 }
1211 DPRINTF(" total of %d pids\n", totalpids);
1212 if (sortedprocs != NULL)
1213 free(sortedprocs);
1214 sortedprocs = calloc(totalpids, sizeof(procdata_t *));
1215 i = 0;
1216 DPRINTF("tnodes=%d\n", tnodes);
1217 for (k=0; k < tnodes; k++) {
1218 DPRINTF("node %d procs==%d\n", k, nodedata[k].procs);
1219 for (l=0; l < nodedata[k].procs; l++) {
1220 if (procdata[k][l].node != NULL &&
1221 strcmp(procdata[k][l].command, "")) {
1222 sortedprocs[i] = &procdata[k][l];
1223 DPRINTF("pdata=%p\n", &procdata[k][l]);
1224 i++;
1225 }
1226 }
1227 }
1228 qsort(sortedprocs, i, sizeof(procdata_t *), compare_proc);
1229 #if 0
1230 for (k=0; k < i; k++)
1231 DPRINTF("node=%s, pid=%d, cpu=%f\n",
1232 sortedprocs[k]->node->name, sortedprocs[k]->pid,
1233 sortedprocs[k]->cpu);
1234 #endif
1235 if (!batchflag && !debug) {
1236 curses_print(i, tnodes);
1237 curses_getkey();
1238 }
1239 } /* for n */
1240 DPRINTF("fanout done\n");
1241 if (batchflag == 1 || debug) {
1242 batch_print(i, tnodes);
1243 exitflag = 1;
1244 }
1245 } /* while loop */
1246 out:
1247 if (!batchflag && !debug) {
1248 move(LINES, 0);
1249 endwin();
1250 }
1251
1252 free(rshstring);
1253 for (i=0; rsh[i] != NULL; i++)
1254 free(rsh[i]);
1255 free(rsh);
1256 }
1257
1258 void
1259 sig_handler(int i)
1260 {
1261 switch (i) {
1262 case SIGINT:
1263 case SIGTERM:
1264 exitflag = 1;
1265 break;
1266 default:
1267 bailout();
1268 break;
1269 }
1270 }
1271