1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /* clang-format off */
19 
20 #include "stdioInterf.h"
21 #include "fioMacros.h"
22 #include "timeBlk.h"
23 #include <string.h>
24 #include <memory.h>
25 
26 #if defined(WIN32) || defined(WIN64)
27 #define write _write
28 #endif
29 
30 extern char *__fort_getopt();
31 extern void __fort_gettb();
32 
33 static struct tb tb0 = {/* stats at beginning of program */
34                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
35                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ""};
36 
37 static struct tb tb1 = {/* stats at end of program */
38                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
39                         0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ""};
40 
41 /* Fortran-callable time routine
42  * calling:
43  *           real*8 times(3)
44  *           call ENTFTN(TIMES,times)(times)
45  * returns:
46  *    times(1) = wallclock time
47  *    times(2) = user process run time
48  *    times(3) = system time
49  */
50 
ENTFTN(TIMES,times)51 void ENTFTN(TIMES, times)(double *times)
52 {
53   struct tb t;
54 
55   __fort_gettb(&t);
56   times[0] = t.r;
57   times[1] = t.u;
58   times[2] = t.s;
59 }
60 
61 /** \brief Begin stats */
62 int
__fort_stat_init(void)63 __fort_stat_init(void)
64 {
65   __fort_gettb(&tb0);
66   return (1);
67 }
68 
69 /* scale byte quantity */
70 
71 static char *
scale_bytes(double d,double * ds)72 scale_bytes(double d, double *ds)
73 {
74   char *s;
75 
76   s = "B";
77   if (d >= 1024) {
78     d = (d + 1023) / 1024;
79     s = "KB";
80   }
81   if (d >= 1024) {
82     d = (d + 1023) / 1024;
83     s = "MB";
84   }
85   if (d >= 1024) {
86     d = (d + 1023) / 1024;
87     s = "GB";
88   }
89   if (d >= 1024) {
90     d = (d + 1023) / 1024;
91     s = "TB";
92   }
93   *ds = d;
94   return s;
95 }
96 
97 /* scale byte quantity, beginning with kilobytes */
98 
99 static char *
scale_kbytes(double d,double * ds)100 scale_kbytes(double d, double *ds)
101 {
102   char *s;
103 
104   d = (d + 1023) / 1024;
105   s = "KB";
106   if (d >= 1024) {
107     d = (d + 1023) / 1024;
108     s = "MB";
109   }
110   if (d >= 1024) {
111     d = (d + 1023) / 1024;
112     s = "GB";
113   }
114   if (d >= 1024) {
115     d = (d + 1023) / 1024;
116     s = "TB";
117   }
118   *ds = d;
119   return s;
120 }
121 
122 /* print cpu info */
123 
cpu(tbp)124 static void cpu(tbp) struct tb *tbp;
125 {
126   int i, quiet, tcpus;
127   struct tb n, a, x, e;
128   char buf[256];
129   double d;
130 
131   tcpus = GET_DIST_TCPUS;
132   fprintf(__io_stderr(), "\n");
133   fprintf(__io_stderr(),
134           "cpu        real      user       sys     ratio   node\n");
135   n.r = x.r = e.r = tbp[0].r;
136   n.u = x.u = e.u = tbp[0].u;
137   n.s = x.s = e.s = tbp[0].s;
138   for (i = 1; i < tcpus; i++) {
139     if (tbp[i].r < n.r)
140       n.r = tbp[i].r;
141     if (tbp[i].r > x.r)
142       x.r = tbp[i].r;
143     if (tbp[i].u < n.u)
144       n.u = tbp[i].u;
145     if (tbp[i].u > x.u)
146       x.u = tbp[i].u;
147     if (tbp[i].s < n.s)
148       n.s = tbp[i].s;
149     if (tbp[i].s > x.s)
150       x.s = tbp[i].s;
151     e.r += tbp[i].r;
152     e.u += tbp[i].u;
153     e.s += tbp[i].s;
154   }
155 
156   quiet = GET_DIST_QUIET;
157   if (quiet & Q_CPUS) {
158     for (i = 0; i < tcpus; i++) {
159       d = (tbp[i].r == 0.0 ? 0.0 : (tbp[i].u + tbp[i].s) / tbp[i].r);
160       sprintf(buf, "%4d%c%10.2f%10.2f%10.2f%9.0f%%   %-s\n", i,
161               (i == GET_DIST_IOPROC ? '*' : ' '), tbp[i].r, tbp[i].u, tbp[i].s,
162               d * 100, tbp[i].host);
163       write(2, buf, strlen(buf));
164     }
165   }
166   if ((quiet & Q_CPUS) && (tcpus > 1)) {
167     a.r = e.r / (float)tcpus;
168     a.u = e.u / (float)tcpus;
169     a.s = e.s / (float)tcpus;
170     sprintf(buf, " min %10.2f%10.2f%10.2f\n", n.r, n.u, n.s);
171     write(2, buf, strlen(buf));
172     sprintf(buf, " avg %10.2f%10.2f%10.2f\n", a.r, a.u, a.s);
173     write(2, buf, strlen(buf));
174     sprintf(buf, " max %10.2f%10.2f%10.2f\n", x.r, x.u, x.s);
175     write(2, buf, strlen(buf));
176   }
177   d = (x.r == 0.0 ? 0.0 : (e.u + e.s) / x.r);
178   sprintf(buf, "total%10.2f%10.2f%10.2f%9.2fx\n", x.r, e.u, e.s, d);
179   write(2, buf, strlen(buf));
180 }
181 
182 /* print memory info */
183 
mem(tbp)184 static void mem(tbp) struct tb *tbp;
185 {
186   double tmaxrss; /* total max set size */
187   double tminflt; /* total minor fault */
188   double tmajflt; /* total major fault */
189   double tnvcsw;  /* total voluntary switches */
190   double tnivcsw; /* total involuntary switches */
191   double tsbrk;   /* total heap used (local) */
192   double tgsbrk;  /* total heap used (global) */
193   int i, quiet, tcpus;
194   char *s_sbrk, *s_gsbrk;
195   double d_sbrk, d_gsbrk;
196   char buf[256];
197 
198   fprintf(__io_stderr(), "\n");
199   fprintf(__io_stderr(), "memory    local    global  res size  pag flts  pag "
200                            "flts voluntary  involunt\n");
201   fprintf(__io_stderr(), "           heap      heap   (pages)     minor     "
202                            "major  switches  switches\n");
203   tmaxrss = 0.0;
204   tminflt = 0.0;
205   tmajflt = 0.0;
206   tnvcsw = 0.0;
207   tnivcsw = 0.0;
208   tsbrk = 0.0;
209   tgsbrk = 0.0;
210   quiet = GET_DIST_QUIET;
211   tcpus = GET_DIST_TCPUS;
212   for (i = 0; i < tcpus; i++) {
213     tmaxrss += tbp[i].maxrss;
214     tminflt += tbp[i].minflt;
215     tmajflt += tbp[i].majflt;
216     tnvcsw += tbp[i].nvcsw;
217     tnivcsw += tbp[i].nivcsw;
218     tsbrk += tbp[i].sbrk;
219     tgsbrk += tbp[i].gsbrk;
220     if (quiet & Q_MEMS) {
221       s_sbrk = scale_kbytes(tbp[i].sbrk, &d_sbrk);
222       s_gsbrk = scale_kbytes(tbp[i].gsbrk, &d_gsbrk);
223       sprintf(buf,
224               "%4d%c%8.0lf%2s%8.0lf%2s%10.0lf%10.0lf%10.0lf%10.0lf%10.0lf\n", i,
225               (i == GET_DIST_IOPROC ? '*' : ' '), d_sbrk, s_sbrk, d_gsbrk,
226               s_gsbrk, tbp[i].maxrss, tbp[i].minflt, tbp[i].majflt,
227               tbp[i].nvcsw, tbp[i].nivcsw);
228       write(2, buf, strlen(buf));
229     }
230   }
231   s_sbrk = scale_kbytes(tsbrk, &d_sbrk);
232   s_gsbrk = scale_kbytes(tgsbrk, &d_gsbrk);
233   sprintf(buf, "total%8.0lf%2s%8.0lf%2s%10.0lf%10.0lf%10.0lf%10.0lf%10.0lf\n",
234           d_sbrk, s_sbrk, d_gsbrk, s_gsbrk, tmaxrss, tminflt, tmajflt, tnvcsw,
235           tnivcsw);
236   write(2, buf, strlen(buf));
237 }
238 
239 /* print message info */
240 
msg(tbp)241 static void msg(tbp) struct tb *tbp;
242 {
243   int i, quiet, tcpus;
244   double ds, dr, dc, dst, drt, dct;
245   double mst, mrt, mct, ast, art, act;
246   char *ss, *sr, *sc, *as, *ar, *ac;
247   double d;
248   char buf[256];
249 
250   fprintf(__io_stderr(), "\n");
251   fprintf(__io_stderr(), "messages  send   send   send     recv   recv   "
252                            "recv     copy   copy   copy\n");
253   fprintf(__io_stderr(), "           cnt  total    avg      cnt  total    "
254                            "avg      cnt  total    avg\n");
255   dst = 0;
256   drt = 0;
257   dct = 0;
258   mst = 0;
259   mrt = 0;
260   mct = 0;
261   quiet = GET_DIST_QUIET;
262   tcpus = GET_DIST_TCPUS;
263   for (i = 0; i < tcpus; i++) {
264     dst += tbp[i].bytes;
265     drt += tbp[i].byter;
266     dct += tbp[i].bytec;
267     mst += tbp[i].datas;
268     mrt += tbp[i].datar;
269     mct += tbp[i].datac;
270     if (quiet & Q_MSGS) {
271       ss = scale_bytes(tbp[i].bytes, &ds);
272       sr = scale_bytes(tbp[i].byter, &dr);
273       sc = scale_bytes(tbp[i].bytec, &dc);
274       d = (tbp[i].datas == 0 ? 0.0 : tbp[i].bytes / tbp[i].datas);
275       as = scale_bytes(d, &ast);
276       d = (tbp[i].datar == 0 ? 0.0 : tbp[i].byter / tbp[i].datar);
277       ar = scale_bytes(d, &art);
278       d = (tbp[i].datac == 0 ? 0.0 : tbp[i].bytec / tbp[i].datac);
279       ac = scale_bytes(d, &act);
280       sprintf(buf, "%4d%c%9.0lf%5.0lf%2s%5.0lf%2s%9.0lf%5.0lf%2s%5.0lf%2s%9."
281                    "0lf%5.0lf%2s%5.0lf%2s\n",
282               i, (i == GET_DIST_IOPROC ? '*' : ' '), tbp[i].datas, ds, ss, ast,
283               as, tbp[i].datar, dr, sr, art, ar, tbp[i].datac, dc, sc, act, ac);
284       write(2, buf, strlen(buf));
285     }
286   }
287   ss = scale_bytes(dst, &ds);
288   sr = scale_bytes(drt, &dr);
289   sc = scale_bytes(dct, &dc);
290   d = (dst == 0 ? 0.0 : dst / mst);
291   as = scale_bytes(d, &ast);
292   d = (drt == 0 ? 0.0 : drt / mrt);
293   ar = scale_bytes(d, &art);
294   d = (dct == 0 ? 0.0 : dct / mct);
295   ac = scale_bytes(d, &act);
296   sprintf(buf, "total%9.0lf%5.0lf%2s%5.0lf%2s%9.0lf%5.0lf%2s%5.0lf%2s%9.0lf%5."
297                "0lf%2s%5.0lf%2s\n",
298           mst, ds, ss, ast, as, mrt, dr, sr, art, ar, mct, dc, sc, act, ac);
299   write(2, buf, strlen(buf));
300 }
301 
302 /** \brief Print stats (called by all cpus) */
303 void
__fort_stat_term(void)304 __fort_stat_term(void)
305 {
306   int i, ioproc, quiet, tcpus;
307   struct tb *tbp;
308 
309   __fort_gettb(&tb1);
310   tb1.r -= tb0.r;
311   tb1.u -= tb0.u;
312   tb1.s -= tb0.s;
313   if (tb1.r < (tb1.u + tb1.s)) {
314     tb1.r = tb1.u + tb1.s;
315   }
316   tb1.sbrk -= tb0.sbrk;
317   tb1.gsbrk -= tb0.gsbrk;
318 
319   tcpus = GET_DIST_TCPUS;
320   tbp = (struct tb *)__fort_gmalloc(sizeof(struct tb) * tcpus);
321 
322   ioproc = GET_DIST_IOPROC;
323   if (__fort_is_ioproc() == 0) { /* not i/o process */
324     __fort_rsend(ioproc, (char *)&(tb1), sizeof(struct tb), 1, __UCHAR);
325   } else {
326     for (i = 0; i < tcpus; i++) { /* i/o process */
327       if (i == ioproc) {
328         continue;
329       }
330       __fort_rrecv(i, (char *)&(tbp[i]), sizeof(struct tb), 1, __UCHAR);
331     }
332     tbp[ioproc] = tb1;
333     quiet = GET_DIST_QUIET;
334     if (quiet & (Q_CPU | Q_CPUS)) {
335       cpu(tbp);
336     }
337     if (quiet & (Q_MEM | Q_MEMS)) {
338       mem(tbp);
339     }
340     if (quiet & (Q_MSG | Q_MSGS)) {
341       msg(tbp);
342     }
343   }
344 
345   __fort_gfree(tbp);
346 }
347 
348 /* update start receive message stats */
349 
350 void
__fort_stat_recv(int cpu,long len)351 __fort_stat_recv(int cpu, long len)
352 /* cpu: sending cpu */
353 /* len: total length in bytes */
354 {
355   tb1.datar++;
356   tb1.byter += len;
357 }
358 
359 /** \brief Update done receive message stats */
360 void
__fort_stat_recv_done(int cpu)361 __fort_stat_recv_done(int cpu /* sending cpu */)
362 {
363 }
364 
365 /** \brief Update start send message stats
366  * \param cpu: receiving cpu
367  * \param len: total length in bytes
368  */
369 void
__fort_stat_send(int cpu,long len)370 __fort_stat_send(int cpu, long len)
371 {
372   tb1.datas++;
373   tb1.bytes += len;
374 }
375 
376 /** \brief Update done send message stats
377  * \param cpu - receiving cpu
378  */
379 void
__fort_stat_send_done(int cpu)380 __fort_stat_send_done(int cpu)
381 {
382 }
383 
384 /** \brief Update start bcopy message stats
385  * \param len: total length in bytes
386  */
387 void
__fort_stat_copy(long len)388 __fort_stat_copy(long len)
389 {
390   tb1.datac++;
391   tb1.bytec += len;
392 }
393 
394 /** \brief Update done bcopy message stats */
395 void
__fort_stat_copy_done(void)396 __fort_stat_copy_done(void)
397 {
398 }
399 
400 /** \brief Update start asynch receive message stats */
401 void
__fort_stat_arecv(int cpu,long len,int reqn)402 __fort_stat_arecv(int cpu, long len, int reqn)
403 {
404   tb1.datar += 1;
405   tb1.byter += len;
406 }
407 
408 /* update done asynch receive message stats */
409 
410 void
__fort_stat_arecv_done(int cpu)411 __fort_stat_arecv_done(int cpu)
412 {
413 }
414 
415 /* update start asynch send message stats */
416 
417 void
__fort_stat_asend(int cpu,long len,int reqn)418 __fort_stat_asend(int cpu, long len, int reqn)
419 {
420   tb1.datas += 1;
421   tb1.bytes += len;
422 }
423 
424 /* update done asynch send message stats */
425 
426 void
__fort_stat_asend_done(int cpu)427 __fort_stat_asend_done(int cpu)
428 {
429 }
430 
431 /* update start await receive message stats */
432 
433 void
__fort_stat_await(int reqn)434 __fort_stat_await(int reqn)
435 {
436 }
437 
438 /* update done await receive message stats */
439 
440 void
__fort_stat_await_done(int reqn)441 __fort_stat_await_done(int reqn)
442 {
443 }
444 
445 /* return incremental stats (internal use only) */
446 
ENTFTN(MSGSTATS,msgstats)447 void ENTFTN(MSGSTATS, msgstats)(__INT_T *msgstats)
448 {
449   msgstats[0] = tb0.datas; /* send count   */
450   msgstats[1] = tb0.datar; /* recv count   */
451   msgstats[2] = tb0.bytes; /* bytes sent   */
452   msgstats[3] = tb0.byter; /* bytes recv'd */
453 }
454 
455 /* FIXME: still used? */
456 /** \brief Function entry  */
457 void
__fort_stat_function_entry(int line,int lines,int cline,char * func,char * file,int funcl,int filel)458 __fort_stat_function_entry(int line, int lines, int cline, char *func,
459                           char *file, int funcl, int filel)
460 {
461 }
462 
463 /* FIXME: still used? */
464 /** \brief Line entry  */
465 void
__fort_stat_line_entry(int line)466 __fort_stat_line_entry(int line /* current line number */)
467 {
468 }
469 
470 /* FIXME: still used? */
471 /** \brief Function exit  */
472 void
__fort_stat_function_exit(void)473 __fort_stat_function_exit(void)
474 {
475 }
476