1 /*******************************************************************************
2  * thrill/common/linux_proc_stats.cpp
3  *
4  * Profiling Task which reads CPU, network, I/O loads, and more from Linux's
5  * /proc filesystem.
6  *
7  * Part of Project Thrill - http://project-thrill.org
8  *
9  * Copyright (C) 2016 Timo Bingmann <tb@panthema.net>
10  *
11  * All rights reserved. Published under the BSD-2 license in the LICENSE file.
12  ******************************************************************************/
13 
14 #include <thrill/common/linux_proc_stats.hpp>
15 
16 #include <thrill/common/json_logger.hpp>
17 #include <thrill/common/logger.hpp>
18 #include <thrill/common/porting.hpp>
19 #include <thrill/common/profile_task.hpp>
20 #include <thrill/common/profile_thread.hpp>
21 #include <thrill/common/string.hpp>
22 
23 #include <tlx/die.hpp>
24 #include <tlx/string/split_view.hpp>
25 #include <tlx/string/starts_with.hpp>
26 #include <tlx/string/trim.hpp>
27 
28 #include <cstring>
29 #include <fstream>
30 #include <limits>
31 #include <string>
32 #include <vector>
33 
34 #if __linux__
35 
36 #include <dirent.h>
37 #include <unistd.h>
38 
39 #endif
40 
41 namespace thrill {
42 namespace common {
43 
44 #if __linux__
45 
//! shorthand for the clock type whose time points are handed to
//! LinuxProcStats::RunTask() and used to compute elapsed time between samples.
using steady_clock = std::chrono::steady_clock;
47 
48 class LinuxProcStats final : public ProfileTask
49 {
50     static constexpr bool debug = false;
51 
52 public:
LinuxProcStats(JsonLogger & logger)53     explicit LinuxProcStats(JsonLogger& logger) : logger_(logger) {
54 
55         sc_pagesize_ = sysconf(_SC_PAGESIZE);
56 
57         file_stat_.open("/proc/stat");
58         file_net_dev_.open("/proc/net/dev");
59         file_diskstats_.open("/proc/diskstats");
60         file_meminfo_.open("/proc/meminfo");
61 
62         pid_t mypid = getpid();
63         file_pid_stat_.open("/proc/" + std::to_string(mypid) + "/stat");
64         file_pid_io_.open("/proc/" + std::to_string(mypid) + "/io");
65 
66         read_sys_block_devices();
67     }
68 
69     //! read /sys/block to find block devices
70     void read_sys_block_devices();
71 
72     //! calculate percentage of change relative to base.
perc(unsigned long long prev,unsigned long long curr,unsigned long long base)73     static double perc(unsigned long long prev, unsigned long long curr,
74                        unsigned long long base) {
75         if (curr < prev)
76             return 0.0;
77         else
78             return static_cast<double>(curr - prev)
79                    / static_cast<double>(base) * 100.0;
80     }
81 
82     //! method to prepare JsonLine
83     JsonLine& prepare_out(JsonLine& out);
84 
85     //! read /proc/stat
86     void read_stat(JsonLine& out);
87 
88     //! read /proc/<pid>/stat
89     void read_pid_stat(JsonLine& out);
90 
91     //! read /proc/net/dev
92     void read_net_dev(const steady_clock::time_point& tp, JsonLine& out);
93 
94     //! read /proc/<pid>/io
95     void read_pid_io(const steady_clock::time_point& tp, JsonLine& out);
96 
97     //! read /proc/diskstats
98     void read_diskstats(JsonLine& out);
99 
100     //! read /proc/meminfo
101     void read_meminfo(JsonLine& out);
102 
RunTask(const steady_clock::time_point & tp)103     void RunTask(const steady_clock::time_point& tp) final {
104 
105         // JsonLine to construct
106         JsonLine out = logger_.line();
107 
108         read_stat(out);
109         read_pid_stat(out);
110         read_net_dev(tp, out);
111         read_pid_io(tp, out);
112         read_diskstats(out);
113         read_meminfo(out);
114 
115         tp_last_ = tp;
116     }
117 
118 private:
119     //! reference to JsonLogger for output
120     JsonLogger& logger_;
121 
122     //! open file handle to /proc/stat
123     std::ifstream file_stat_;
124     //! open file handle to /proc/net/dev
125     std::ifstream file_net_dev_;
126     //! open file handle to /proc/<our-pid>/stat
127     std::ifstream file_pid_stat_;
128     //! open file handle to /proc/<our-pid>/io
129     std::ifstream file_pid_io_;
130     //! open file handle to /proc/diskstats
131     std::ifstream file_diskstats_;
132     //! open file handle to /proc/meminfo
133     std::ifstream file_meminfo_;
134 
135     //! last time point called
136     steady_clock::time_point tp_last_;
137 
138     //! sysconf(_SC_PAGESIZE)
139     size_t sc_pagesize_;
140 
141     struct CpuStat {
142         unsigned long long user = 0;
143         unsigned long long nice = 0;
144         unsigned long long sys = 0;
145         unsigned long long idle = 0;
146         unsigned long long iowait = 0;
147         unsigned long long steal = 0;
148         unsigned long long hardirq = 0;
149         unsigned long long softirq = 0;
150         unsigned long long guest = 0;
151         unsigned long long guest_nice = 0;
152 
153         //! total uptime across all modes
uptimethrill::common::LinuxProcStats::CpuStat154         unsigned long long uptime() const {
155             return user + nice + sys + idle
156                    + iowait + hardirq + steal + softirq;
157         }
158         //! return pure user mode time excluding virtual guests
user_plainthrill::common::LinuxProcStats::CpuStat159         unsigned long long user_plain() const { return user - guest; }
160         //! return pure nice mode time excluding virtual guests
nice_plainthrill::common::LinuxProcStats::CpuStat161         unsigned long long nice_plain() const { return nice - guest_nice; }
162     };
163 
164     struct PidStat {
165         unsigned long long check_pid = 0;
166         unsigned long long utime = 0;
167         unsigned long long stime = 0;
168         unsigned long long cutime = 0;
169         unsigned long long cstime = 0;
170         unsigned long long num_threads = 0;
171         unsigned long long vsize = 0;
172         unsigned long long rss = 0;
173     };
174 
175     struct NetDevStat {
176         std::string        if_name;
177         unsigned long long rx_pkts = 0;
178         unsigned long long tx_pkts = 0;
179         unsigned long long rx_bytes = 0;
180         unsigned long long tx_bytes = 0;
181     };
182 
183     struct PidIoStat {
184         unsigned long long read_bytes = 0;
185         unsigned long long write_bytes = 0;
186     };
187 
188     struct DiskStats {
189         std::string        dev_name;
190         //! number of read operations issued to the device
191         unsigned long long rd_ios = 0;
192         //! number of read requests merged
193         unsigned long long rd_merged = 0;
194         //! number of sectors read (512b sectors)
195         unsigned long long rd_sectors = 0;
196         //! time of read requests in queue (ms)
197         unsigned long long rd_time = 0;
198 
199         //! number of write operations issued to the device
200         unsigned long long wr_ios = 0;
201         //! number of write requests merged
202         unsigned long long wr_merged = 0;
203         //! number of sectors written (512b sectors)
204         unsigned long long wr_sectors = 0;
205         //! Time of write requests in queue (ms)
206         unsigned long long wr_time = 0;
207 
208         //! number of I/Os in progress
209         unsigned long long ios_progr = 0;
210         //! number of time total (for this device) for I/O (ms)
211         unsigned long long total_time = 0;
212         //! number of time requests spent in queue (ms)
213         unsigned long long rq_time = 0;
214     };
215 
216     //! delta jiffies since the last iteration (read from uptime() of the cpu
217     //! summary)
218     unsigned long long jiffies_delta_ = 0;
219 
220     //! previous summary cpu reading
221     CpuStat cpu_prev_;
222 
223     //! previous cpu core reading
224     std::vector<CpuStat> cpu_core_prev_;
225 
226     //! previous reading from pid's stat file
227     PidStat pid_stat_prev_;
228 
229     //! previous reading from network stats
230     std::vector<NetDevStat> net_dev_prev_;
231 
232     //! find or create entry for net_dev
233     NetDevStat& find_net_dev(const std::string& if_name);
234 
235     //! previous reading of pid's io file
236     PidIoStat pid_io_prev_;
237 
238     //! find or create entry for net_dev
239     DiskStats * find_diskstats(const char* dev_name);
240 
241     //! previous reading from diskstats
242     std::vector<DiskStats> diskstats_prev_;
243 
244     //! helper method to parse size lines from /proc/meminfo
245     static bool parse_meminfo(const char* str, size_t& size);
246 };
247 
prepare_out(JsonLine & out)248 JsonLine& LinuxProcStats::prepare_out(JsonLine& out) {
249     if (out.items() == 2) {
250         out << "class" << "LinuxProcStats"
251             << "event" << "profile";
252     }
253     return out;
254 }
255 
read_stat(JsonLine & out)256 void LinuxProcStats::read_stat(JsonLine& out) {
257     if (!file_stat_.is_open()) return;
258 
259     file_stat_.clear();
260     file_stat_.seekg(0);
261     if (!file_stat_.good()) return;
262 
263     // read the number of jiffies spent in the various modes since the
264     // last tick.
265 
266     const double kNaN = 0; // we use zero since NaN is not compatible with JSON
267 
268     std::vector<double> cores_user, cores_nice, cores_sys, cores_idle,
269         cores_iowait, cores_hardirq, cores_softirq,
270         cores_steal, cores_guest, cores_guest_nice;
271 
272     std::string line;
273     while (std::getline(file_stat_, line)) {
274         if (tlx::starts_with(line, "cpu  ")) {
275 
276             CpuStat curr;
277             int ret = sscanf(
278                 line.data() + 5,
279                 "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
280                 &curr.user,
281                 &curr.nice,
282                 &curr.sys,
283                 &curr.idle,
284                 &curr.iowait,
285                 &curr.hardirq,
286                 &curr.softirq,
287                 &curr.steal,
288                 &curr.guest,
289                 &curr.guest_nice);
290 
291             if (ret < 4) die("/proc/stat returned too few values");
292 
293             CpuStat& prev = cpu_prev_;
294 
295             if (!prev.user) {
296                 // just store the first reading
297                 prev = curr;
298                 continue;
299             }
300 
301             jiffies_delta_ = curr.uptime() - prev.uptime();
302             unsigned long long base = jiffies_delta_;
303 
304             sLOG << "cpu"
305                  << "delta" << jiffies_delta_
306                  << "user" << perc(prev.user, curr.user, base)
307                  << "nice" << perc(prev.nice, curr.nice, base)
308                  << "sys" << perc(prev.sys, curr.sys, base)
309                  << "iowait" << (ret >= 5 ? perc(prev.iowait, curr.iowait, base) : kNaN)
310                  << "hardirq" << (ret >= 6 ? perc(prev.hardirq, curr.hardirq, base) : kNaN)
311                  << "softirq" << (ret >= 7 ? perc(prev.softirq, curr.softirq, base) : kNaN)
312                  << "steal" << (ret >= 8 ? perc(prev.steal, curr.steal, base) : kNaN)
313                  << "guest" << (ret >= 9 ? perc(prev.guest, curr.guest, base) : kNaN)
314                  << "guest_nice" << (ret >= 10 ? perc(prev.guest_nice, curr.guest_nice, base) : kNaN)
315                  << "idle" << perc(prev.idle, curr.idle, base);
316 
317             prepare_out(out)
318                 << "cpu_user" << perc(prev.user, curr.user, base)
319                 << "cpu_nice" << perc(prev.nice, curr.nice, base)
320                 << "cpu_sys" << perc(prev.sys, curr.sys, base)
321                 << "cpu_idle" << perc(prev.idle, curr.idle, base)
322                 << "cpu_iowait" << (ret >= 5 ? perc(prev.iowait, curr.iowait, base) : kNaN)
323                 << "cpu_hardirq" << (ret >= 6 ? perc(prev.hardirq, curr.hardirq, base) : kNaN)
324                 << "cpu_softirq" << (ret >= 7 ? perc(prev.softirq, curr.softirq, base) : kNaN)
325                 << "cpu_steal" << (ret >= 8 ? perc(prev.steal, curr.steal, base) : kNaN)
326                 << "cpu_guest" << (ret >= 9 ? perc(prev.guest, curr.guest, base) : kNaN)
327                 << "cpu_guest_nice" << (ret >= 10 ? perc(prev.guest_nice, curr.guest_nice, base) : kNaN);
328 
329             prev = curr;
330         }
331         else if (tlx::starts_with(line, "cpu")) {
332 
333             unsigned core_id;
334             CpuStat curr;
335             int ret = sscanf(
336                 line.data() + 3,
337                 "%u %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
338                 &core_id,
339                 &curr.user,
340                 &curr.nice,
341                 &curr.sys,
342                 &curr.idle,
343                 &curr.iowait,
344                 &curr.hardirq,
345                 &curr.softirq,
346                 &curr.steal,
347                 &curr.guest,
348                 &curr.guest_nice);
349 
350             if (ret < 5) die("/proc/stat returned too few values");
351 
352             if (cpu_core_prev_.size() < core_id + 1)
353                 cpu_core_prev_.resize(core_id + 1);
354 
355             CpuStat& prev = cpu_core_prev_[core_id];
356 
357             if (!prev.user) {
358                 // just store the first reading
359                 prev = curr;
360                 continue;
361             }
362 
363             jiffies_delta_ = curr.uptime() - prev.uptime();
364             unsigned long long base = jiffies_delta_;
365 
366             sLOG << "core" << core_id
367                  << "delta" << jiffies_delta_
368                  << "user" << perc(prev.user, curr.user, base)
369                  << "nice" << perc(prev.nice, curr.nice, base)
370                  << "sys" << perc(prev.sys, curr.sys, base)
371                  << "iowait" << (ret >= 6 ? perc(prev.iowait, curr.iowait, base) : kNaN)
372                  << "hardirq" << (ret >= 7 ? perc(prev.hardirq, curr.hardirq, base) : kNaN)
373                  << "softirq" << (ret >= 8 ? perc(prev.softirq, curr.softirq, base) : kNaN)
374                  << "steal" << (ret >= 9 ? perc(prev.steal, curr.steal, base) : kNaN)
375                  << "guest" << (ret >= 10 ? perc(prev.guest, curr.guest, base) : kNaN)
376                  << "guest_nice" << (ret >= 11 ? perc(prev.guest_nice, curr.guest_nice, base) : kNaN)
377                  << "idle" << perc(prev.idle, curr.idle, base);
378 
379             cores_user.emplace_back(perc(prev.user, curr.user, base));
380             cores_nice.emplace_back(perc(prev.nice, curr.nice, base));
381             cores_sys.emplace_back(perc(prev.sys, curr.sys, base));
382             cores_idle.emplace_back(perc(prev.idle, curr.idle, base));
383             cores_iowait.push_back(ret >= 6 ? perc(prev.iowait, curr.iowait, base) : kNaN);
384             cores_hardirq.push_back(ret >= 7 ? perc(prev.hardirq, curr.hardirq, base) : kNaN);
385             cores_softirq.push_back(ret >= 8 ? perc(prev.softirq, curr.softirq, base) : kNaN);
386             cores_steal.push_back(ret >= 9 ? perc(prev.steal, curr.steal, base) : kNaN);
387             cores_guest.push_back(ret >= 10 ? perc(prev.guest, curr.guest, base) : kNaN);
388             cores_guest_nice.push_back(ret >= 11 ? perc(prev.guest_nice, curr.guest_nice, base) : kNaN);
389 
390             prev = curr;
391         }
392     }
393 
394     if (!cores_user.empty()) {
395         prepare_out(out)
396             << "cores_user" << cores_user
397             << "cores_nice" << cores_nice
398             << "cores_sys" << cores_sys
399             << "cores_idle" << cores_idle
400             << "cores_iowait" << cores_iowait
401             << "cores_hardirq" << cores_hardirq
402             << "cores_softirq" << cores_softirq
403             << "cores_steal" << cores_steal
404             << "cores_guest" << cores_guest
405             << "cores_guest_nice" << cores_guest_nice;
406     }
407 }
408 
read_pid_stat(JsonLine & out)409 void LinuxProcStats::read_pid_stat(JsonLine& out) {
410     if (!file_pid_stat_.is_open()) return;
411 
412     file_pid_stat_.clear();
413     file_pid_stat_.seekg(0);
414     if (!file_pid_stat_.good()) return;
415 
416     std::string line;
417     std::getline(file_pid_stat_, line);
418 
419     PidStat curr;
420 
421     /* Field          Content */
422     /*  pid           process id */
423     /*  tcomm         filename of the executable */
424     /*  state         state (R is running, S is sleeping, D is sleeping in an */
425     /*                uninterruptible wait, Z is zombie, T is traced or stopped) */
426     /*  ppid          process id of the parent process */
427     /*  pgrp          pgrp of the process */
428     /*  sid           session id */
429     /*  tty_nr        tty the process uses */
430     /*  tty_pgrp      pgrp of the tty */
431     /*  flags         task flags */
432     /*  min_flt       number of minor faults */
433     /*  cmin_flt      number of minor faults with child's */
434     /*  maj_flt       number of major faults */
435     /*  cmaj_flt      number of major faults with child's */
436     /*  utime         user mode jiffies */
437     /*  stime         kernel mode jiffies */
438     /*  cutime        user mode jiffies with child's */
439     /*  cstime        kernel mode jiffies with child's */
440     /*  priority      priority level */
441     /*  nice          nice level */
442     /*  num_threads   number of threads */
443     /*  it_real_value (obsolete, always 0) */
444     /*  start_time    time the process started after system boot */
445     /*  vsize         virtual memory size */
446     /*  rss           resident set memory size in SC_PAGESIZE units */
447     /*  rsslim        current limit in bytes on the rss */
448     int ret = sscanf(
449         line.data(),
450         /* pid tcomm state ppid pgrp sid tty_nr tty_pgrp flags */
451         /* 19162 (firefox) R 1 19162 19162 0 -1 4218880 */
452         "%llu %*s %*s %*u %*u %*u %*u %*u %*u "
453         /* min_flt cmin_flt maj_flt cmaj_flt utime stime cutime cstime priority nice */
454         /* 340405 6560 3 0 7855 526 3 2 20 0 */
455         "%*u %*u %*u %*u %llu %llu %llu %llu %*u %*u "
456         /* num_threads it_real_value start_time vsize rss rsslim */
457         /* 44 0 130881921 1347448832 99481 18446744073709551615 */
458         "%llu %*u %*u %llu %llu",
459         /* (firefox) more: 4194304 4515388 140732862948048 140732862941536 246430093205 0 0 4096 33572015 18446744073709551615 0 0 17 0 0 0 0 0 0 8721489 8726954 14176256 140732862948868 140732862948876 140732862948876 140732862951399 0 */
460         &curr.check_pid,
461         &curr.utime, &curr.stime, &curr.cutime, &curr.cstime,
462         &curr.num_threads, &curr.vsize, &curr.rss);
463 
464     die_unequal(8, ret);
465 
466     if (!pid_stat_prev_.check_pid) {
467         pid_stat_prev_ = curr;
468         return;
469     }
470     unsigned long long base = jiffies_delta_;
471 
472     sLOG << "pid_stat"
473          << "utime" << perc(pid_stat_prev_.utime, curr.utime, base)
474          << "stime" << perc(pid_stat_prev_.stime, curr.stime, base)
475          << "cutime" << perc(pid_stat_prev_.cutime, curr.cutime, base)
476          << "cstime" << perc(pid_stat_prev_.cstime, curr.cstime, base)
477          << "num_threads" << curr.num_threads
478          << "vsize" << curr.vsize
479          << "rss" << curr.rss * sc_pagesize_;
480 
481     prepare_out(out)
482         << "pr_user" << perc(pid_stat_prev_.utime, curr.utime, base)
483         << "pr_sys" << perc(pid_stat_prev_.stime, curr.stime, base)
484         << "pr_nthreads" << curr.num_threads
485         << "pr_vsize" << curr.vsize
486         << "pr_rss" << curr.rss * sc_pagesize_;
487 
488     pid_stat_prev_ = curr;
489 }
490 
491 LinuxProcStats::NetDevStat&
find_net_dev(const std::string & if_name)492 LinuxProcStats::find_net_dev(const std::string& if_name) {
493     for (NetDevStat& i : net_dev_prev_) {
494         if (i.if_name == if_name) return i;
495     }
496     net_dev_prev_.emplace_back();
497     net_dev_prev_.back().if_name = if_name;
498     return net_dev_prev_.back();
499 }
500 
read_net_dev(const steady_clock::time_point & tp,JsonLine & out)501 void LinuxProcStats::read_net_dev(
502     const steady_clock::time_point& tp, JsonLine& out) {
503     if (!file_net_dev_.is_open()) return;
504 
505     file_net_dev_.clear();
506     file_net_dev_.seekg(0);
507     if (!file_net_dev_.good()) return;
508 
509     double elapsed = static_cast<double>(
510         std::chrono::duration_cast<std::chrono::microseconds>(
511             tp - tp_last_).count()) / 1e6;
512 
513     NetDevStat sum;
514     bool sum_output = false;
515 
516     std::string line;
517     while (std::getline(file_net_dev_, line)) {
518         std::string::size_type colonpos = line.find(':');
519         if (colonpos == std::string::npos) continue;
520 
521         std::string if_name = line.substr(0, colonpos);
522         tlx::trim(&if_name);
523 
524         NetDevStat curr;
525         int ret = sscanf(line.data() + colonpos + 1,
526                          "%llu %llu %*u %*u %*u %*u %*u %*u %llu %llu",
527                          &curr.rx_bytes, &curr.rx_pkts,
528                          &curr.tx_bytes, &curr.tx_pkts);
529         die_unequal(4, ret);
530 
531         curr.if_name = if_name;
532         NetDevStat& prev = find_net_dev(if_name);
533 
534         if (prev.rx_bytes == 0) {
535             // just store the first reading
536             prev = curr;
537             continue;
538         }
539 
540         sLOG << "net" << if_name
541              << "rx_bytes" << curr.rx_bytes - prev.rx_bytes
542              << "tx_bytes" << curr.tx_bytes - prev.tx_bytes
543              << "rx_pkts" << curr.rx_pkts - prev.rx_pkts
544              << "tx_pkts" << curr.tx_pkts - prev.tx_pkts
545              << "rx_speed"
546              << static_cast<double>(curr.rx_bytes - prev.rx_bytes) / elapsed
547              << "tx_speed"
548              << static_cast<double>(curr.tx_bytes - prev.tx_bytes) / elapsed;
549 
550         sum.rx_bytes += curr.rx_bytes - prev.rx_bytes;
551         sum.tx_bytes += curr.tx_bytes - prev.tx_bytes;
552         sum.rx_pkts += curr.rx_pkts - prev.rx_pkts;
553         sum.tx_pkts += curr.tx_pkts - prev.tx_pkts;
554         sum_output = true;
555 
556         prev = curr;
557     }
558 
559     // summarizes interfaces
560     if (sum_output)
561     {
562         sLOG << "net" << "(all)"
563              << "rx_bytes" << sum.rx_bytes
564              << "tx_bytes" << sum.tx_bytes
565              << "rx_pkts" << sum.rx_pkts
566              << "tx_pkts" << sum.tx_pkts
567              << "rx_speed" << static_cast<double>(sum.rx_bytes) / elapsed
568              << "tx_speed" << static_cast<double>(sum.tx_bytes) / elapsed;
569 
570         prepare_out(out)
571             << "net_rx_bytes" << sum.rx_bytes
572             << "net_tx_bytes" << sum.tx_bytes
573             << "net_rx_pkts" << sum.rx_pkts
574             << "net_tx_pkts" << sum.tx_pkts
575             << "net_rx_speed" << static_cast<double>(sum.rx_bytes) / elapsed
576             << "net_tx_speed" << static_cast<double>(sum.tx_bytes) / elapsed;
577     }
578 }
579 
read_pid_io(const steady_clock::time_point & tp,JsonLine & out)580 void LinuxProcStats::read_pid_io(const steady_clock::time_point& tp, JsonLine& out) {
581     if (!file_pid_io_.is_open()) return;
582 
583     file_pid_io_.clear();
584     file_pid_io_.seekg(0);
585     if (!file_pid_io_.good()) return;
586 
587     PidIoStat curr;
588 
589     std::string line;
590     while (std::getline(file_stat_, line)) {
591         if (tlx::starts_with(line, "read_bytes: ")) {
592             int ret = sscanf(line.data() + 12, "%llu", &curr.read_bytes);
593             die_unequal(1, ret);
594         }
595         else if (tlx::starts_with(line, "write_bytes: ")) {
596             int ret = sscanf(line.data() + 13, "%llu", &curr.write_bytes);
597             die_unequal(1, ret);
598         }
599     }
600 
601     if (!pid_io_prev_.read_bytes) {
602         // just store the first reading
603         pid_io_prev_ = curr;
604         return;
605     }
606 
607     double elapsed = static_cast<double>(
608         std::chrono::duration_cast<std::chrono::microseconds>(
609             tp - tp_last_).count()) / 1e6;
610 
611     PidIoStat& prev = pid_io_prev_;
612 
613     sLOG << "pid_io"
614          << "read_bytes" << curr.read_bytes - prev.read_bytes
615          << "write_bytes" << curr.write_bytes - prev.write_bytes
616          << "read_speed"
617          << static_cast<double>(curr.read_bytes - prev.read_bytes) / elapsed
618          << "write_speed"
619          << static_cast<double>(curr.write_bytes - prev.write_bytes) / elapsed;
620 
621     prepare_out(out)
622         << "pr_io_read_bytes" << curr.read_bytes - prev.read_bytes
623         << "pr_io_write_bytes" << curr.read_bytes - prev.read_bytes
624         << "pr_io_read_speed"
625         << static_cast<double>(curr.read_bytes - prev.read_bytes) / elapsed
626         << "pr_io_write_speed"
627         << static_cast<double>(curr.write_bytes - prev.write_bytes) / elapsed;
628 
629     prev = curr;
630 }
631 
632 LinuxProcStats::DiskStats*
find_diskstats(const char * dev_name)633 LinuxProcStats::find_diskstats(const char* dev_name) {
634     for (DiskStats& i : diskstats_prev_) {
635         if (strcmp(i.dev_name.c_str(), dev_name) == 0) return &i;
636     }
637     return nullptr;
638 }
639 
read_sys_block_devices()640 void LinuxProcStats::read_sys_block_devices() {
641     DIR* dirp = opendir("/sys/block");
642     if (!dirp) return;
643 
644     struct dirent* de;
645     while ((de = common::ts_readdir(dirp)) != nullptr) {
646         if (de->d_name[0] == '.') continue;
647         // push into diskstats vector
648         diskstats_prev_.emplace_back();
649         diskstats_prev_.back().dev_name = de->d_name;
650     }
651     closedir(dirp);
652 }
653 
read_diskstats(JsonLine & out)654 void LinuxProcStats::read_diskstats(JsonLine& out) {
655     if (!file_diskstats_.is_open()) return;
656 
657     file_diskstats_.clear();
658     file_diskstats_.seekg(0);
659     if (!file_diskstats_.good()) return;
660 
661     DiskStats sum;
662     bool sum_valid = false;
663     JsonLine disks = prepare_out(out).sub("disks");
664 
665     std::string line;
666     while (std::getline(file_diskstats_, line)) {
667 
668         char dev_name[32];
669         DiskStats curr;
670         int ret = sscanf(
671             line.data(),
672             "%*u %*u %31s %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
673             dev_name,
674             &curr.rd_ios, &curr.rd_merged, &curr.rd_sectors, &curr.rd_time,
675             &curr.wr_ios, &curr.wr_merged, &curr.wr_sectors, &curr.wr_time,
676             &curr.ios_progr, &curr.total_time, &curr.rq_time);
677         die_unequal(12, ret);
678 
679         DiskStats* ptr_prev = find_diskstats(dev_name);
680         if (!ptr_prev) continue;
681 
682         DiskStats& prev = *ptr_prev;
683         curr.dev_name = dev_name;
684 
685         if (!prev.rd_ios && !prev.wr_ios && !prev.ios_progr) {
686             // just store the first reading, also: skipped entries that remain
687             // zero.
688             prev = curr;
689             continue;
690         }
691 
692         sLOG << "diskstats"
693              << "dev" << dev_name
694              << "rd_ios" << curr.rd_ios - prev.rd_ios
695              << "rd_merged" << curr.rd_merged - prev.rd_merged
696              << "rd_bytes" << (curr.rd_sectors - prev.rd_sectors) * 512
697              << "rd_time" << double(curr.rd_time - prev.rd_time) / 1e3
698              << "wr_ios" << curr.wr_ios - prev.wr_ios
699              << "wr_merged" << curr.wr_merged - prev.wr_merged
700              << "wr_bytes" << (curr.wr_sectors - prev.wr_sectors) * 512
701              << "wr_time" << double(curr.wr_time - prev.wr_time) / 1e3
702              << "ios_progr" << curr.ios_progr
703              << "total_time" << double(curr.total_time - prev.total_time) / 1e3
704              << "rq_time" << double(curr.rq_time - prev.rq_time) / 1e3;
705 
706         disks.sub(dev_name)
707             << "rd_ios" << curr.rd_ios - prev.rd_ios
708             << "rd_merged" << curr.rd_merged - prev.rd_merged
709             << "rd_bytes" << (curr.rd_sectors - prev.rd_sectors) * 512
710             << "rd_time" << double(curr.rd_time - prev.rd_time) / 1e3
711             << "wr_ios" << curr.wr_ios - prev.wr_ios
712             << "wr_merged" << curr.wr_merged - prev.wr_merged
713             << "wr_bytes" << (curr.wr_sectors - prev.wr_sectors) * 512
714             << "wr_time" << double(curr.wr_time - prev.wr_time) / 1e3
715             << "ios_progr" << curr.ios_progr
716             << "total_time" << double(curr.total_time - prev.total_time) / 1e3
717             << "rq_time" << double(curr.rq_time - prev.rq_time) / 1e3;
718 
719         sum.rd_ios += curr.rd_ios - prev.rd_ios;
720         sum.rd_merged += curr.rd_merged - prev.rd_merged;
721         sum.rd_sectors += curr.rd_sectors - prev.rd_sectors;
722         sum.rd_time += curr.rd_time - prev.rd_time;
723         sum.wr_ios += curr.wr_ios - prev.wr_ios;
724         sum.wr_merged += curr.wr_merged - prev.wr_merged;
725         sum.wr_sectors += curr.wr_sectors - prev.wr_sectors;
726         sum.wr_time += curr.wr_time - prev.wr_time;
727         sum.ios_progr += curr.ios_progr;
728         sum.total_time += curr.total_time - prev.total_time;
729         sum.rq_time += curr.rq_time - prev.rq_time;
730         sum_valid = true;
731 
732         prev = curr;
733     }
734 
735     disks.Close();
736 
737     if (sum_valid) {
738         prepare_out(out).sub("diskstats")
739             << "rd_ios" << sum.rd_ios
740             << "rd_merged" << sum.rd_merged
741             << "rd_bytes" << sum.rd_sectors * 512
742             << "rd_time" << double(sum.rd_time) / 1e3
743             << "wr_ios" << sum.wr_ios
744             << "wr_merged" << sum.wr_merged
745             << "wr_bytes" << sum.wr_sectors * 512
746             << "wr_time" << double(sum.wr_time) / 1e3
747             << "ios_progr" << sum.ios_progr
748             << "total_time" << double(sum.total_time) / 1e3
749             << "rq_time" << double(sum.rq_time) / 1e3;
750     }
751 }
752 
753 //! helper method to parse size lines from /proc/meminfo
parse_meminfo(const char * str,size_t & size)754 bool LinuxProcStats::parse_meminfo(const char* str, size_t& size) {
755     char* endptr;
756     size = strtoul(str, &endptr, 10);
757     // parse failed, no number
758     if (!endptr) return false;
759 
760     // skip over spaces
761     while (*endptr == ' ') ++endptr;
762 
763     // multiply with 2^power
764     if (*endptr == 'k' || *endptr == 'K')
765         size *= 1024, ++endptr;
766     else if (*endptr == 'm' || *endptr == 'M')
767         size *= 1024 * 1024, ++endptr;
768     else if (*endptr == 'g' || *endptr == 'G')
769         size *= 1024 * 1024 * 1024llu, ++endptr;
770 
771     // byte indicator
772     if (*endptr == 'b' || *endptr == 'B') {
773         ++endptr;
774     }
775 
776     // skip over spaces
777     while (*endptr == ' ') ++endptr;
778 
779     return (*endptr == 0);
780 }
781 
read_meminfo(JsonLine & out)782 void LinuxProcStats::read_meminfo(JsonLine& out) {
783     if (!file_meminfo_.is_open()) return;
784 
785     file_meminfo_.clear();
786     file_meminfo_.seekg(0);
787     if (!file_meminfo_.good()) return;
788 
789     JsonLine mem = prepare_out(out).sub("meminfo");
790 
791     size_t swap_total = 0, swap_free = 0;
792 
793     std::string line;
794     while (std::getline(file_meminfo_, line)) {
795         std::string::size_type colonpos = line.find(':');
796         if (colonpos == std::string::npos) continue;
797 
798         tlx::string_view key(line.begin(), line.begin() + colonpos);
799 
800         size_t size;
801 
802         if (key == "MemTotal") {
803             if (parse_meminfo(line.data() + colonpos + 1, size))
804                 mem << "total" << size;
805         }
806         else if (key == "MemFree") {
807             if (parse_meminfo(line.data() + colonpos + 1, size))
808                 mem << "free" << size;
809         }
810         else if (key == "MemAvailable") {
811             if (parse_meminfo(line.data() + colonpos + 1, size))
812                 mem << "available" << size;
813         }
814         else if (key == "Buffers") {
815             if (parse_meminfo(line.data() + colonpos + 1, size))
816                 mem << "buffers" << size;
817         }
818         else if (key == "Cached") {
819             if (parse_meminfo(line.data() + colonpos + 1, size))
820                 mem << "cached" << size;
821         }
822         else if (key == "Mapped") {
823             if (parse_meminfo(line.data() + colonpos + 1, size))
824                 mem << "mapped" << size;
825         }
826         else if (key == "Shmem") {
827             if (parse_meminfo(line.data() + colonpos + 1, size))
828                 mem << "shmem" << size;
829         }
830         else if (key == "SwapTotal") {
831             if (parse_meminfo(line.data() + colonpos + 1, size)) {
832                 mem << "swap_total" << size;
833                 swap_total = size;
834                 if (swap_total && swap_free) {
835                     mem << "swap_used" << swap_total - swap_free;
836                     swap_total = swap_free = 0;
837                 }
838             }
839         }
840         else if (key == "SwapFree") {
841             if (parse_meminfo(line.data() + colonpos + 1, size)) {
842                 mem << "swap_free" << size;
843                 swap_free = size;
844                 if (swap_total && swap_free) {
845                     mem << "swap_used" << swap_total - swap_free;
846                     swap_total = swap_free = 0;
847                 }
848             }
849         }
850     }
851 }
852 
StartLinuxProcStatsProfiler(ProfileThread & sched,JsonLogger & logger)853 void StartLinuxProcStatsProfiler(ProfileThread& sched, JsonLogger& logger) {
854     sched.Add(std::chrono::seconds(1),
855               new LinuxProcStats(logger), /* own_task */ true);
856 }
857 
858 #else
859 
860 void StartLinuxProcStatsProfiler(ProfileThread&, JsonLogger&)
861 { }
862 
863 #endif  // __linux__
864 
865 } // namespace common
866 } // namespace thrill
867 
868 /******************************************************************************/
869