1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 **/
19
20 #include "common.h"
21 #include "log.h"
22 #include "mutexs.h"
23 #include "stats.h"
24 #include "ipc.h"
25 #include "procstat.h"
26
27 #ifdef ZBX_PROCSTAT_COLLECTOR
28
29 /*
30 * The process CPU statistics are stored using the following memory layout.
31 *
32 * .--------------------------------------.
33 * | header |
34 * | ------------------------------------ |
35 * | process cpu utilization queries |
36 * | and historical data |
37 * | ------------------------------------ |
38 * | free space |
39 * '--------------------------------------'
40 *
 * Because the shared memory can be resized by other processes, memory offsets
 * from the beginning of the shared memory segment are used instead of pointers
 * (when allocating strings and building singly linked lists).
 * A 0 offset is interpreted similarly to a NULL pointer.
45 *
46 * Currently integer values are used to store offsets to internally allocated
47 * memory which leads to 2GB total size limit.
48 *
49 * During every data collection cycle collector does the following:
50 * 1) acquires list of all processes running on system
51 * 2) builds a list of processes monitored by queries
52 * 3) reads total cpu utilization snapshot for the monitored processes
53 * 4) calculates cpu utilization difference by comparing with previous snapshot
54 * 5) updates cpu utilization values for queries.
55 * 6) saves the last cpu utilization snapshot
56 *
57 * Initialisation.
58 * * zbx_procstat_init() initialises procstat dshm structure but doesn't allocate memory from the system
59 * (zbx_dshm_create() called with size 0).
60 * * the first call of procstat_add() allocates the shared memory for the header and the first query
61 * via call to zbx_dshm_realloc().
62 * * The header is initialised in procstat_copy_data() which is called back from zbx_dshm_realloc().
63 *
64 * Memory allocation within dshm.
65 * * Ensure that memory segment has enough free space with procstat_dshm_has_enough_space() before
66 * allocating space within segment with procstat_alloc() or functions that use it.
67 * * Check how much of the allocated dshm is actually used by procstat by procstat_dshm_used_size().
68 * * Change the dshm size with zbx_dshm_realloc().
69 *
70 * Synchronisation.
71 * * agentd processes share a single instance of ZBX_COLLECTOR_DATA (*collector) containing reference
72 * to shared procstat memory segment.
73 * * Each agentd process also holds local reference to procstat shared memory segment.
74 * * The system keeps the shared memory segment until the last process detaches from it.
75 * * Synchronise both references with procstat_reattach() before using procstat shared memory segment.
76 */
77
78 /* the main collector data */
79 extern ZBX_COLLECTOR_DATA *collector;
80
81 /* local reference to the procstat shared memory */
82 static zbx_dshm_ref_t procstat_ref;
83
/* header located at the very beginning of the procstat shared memory segment */
typedef struct
{
	int	queries;	/* offset of the first active query (head of the linked */
				/* list), PROCSTAT_NULL_OFFSET when there are no queries */

	int	size_allocated;	/* number of bytes already handed out from the segment; */
				/* starts at the aligned header size and grows with */
				/* every allocated query and string */

	size_t	size;		/* total size of the shared memory segment in bytes */
}
zbx_procstat_header_t;
96
/* offset value that plays the role of a NULL pointer inside the segment */
#define PROCSTAT_NULL_OFFSET	0

/* size of the segment header after ZBX_SIZE_T_ALIGN8() has been applied */
/* (presumably rounds up to a multiple of 8 - confirm against its definition) */
#define PROCSTAT_ALIGNED_HEADER_SIZE	ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_header_t))

/* converts an offset from the segment start into an address; */
/* all macro arguments are parenthesised so arbitrary expressions can be passed */
#define PROCSTAT_PTR(base, offset)	((char *)(base) + (offset))

/* same as PROCSTAT_PTR() but maps PROCSTAT_NULL_OFFSET to NULL; */
/* NOTE: 'offset' is evaluated twice - do not pass expressions with side effects */
#define PROCSTAT_PTR_NULL(base, offset)	\
	(PROCSTAT_NULL_OFFSET == (offset) ? NULL : PROCSTAT_PTR(base, offset))

/* the first query of the active query list or NULL if the list is empty */
#define PROCSTAT_QUERY_FIRST(base)	\
	((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, ((zbx_procstat_header_t *)(base))->queries))

/* the query following 'query' in the active query list or NULL at the end of the list */
#define PROCSTAT_QUERY_NEXT(base, query)	\
	((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, (query)->next))

/* converts an address inside the segment into an offset from the segment start */
#define PROCSTAT_OFFSET(base, ptr)	((char *)(ptr) - (char *)(base))

/* maximum number of active procstat queries */
#define PROCSTAT_MAX_QUERIES	1024

/* the time period after which inactive queries (not accessed during this period) can be removed */
#define PROCSTAT_MAX_INACTIVITY_PERIOD	SEC_PER_DAY

/* the time interval between compressing (inactive query removal) attempts */
#define PROCSTAT_COMPRESS_PERIOD	SEC_PER_DAY
122
123 /* data sample collected every second for the process cpu utilization queries */
124 typedef struct
125 {
126 zbx_uint64_t utime;
127 zbx_uint64_t stime;
128 zbx_timespec_t timestamp;
129 }
130 zbx_procstat_data_t;
131
132 /* process cpu utilization query */
133 typedef struct
134 {
135 /* the process attributes */
136 size_t procname;
137 size_t username;
138 size_t cmdline;
139 zbx_uint64_t flags;
140
141 /* the index of first (oldest) entry in the history data */
142 int h_first;
143
144 /* the number of entries in the history data */
145 int h_count;
146
147 /* the last access time (request from server) */
148 int last_accessed;
149
150 /* increasing id for every data collection run, used to */
151 /* identify queries that are processed during data collection */
152 int runid;
153
154 /* error code */
155 int error;
156
157 /* offset (from segment beginning) of the next process query */
158 int next;
159
160 /* the cpu utilization history data (ring buffer) */
161 zbx_procstat_data_t h_data[MAX_COLLECTOR_HISTORY];
162 }
163 zbx_procstat_query_t;
164
165 /* process cpu utilization query data */
166 typedef struct
167 {
168 /* process attributes */
169 const char *procname;
170 const char *username;
171 const char *cmdline;
172 zbx_uint64_t flags;
173
174 /* error code */
175 int error;
176
177 /* process cpu utilization */
178 zbx_uint64_t utime;
179 zbx_uint64_t stime;
180
181 /* vector of pids matching the process attributes */
182 zbx_vector_uint64_t pids;
183 }
184 zbx_procstat_query_data_t;
185
186 /* the process cpu utilization snapshot */
187 static zbx_procstat_util_t *procstat_snapshot;
188 /* the number of processes in process cpu utilization snapshot */
189 static int procstat_snapshot_num;
190
191 /******************************************************************************
192 * *
193 * Function: procstat_dshm_has_enough_space *
194 * *
195 * Purpose: check if the procstat shared memory segment has at least *
196 * the specified amount of free bytes in the segment *
197 * *
198 * Parameters: base - [IN] the procstat shared memory segment *
199 * size - [IN] number of free bytes needed *
200 * *
201 * Return value: SUCCEED - sufficient amount of bytes are available *
202 * FAIL - otherwise *
203 * *
204 ******************************************************************************/
procstat_dshm_has_enough_space(void * base,size_t size)205 static int procstat_dshm_has_enough_space(void *base, size_t size)
206 {
207 zbx_procstat_header_t *header = (zbx_procstat_header_t *)base;
208
209 if (header->size >= size + header->size_allocated)
210 return SUCCEED;
211
212 return FAIL;
213 }
214
215 /******************************************************************************
216 * *
217 * Function: procstat_dshm_used_size *
218 * *
219 * Purpose: calculate the actual shared memory size used by procstat *
220 * *
221 * Parameters: base - [IN] the procstat shared memory segment *
222 * *
223 * Return value: The number of bytes required to store current procstat data. *
224 * *
225 ******************************************************************************/
procstat_dshm_used_size(void * base)226 static size_t procstat_dshm_used_size(void *base)
227 {
228 const zbx_procstat_query_t *query;
229 size_t size;
230
231 if (NULL == base)
232 return 0;
233
234 size = PROCSTAT_ALIGNED_HEADER_SIZE;
235
236 for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
237 {
238 if (PROCSTAT_NULL_OFFSET != query->procname)
239 size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->procname)) + 1);
240
241 if (PROCSTAT_NULL_OFFSET != query->username)
242 size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->username)) + 1);
243
244 if (PROCSTAT_NULL_OFFSET != query->cmdline)
245 size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->cmdline)) + 1);
246
247 size += ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));
248 }
249
250 return size;
251 }
252
253 /******************************************************************************
254 * *
255 * Function: procstat_queries_num *
256 * *
257 * Purpose: calculate the number of active queries *
258 * *
259 * Parameters: base - [IN] the procstat shared memory segment *
260 * *
261 * Return value: The number of active queries. *
262 * *
263 ******************************************************************************/
procstat_queries_num(void * base)264 static int procstat_queries_num(void *base)
265 {
266 const zbx_procstat_query_t *query;
267 int queries_num;
268
269 if (NULL == base)
270 return 0;
271
272 queries_num = 0;
273
274 for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
275 queries_num++;
276
277 return queries_num;
278 }
279
280 /******************************************************************************
281 * *
282 * Function: procstat_alloc *
283 * *
284 * Purpose: allocates memory in the shared memory segment, *
285 * calls exit() if segment is too small *
286 * *
287 * Parameters: base - [IN] the procstat shared memory segment *
288 * size - [IN] the number of bytes to allocate *
289 * *
290 * Return value: The offset of allocated data from the beginning of segment *
291 * (positive value). *
292 * *
293 ******************************************************************************/
procstat_alloc(void * base,size_t size)294 static int procstat_alloc(void *base, size_t size)
295 {
296 zbx_procstat_header_t *header = (zbx_procstat_header_t *)base;
297 int offset;
298
299 size = ZBX_SIZE_T_ALIGN8(size);
300
301 if (FAIL == procstat_dshm_has_enough_space(header, size))
302 {
303 THIS_SHOULD_NEVER_HAPPEN;
304 exit(EXIT_FAILURE);
305 }
306
307 offset = header->size_allocated;
308 header->size_allocated += size;
309
310 return offset;
311 }
312
313 /******************************************************************************
314 * *
315 * Function: procstat_strdup *
316 * *
317 * Purpose: allocates required memory in procstat memory segment and copies *
318 * the specified string (calls exit() if segment is too small) *
319 * *
320 * Parameters: base - [IN] the procstat shared memory segment *
321 * str - [IN] the string to copy *
322 * *
323 * Return value: The offset to allocated data counting from the beginning *
324 * of data segment. *
325 * 0 if the source string is NULL or the shared memory segment *
326 * does not have enough free space. *
327 * *
328 ******************************************************************************/
procstat_strdup(void * base,const char * str)329 static size_t procstat_strdup(void *base, const char *str)
330 {
331 size_t len, offset;
332
333 if (NULL == str)
334 return PROCSTAT_NULL_OFFSET;
335
336 len = strlen(str) + 1;
337
338 offset = procstat_alloc(base, len);
339 memcpy(PROCSTAT_PTR(base, offset), str, len);
340
341 return offset;
342 }
343
344 /******************************************************************************
345 * *
346 * Function: procstat_reattach *
347 * *
348 * Purpose: reattaches the procstat_ref to the shared memory segment if it *
349 * was 'resized' (a new segment created and the old data copied) by *
350 * other process. *
351 * *
352 * Comments: This function logs critical error and exits in the case of *
353 * shared memory segment operation failure. *
354 * *
355 ******************************************************************************/
procstat_reattach(void)356 static void procstat_reattach(void)
357 {
358 char *errmsg = NULL;
359
360 if (FAIL == zbx_dshm_validate_ref(&collector->procstat, &procstat_ref, &errmsg))
361 {
362 zabbix_log(LOG_LEVEL_CRIT, "cannot validate process data collector reference: %s", errmsg);
363 zbx_free(errmsg);
364 exit(EXIT_FAILURE);
365 }
366 }
367
368 /******************************************************************************
369 * *
370 * Function: procstat_copy_data *
371 * *
372 * Purpose: copies procstat data to a new shared memory segment *
373 * *
374 * Parameters: dst - [OUT] the destination segment *
375 * size_dst - [IN] the size of destination segment *
376 * src - [IN] the source segment *
377 * *
378 ******************************************************************************/
procstat_copy_data(void * dst,size_t size_dst,const void * src)379 static void procstat_copy_data(void *dst, size_t size_dst, const void *src)
380 {
381 int offset, *query_offset;
382 zbx_procstat_header_t *hdst = (zbx_procstat_header_t *)dst;
383 zbx_procstat_query_t *qsrc, *qdst = NULL;
384
385 zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __func__);
386
387 hdst->size = size_dst;
388 hdst->size_allocated = PROCSTAT_ALIGNED_HEADER_SIZE;
389 hdst->queries = PROCSTAT_NULL_OFFSET;
390
391 if (NULL != src)
392 {
393 query_offset = &hdst->queries;
394
395 /* copy queries */
396 for (qsrc = PROCSTAT_QUERY_FIRST(src); NULL != qsrc; qsrc = PROCSTAT_QUERY_NEXT(src, qsrc))
397 {
398 /* the new shared memory segment must have enough space */
399 offset = procstat_alloc(dst, sizeof(zbx_procstat_query_t));
400
401 qdst = (zbx_procstat_query_t *)PROCSTAT_PTR(dst, offset);
402
403 memcpy(qdst, qsrc, sizeof(zbx_procstat_query_t));
404
405 qdst->procname = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->procname));
406 qdst->username = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->username));
407 qdst->cmdline = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->cmdline));
408
409 *query_offset = offset;
410 query_offset = &qdst->next;
411 }
412 }
413
414 zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __func__);
415 }
416
417 /******************************************************************************
418 * *
419 * Function: procstat_running *
420 * *
421 * Purpose: checks if processor statistics collector is running (at least one *
422 * one process statistics query has been made). *
423 * *
424 ******************************************************************************/
procstat_running(void)425 static int procstat_running(void)
426 {
427 if (ZBX_NONEXISTENT_SHMID == collector->procstat.shmid)
428 return FAIL;
429
430 return SUCCEED;
431 }
432
433 /******************************************************************************
434 * *
435 * Function: procstat_get_query *
436 * *
437 * Purpose: get process statistics query based on process name, user name *
438 * and command line *
439 * *
440 * Parameters: base - [IN] the procstat shared memory segment *
441 * procname - [IN] the process name *
442 * username - [IN] the user name *
443 * cmdline - [IN] the command line *
444 * flags - [IN] platform specific flags *
445 * *
446 * Return value: The process statistics query for the specified parameters or *
447 * NULL if the statistics are not being gathered for the *
448 * specified parameters. *
449 * *
450 ******************************************************************************/
procstat_get_query(void * base,const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags)451 static zbx_procstat_query_t *procstat_get_query(void *base, const char *procname, const char *username,
452 const char *cmdline, zbx_uint64_t flags)
453 {
454 zbx_procstat_query_t *query;
455
456 if (SUCCEED != procstat_running())
457 return NULL;
458
459 for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
460 {
461 if (0 == zbx_strcmp_null(procname, PROCSTAT_PTR_NULL(base, query->procname)) &&
462 0 == zbx_strcmp_null(username, PROCSTAT_PTR_NULL(base, query->username)) &&
463 0 == zbx_strcmp_null(cmdline, PROCSTAT_PTR_NULL(base, query->cmdline)) &&
464 flags == query->flags)
465 {
466 return query;
467 }
468 }
469
470 return NULL;
471 }
472
473 /******************************************************************************
474 * *
475 * Function: procstat_add *
476 * *
477 * Purpose: adds a new query to process statistics collector *
478 * *
479 * Parameters: procname - [IN] the process name *
480 * username - [IN] the user name *
481 * cmdline - [IN] the command line *
482 * flags - [IN] platform specific flags *
483 * *
484 * Return value: *
485 * This function calls exit() on shared memory errors. *
486 * *
487 ******************************************************************************/
procstat_add(const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags)488 static void procstat_add(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags)
489 {
490 char *errmsg = NULL;
491 size_t size = 0;
492 zbx_procstat_query_t *query;
493 zbx_procstat_header_t *header;
494 int query_offset;
495
496 zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __func__);
497
498 /* when allocating a new collection reserve space for procstat header */
499 if (0 == collector->procstat.size)
500 size += PROCSTAT_ALIGNED_HEADER_SIZE;
501
502 /* reserve space for process attributes */
503 if (NULL != procname)
504 size += ZBX_SIZE_T_ALIGN8(strlen(procname) + 1);
505
506 if (NULL != username)
507 size += ZBX_SIZE_T_ALIGN8(strlen(username) + 1);
508
509 if (NULL != cmdline)
510 size += ZBX_SIZE_T_ALIGN8(strlen(cmdline) + 1);
511
512 /* procstat_add() is called when the shared memory reference has already been validated - */
513 /* no need to call procstat_reattach() */
514
515 /* reserve space for query container */
516 size += ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));
517
518 if (NULL == procstat_ref.addr || FAIL == procstat_dshm_has_enough_space(procstat_ref.addr, size))
519 {
520 /* recalculate the space required to store existing data + new query */
521 size += procstat_dshm_used_size(procstat_ref.addr);
522
523 if (FAIL == zbx_dshm_realloc(&collector->procstat, size, &errmsg))
524 {
525 zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", errmsg);
526 zbx_free(errmsg);
527 zbx_dshm_unlock(&collector->procstat);
528
529 exit(EXIT_FAILURE);
530 }
531
532 /* header initialised in procstat_copy_data() which is called back from zbx_dshm_realloc() */
533 procstat_reattach();
534 }
535
536 header = (zbx_procstat_header_t *)procstat_ref.addr;
537
538 query_offset = procstat_alloc(procstat_ref.addr, sizeof(zbx_procstat_query_t));
539
540 /* initialize the created query */
541 query = (zbx_procstat_query_t *)PROCSTAT_PTR_NULL(procstat_ref.addr, query_offset);
542
543 memset(query, 0, sizeof(zbx_procstat_query_t));
544
545 query->procname = procstat_strdup(procstat_ref.addr, procname);
546 query->username = procstat_strdup(procstat_ref.addr, username);
547 query->cmdline = procstat_strdup(procstat_ref.addr, cmdline);
548 query->flags = flags;
549 query->last_accessed = time(NULL);
550 query->next = header->queries;
551 header->queries = query_offset;
552
553 zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __func__);
554 }
555
556 /******************************************************************************
557 * *
558 * Function: procstat_free_query_data *
559 * *
560 * Purpose: frees the query data structure used to store queries locally *
561 * *
562 ******************************************************************************/
procstat_free_query_data(zbx_procstat_query_data_t * data)563 static void procstat_free_query_data(zbx_procstat_query_data_t *data)
564 {
565 zbx_vector_uint64_destroy(&data->pids);
566 zbx_free(data);
567 }
568
569 /******************************************************************************
570 * *
571 * Function: procstat_try_compress *
572 * *
573 * Purpose: try to compress (remove inactive queries) the procstat shared *
574 * memory segment once per day *
575 * *
576 * Parameters: base - [IN] the procstat shared memory segment *
577 * *
578 ******************************************************************************/
procstat_try_compress(void * base)579 static void procstat_try_compress(void *base)
580 {
581 static int collector_iteration = 0;
582
583 /* The iteration counter ~ the number seconds collector has been running */
584 /* because collector data gathering is done once per second. */
585 /* This approximation is done to avoid calling time() function if there */
586 /* are no defined queries. */
587 if (0 == (++collector_iteration % PROCSTAT_COMPRESS_PERIOD))
588 {
589 zbx_procstat_header_t *header = (zbx_procstat_header_t *)procstat_ref.addr;
590 size_t size;
591 char *errmsg = NULL;
592
593 size = procstat_dshm_used_size(base);
594
595 if (size < header->size && FAIL == zbx_dshm_realloc(&collector->procstat, size, &errmsg))
596 {
597 zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", errmsg);
598 zbx_free(errmsg);
599 zbx_dshm_unlock(&collector->procstat);
600
601 exit(EXIT_FAILURE);
602 }
603 }
604 }
605
606 /******************************************************************************
607 * *
608 * Function: procstat_build_local_query_vector *
609 * *
610 * Purpose: builds a local copy of the process cpu utilization queries and *
611 * removes expired (not used during last 24 hours) queries *
612 * *
613 * Parameters: queries_ptr - [OUT] local copy of queries copied from queries *
614 * in shared memory segment *
615 * runid - [IN] marker for queries to be processed in the *
616 * current collector iteration *
617 * *
618 * Return value: The flags defining the process properties to be retrieved. *
619 * See ZBX_SYSINFO_PROC_ defines. *
620 * *
621 * Comments: updates queries (runid) in shared memory segment *
622 * *
623 ******************************************************************************/
procstat_build_local_query_vector(zbx_vector_ptr_t * queries_ptr,int runid)624 static int procstat_build_local_query_vector(zbx_vector_ptr_t *queries_ptr, int runid)
625 {
626 zbx_procstat_header_t *header;
627 time_t now;
628 zbx_procstat_query_t *query;
629 zbx_procstat_query_data_t *qdata;
630 int flags = ZBX_SYSINFO_PROC_NONE, *pnext_query;
631
632 zbx_dshm_lock(&collector->procstat);
633
634 procstat_reattach();
635
636 header = (zbx_procstat_header_t *)procstat_ref.addr;
637
638 if (PROCSTAT_NULL_OFFSET == header->queries)
639 goto out;
640
641 flags = ZBX_SYSINFO_PROC_PID;
642
643 now = time(NULL);
644 pnext_query = &header->queries;
645
646 for (query = PROCSTAT_QUERY_FIRST(procstat_ref.addr); NULL != query;
647 query = PROCSTAT_QUERY_NEXT(procstat_ref.addr, query))
648 {
649 /* remove unused queries, the data is still allocated until the next resize */
650 if (PROCSTAT_MAX_INACTIVITY_PERIOD < now - query->last_accessed)
651 {
652 *pnext_query = query->next;
653 continue;
654 }
655
656 qdata = (zbx_procstat_query_data_t *)zbx_malloc(NULL, sizeof(zbx_procstat_query_data_t));
657 zbx_vector_uint64_create(&qdata->pids);
658
659 /* store the reference to query attributes, which is guaranteed to be */
660 /* valid until we call process_reattach() */
661 if (NULL != (qdata->procname = PROCSTAT_PTR_NULL(procstat_ref.addr, query->procname)))
662 flags |= ZBX_SYSINFO_PROC_NAME;
663
664 if (NULL != (qdata->username = PROCSTAT_PTR_NULL(procstat_ref.addr, query->username)))
665 flags |= ZBX_SYSINFO_PROC_USER;
666
667 if (NULL != (qdata->cmdline = PROCSTAT_PTR_NULL(procstat_ref.addr, query->cmdline)))
668 flags |= ZBX_SYSINFO_PROC_CMDLINE;
669
670 qdata->flags = query->flags;
671 qdata->utime = 0;
672 qdata->stime = 0;
673 qdata->error = 0;
674
675 zbx_vector_ptr_append(queries_ptr, qdata);
676
677 /* The order of queries can be changed only by collector itself (when removing old */
678 /* queries), but during statistics gathering the shared memory is unlocked and other */
679 /* processes might insert queries at the beginning of active queries list. */
680 /* Mark the queries being processed by current data gathering cycle with id that */
681 /* is incremented at the end of every data gathering cycle. We can be sure that */
682 /* our local copy will match the queries in shared memory having the same runid. */
683 query->runid = runid;
684
685 pnext_query = &query->next;
686 }
687
688 out:
689 procstat_try_compress(procstat_ref.addr);
690
691 zbx_dshm_unlock(&collector->procstat);
692
693 return flags;
694 }
695
696 /******************************************************************************
697 * *
698 * Function: procstat_scan_query_pids *
699 * *
700 * Purpose: for every query gets the pids of processes matching query *
701 * attributes *
702 * *
703 * Parameters: queries - [IN/OUT] fills pids and error for each query *
704 * *
705 * Return value: total number of pids saved in all queries *
706 * *
707 ******************************************************************************/
procstat_scan_query_pids(zbx_vector_ptr_t * queries,const zbx_vector_ptr_t * processes)708 static int procstat_scan_query_pids(zbx_vector_ptr_t *queries, const zbx_vector_ptr_t *processes)
709 {
710 zbx_procstat_query_data_t *qdata;
711 int i, pids_num = 0;
712
713 for (i = 0; i < queries->values_num; i++)
714 {
715 qdata = (zbx_procstat_query_data_t *)queries->values[i];
716
717 zbx_proc_get_matching_pids(processes, qdata->procname, qdata->username, qdata->cmdline, qdata->flags,
718 &qdata->pids);
719
720 pids_num += qdata->pids.values_num;
721 }
722
723 return pids_num;
724 }
725
726 /******************************************************************************
727 * *
728 * Function: procstat_get_monitored_pids *
729 * *
730 * Purpose: creates a list of unique pids that are monitored by current data *
731 * gathering cycle *
732 * *
733 * Parameters: pids - [OUT] a sorted vector of unique pids *
734 * queries - [IN] local, working copy of queries *
735 * pids_num - [IN] the total number of pids monitored by queries *
736 * (might contain duplicated pids) *
737 * *
738 ******************************************************************************/
procstat_get_monitored_pids(zbx_vector_uint64_t * pids,const zbx_vector_ptr_t * queries,int pids_num)739 static void procstat_get_monitored_pids(zbx_vector_uint64_t *pids, const zbx_vector_ptr_t *queries, int pids_num)
740 {
741 zbx_procstat_query_data_t *qdata;
742 int i;
743
744 zbx_vector_uint64_reserve(pids, pids_num);
745
746 for (i = 0; i < queries->values_num; i++)
747 {
748 qdata = (zbx_procstat_query_data_t *)queries->values[i];
749
750 if (SUCCEED != qdata->error)
751 continue;
752
753 memcpy(pids->values + pids->values_num, qdata->pids.values,
754 sizeof(zbx_uint64_t) * qdata->pids.values_num);
755 pids->values_num += qdata->pids.values_num;
756 }
757
758 zbx_vector_uint64_sort(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
759 zbx_vector_uint64_uniq(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
760 }
761
762 /******************************************************************************
763 * *
764 * Function: procstat_get_cpu_util_snapshot_for_pids *
765 * *
766 * Purpose: gets cpu utilization data snapshot for the monitored processes *
767 * *
768 * Parameters: stats - [OUT] current reading of the per-pid cpu usage *
769 * statistics (array, items correspond to pids) *
770 * pids - [IN] pids (unique) for which to collect data in this *
771 * iteration *
772 * *
773 * Return value: timestamp of the snapshot *
774 * *
775 ******************************************************************************/
procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t * stats,zbx_vector_uint64_t * pids)776 static zbx_timespec_t procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t *stats,
777 zbx_vector_uint64_t *pids)
778 {
779 zbx_timespec_t snapshot_timestamp;
780 int i;
781
782 for (i = 0; i < pids->values_num; i++)
783 stats[i].pid = pids->values[i];
784
785 zbx_proc_get_process_stats(stats, pids->values_num);
786
787 zbx_timespec(&snapshot_timestamp);
788
789 return snapshot_timestamp;
790 }
791
792 /******************************************************************************
793 * *
794 * Function: procstat_util_compare *
795 * *
796 * Purpose: compare process utilization data by their pids *
797 * *
798 ******************************************************************************/
procstat_util_compare(const void * d1,const void * d2)799 static int procstat_util_compare(const void *d1, const void *d2)
800 {
801 const zbx_procstat_util_t *u1 = (zbx_procstat_util_t *)d1;
802 const zbx_procstat_util_t *u2 = (zbx_procstat_util_t *)d2;
803
804 ZBX_RETURN_IF_NOT_EQUAL(u1->pid, u2->pid);
805
806 return 0;
807 }
808
809 /******************************************************************************
810 * *
811 * Function: procstat_calculate_cpu_util_for_queries *
812 * *
813 * Purpose: calculates the cpu utilization for queries since the previous *
814 * snapshot *
815 * *
816 * Parameters: queries - [IN/OUT] local, working copy of queries, saving *
817 * utime, stime and error *
818 * pids - [IN] pids (unique) for which to collect data in *
819 * this iteration *
820 * stats - [IN] current reading of the per-pid cpu usage *
821 * statistics (array, items correspond to pids) *
822 * *
823 ******************************************************************************/
procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t * queries,zbx_vector_uint64_t * pids,const zbx_procstat_util_t * stats)824 static void procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t *queries,
825 zbx_vector_uint64_t *pids, const zbx_procstat_util_t *stats)
826 {
827 zbx_procstat_query_data_t *qdata;
828 zbx_procstat_util_t *putil;
829 int j, i;
830
831 for (j = 0; j < queries->values_num; j++)
832 {
833 qdata = (zbx_procstat_query_data_t *)queries->values[j];
834
835 /* sum the cpu utilization for processes that are present in current */
836 /* and last process cpu utilization snapshot */
837 for (i = 0; i < qdata->pids.values_num; i++)
838 {
839 zbx_uint64_t starttime, utime, stime;
840 zbx_procstat_util_t util_local;
841
842 util_local.pid = qdata->pids.values[i];
843
844 /* find the process utilization data in current snapshot */
845 putil = (zbx_procstat_util_t *)zbx_bsearch(&util_local, stats, pids->values_num,
846 sizeof(zbx_procstat_util_t), procstat_util_compare);
847
848 if (NULL == putil || SUCCEED != putil->error)
849 continue;
850
851 utime = putil->utime;
852 stime = putil->stime;
853
854 starttime = putil->starttime;
855
856 /* find the process utilization data in last snapshot */
857 putil = (zbx_procstat_util_t *)zbx_bsearch(&util_local, procstat_snapshot, procstat_snapshot_num,
858 sizeof(zbx_procstat_util_t), procstat_util_compare);
859
860 if (NULL == putil || SUCCEED != putil->error || putil->starttime != starttime)
861 continue;
862
863 qdata->utime += utime - putil->utime;
864 qdata->stime += stime - putil->stime;
865 }
866 }
867 }
868
869 /******************************************************************************
870 * *
871 * Function: procstat_update_query_statistics *
872 * *
873 * Purpose: updates cpu utilization and saves the new snapshot for queries in *
874 * shared memory segment *
875 * *
876 * Parameters: queries - [IN] local, working copy of queries (utime, stime *
877 * and error must be set) *
878 * runid - [IN] marker for queries to be processed in the *
879 * current collector iteration *
880 * snapshot_timestamp - [IN] timestamp of the current snapshot *
881 * *
882 * Comments: updates header (pids_num) and queries (h_data, h_count, h_first) *
883 * in shared memory segment, writes stats at the end of the shared *
884 * memory segment *
885 * *
886 ******************************************************************************/
procstat_update_query_statistics(zbx_vector_ptr_t * queries,int runid,const zbx_timespec_t * snapshot_timestamp)887 static void procstat_update_query_statistics(zbx_vector_ptr_t *queries, int runid,
888 const zbx_timespec_t *snapshot_timestamp)
889 {
890 zbx_procstat_query_t *query;
891 zbx_procstat_query_data_t *qdata;
892 int index;
893 int i;
894
895 zbx_dshm_lock(&collector->procstat);
896
897 procstat_reattach();
898
899 for (query = PROCSTAT_QUERY_FIRST(procstat_ref.addr), i = 0; NULL != query;
900 query = PROCSTAT_QUERY_NEXT(procstat_ref.addr, query))
901 {
902 if (runid != query->runid)
903 continue;
904
905 if (i >= queries->values_num)
906 {
907 THIS_SHOULD_NEVER_HAPPEN;
908 break;
909 }
910
911 qdata = (zbx_procstat_query_data_t *)queries->values[i++];
912
913 if (SUCCEED != (query->error = qdata->error))
914 continue;
915
916 /* find the next history data slot */
917 if (0 < query->h_count)
918 {
919 if (MAX_COLLECTOR_HISTORY <= (index = query->h_first + query->h_count - 1))
920 index -= MAX_COLLECTOR_HISTORY;
921
922 qdata->utime += query->h_data[index].utime;
923 qdata->stime += query->h_data[index].stime;
924
925 if (MAX_COLLECTOR_HISTORY <= ++index)
926 index -= MAX_COLLECTOR_HISTORY;
927 }
928 else
929 index = 0;
930
931 if (MAX_COLLECTOR_HISTORY == query->h_count)
932 {
933 if (MAX_COLLECTOR_HISTORY <= ++query->h_first)
934 query->h_first = 0;
935 }
936 else
937 query->h_count++;
938
939 query->h_data[index].utime = qdata->utime;
940 query->h_data[index].stime = qdata->stime;
941 query->h_data[index].timestamp = *snapshot_timestamp;
942 }
943
944 zbx_dshm_unlock(&collector->procstat);
945 }
946
947 /*
948 * Public API
949 */
950
951 /******************************************************************************
952 * *
953 * Function: zbx_procstat_collector_started *
954 * *
955 * Purpose: checks if processor statistics collector is enabled (the main *
956 * collector has been initialized) *
957 * *
958 ******************************************************************************/
zbx_procstat_collector_started(void)959 int zbx_procstat_collector_started(void)
960 {
961 if (NULL == collector)
962 return FAIL;
963
964 return SUCCEED;
965 }
966
967 /******************************************************************************
968 * *
969 * Function: zbx_procstat_init *
970 * *
971 * Purpose: initializes process statistics collector *
972 * *
973 * Return value: This function calls exit() on shared memory errors. *
974 * *
975 ******************************************************************************/
zbx_procstat_init(void)976 void zbx_procstat_init(void)
977 {
978 char *errmsg = NULL;
979
980 if (SUCCEED != zbx_dshm_create(&collector->procstat, 0, ZBX_MUTEX_PROCSTAT,
981 procstat_copy_data, &errmsg))
982 {
983 zabbix_log(LOG_LEVEL_CRIT, "cannot initialize process data collector: %s", errmsg);
984 zbx_free(errmsg);
985 exit(EXIT_FAILURE);
986 }
987
988 procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
989 procstat_ref.addr = NULL;
990 }
991
992 /******************************************************************************
993 * *
994 * Function: zbx_procstat_destroy *
995 * *
996 * Purpose: destroys process statistics collector *
997 * *
998 ******************************************************************************/
zbx_procstat_destroy(void)999 void zbx_procstat_destroy(void)
1000 {
1001 char *errmsg = NULL;
1002
1003 if (SUCCEED != zbx_dshm_destroy(&collector->procstat, &errmsg))
1004 {
1005 zabbix_log(LOG_LEVEL_CRIT, "cannot free resources allocated by process data collector: %s", errmsg);
1006 zbx_free(errmsg);
1007 }
1008
1009 procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
1010 procstat_ref.addr = NULL;
1011 }
1012
1013 /******************************************************************************
1014 * *
1015 * Function: zbx_procstat_get_util *
1016 * *
1017 * Purpose: gets process cpu utilization *
1018 * *
1019 * Parameters: procname - [IN] the process name, NULL - all *
1020 * username - [IN] the user name, NULL - all *
1021 * cmdline - [IN] the command line, NULL - all *
1022 * collector_func - [IN] the callback function to use for process *
1023 * statistics gathering *
1024 * period - [IN] the time period *
1025 * type - [IN] the cpu utilization type, see *
1026 * ZBX_PROCSTAT_CPU_* defines *
1027 * value - [OUT] the utilization in % *
1028 * errmsg - [OUT] the error message *
1029 * *
1030 * Return value: *
1031 * SUCCEED - the utime value was retrieved successfully *
1032 * FAIL - either collector does not have at least two data samples *
1033 * required to calculate the statistics, or an error occurred *
1034 * during the collection process. In the second case the errmsg *
1035 * will contain an error message. *
1036 * This function calls exit() on shared memory errors. *
1037 * *
1038 ******************************************************************************/
zbx_procstat_get_util(const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags,int period,int type,double * value,char ** errmsg)1039 int zbx_procstat_get_util(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags,
1040 int period, int type, double *value, char **errmsg)
1041 {
1042 int ret = FAIL, current, start;
1043 zbx_procstat_query_t *query;
1044 zbx_uint64_t ticks_diff = 0, time_diff;
1045
1046 zbx_dshm_lock(&collector->procstat);
1047
1048 procstat_reattach();
1049
1050 if (NULL == (query = procstat_get_query(procstat_ref.addr, procname, username, cmdline, flags)))
1051 {
1052 if (procstat_queries_num(procstat_ref.addr) == PROCSTAT_MAX_QUERIES)
1053 *errmsg = zbx_strdup(*errmsg, "Maximum number of queries reached.");
1054 else
1055 procstat_add(procname, username, cmdline, flags);
1056
1057 goto out;
1058 }
1059
1060 query->last_accessed = time(NULL);
1061
1062 if (0 != query->error)
1063 {
1064 *errmsg = zbx_dsprintf(*errmsg, "Cannot read cpu utilization data: %s", zbx_strerror(-query->error));
1065 goto out;
1066 }
1067
1068 if (1 >= query->h_count)
1069 goto out;
1070
1071 if (period >= query->h_count)
1072 period = query->h_count - 1;
1073
1074 if (MAX_COLLECTOR_HISTORY <= (current = query->h_first + query->h_count - 1))
1075 current -= MAX_COLLECTOR_HISTORY;
1076
1077 if (0 > (start = current - period))
1078 start += MAX_COLLECTOR_HISTORY;
1079
1080 if (0 != (type & ZBX_PROCSTAT_CPU_USER))
1081 ticks_diff += query->h_data[current].utime - query->h_data[start].utime;
1082
1083 if (0 != (type & ZBX_PROCSTAT_CPU_SYSTEM))
1084 ticks_diff += query->h_data[current].stime - query->h_data[start].stime;
1085
1086 time_diff = (zbx_uint64_t)(query->h_data[current].timestamp.sec - query->h_data[start].timestamp.sec) *
1087 1000000000 + query->h_data[current].timestamp.ns - query->h_data[start].timestamp.ns;
1088
1089 /* 1e9 (nanoseconds) * 1e2 (percent) * 1e1 (one digit decimal place) */
1090 ticks_diff *= __UINT64_C(1000000000000);
1091 #ifdef HAVE_ROUND
1092 *value = round((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK))) / 10;
1093 #else
1094 *value = (int)((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK)) + 0.5) / 10.0;
1095 #endif
1096
1097 ret = SUCCEED;
1098 out:
1099 zbx_dshm_unlock(&collector->procstat);
1100
1101 return ret;
1102 }
1103
1104 /******************************************************************************
1105 * *
1106 * Function: zbx_procstat_collect *
1107 * *
1108 * Purpose: performs process statistics collection *
1109 * *
1110 ******************************************************************************/
zbx_procstat_collect(void)1111 void zbx_procstat_collect(void)
1112 {
1113 /* identifies current collection iteration */
1114 static int runid = 1;
1115
1116 /* number of (non-unique) pids that match queries */
1117 int pids_num = 0;
1118
1119 /* flags specifying what process properties must be retrieved */
1120 int flags;
1121
1122 /* local, working copy of queries */
1123 zbx_vector_ptr_t queries;
1124
1125 /* data about all processes on system */
1126 zbx_vector_ptr_t processes;
1127
1128 /* pids (unique) for which to collect data in this iteration */
1129 zbx_vector_uint64_t pids;
1130
1131 /* current reading of the per-pid cpu usage statistics (array, items correspond to pids) */
1132 zbx_procstat_util_t *stats;
1133
1134 /* time of the per-pid usage statistics collection */
1135 zbx_timespec_t snapshot_timestamp;
1136
1137 if (FAIL == zbx_procstat_collector_started() || FAIL == procstat_running())
1138 goto out;
1139
1140 zbx_vector_ptr_create(&queries);
1141 zbx_vector_ptr_create(&processes);
1142 zbx_vector_uint64_create(&pids);
1143
1144 if (ZBX_SYSINFO_PROC_NONE == (flags = procstat_build_local_query_vector(&queries, runid)))
1145 goto clean;
1146
1147 if (SUCCEED != zbx_proc_get_processes(&processes, flags))
1148 goto clean;
1149
1150 pids_num = procstat_scan_query_pids(&queries, &processes);
1151
1152 procstat_get_monitored_pids(&pids, &queries, pids_num);
1153
1154 stats = (zbx_procstat_util_t *)zbx_malloc(NULL, sizeof(zbx_procstat_util_t) * pids.values_num);
1155 snapshot_timestamp = procstat_get_cpu_util_snapshot_for_pids(stats, &pids);
1156
1157 procstat_calculate_cpu_util_for_queries(&queries, &pids, stats);
1158
1159 procstat_update_query_statistics(&queries, runid, &snapshot_timestamp);
1160
1161 /* replace the current snapshot with the new stats */
1162 zbx_free(procstat_snapshot);
1163 procstat_snapshot = stats;
1164 procstat_snapshot_num = pids.values_num;
1165 clean:
1166 zbx_vector_uint64_destroy(&pids);
1167
1168 zbx_proc_free_processes(&processes);
1169 zbx_vector_ptr_destroy(&processes);
1170
1171 zbx_vector_ptr_clear_ext(&queries, (zbx_mem_free_func_t)procstat_free_query_data);
1172 zbx_vector_ptr_destroy(&queries);
1173 out:
1174 runid++;
1175 }
1176
1177 #endif
1178