1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 **/
19
20 #include "common.h"
21 #include "log.h"
22 #include "mutexs.h"
23 #include "stats.h"
24 #include "ipc.h"
25 #include "procstat.h"
26
27 #ifdef ZBX_PROCSTAT_COLLECTOR
28
29 /*
30 * The process CPU statistics are stored using the following memory layout.
31 *
32 * .--------------------------------------.
33 * | header |
34 * | ------------------------------------ |
35 * | process cpu utilization queries |
36 * | and historical data |
37 * | ------------------------------------ |
38 * | free space |
39 * '--------------------------------------'
40 *
 * Because the shared memory can be resized by other processes, memory offsets
 * from the beginning of the shared memory segment are used instead of pointers
 * (when allocating strings, building singly linked lists).
 * A 0 offset is interpreted similarly to a NULL pointer.
45 *
46 * Currently integer values are used to store offsets to internally allocated
47 * memory which leads to 2GB total size limit.
48 *
49 * During every data collection cycle collector does the following:
50 * 1) acquires list of all processes running on system
51 * 2) builds a list of processes monitored by queries
52 * 3) reads total cpu utilization snapshot for the monitored processes
53 * 4) calculates cpu utilization difference by comparing with previous snapshot
54 * 5) updates cpu utilization values for queries.
55 * 6) saves the last cpu utilization snapshot
56 *
57 * Initialisation.
58 * * zbx_procstat_init() initialises procstat dshm structure but doesn't allocate memory from the system
59 * (zbx_dshm_create() called with size 0).
60 * * the first call of procstat_add() allocates the shared memory for the header and the first query
61 * via call to zbx_dshm_realloc().
62 * * The header is initialised in procstat_copy_data() which is called back from zbx_dshm_realloc().
63 *
64 * Memory allocation within dshm.
65 * * Ensure that memory segment has enough free space with procstat_dshm_has_enough_space() before
66 * allocating space within segment with procstat_alloc() or functions that use it.
67 * * Check how much of the allocated dshm is actually used by procstat by procstat_dshm_used_size().
68 * * Change the dshm size with zbx_dshm_realloc().
69 *
70 * Synchronisation.
71 * * agentd processes share a single instance of ZBX_COLLECTOR_DATA (*collector) containing reference
72 * to shared procstat memory segment.
73 * * Each agentd process also holds local reference to procstat shared memory segment.
74 * * The system keeps the shared memory segment until the last process detaches from it.
75 * * Synchronise both references with procstat_reattach() before using procstat shared memory segment.
76 */
77
/* the main collector data (defined elsewhere); its 'procstat' member describes */
/* the procstat shared memory segment and is used for locking and reallocation  */
extern ZBX_COLLECTOR_DATA	*collector;

/* local reference to the procstat shared memory; it is validated against */
/* collector->procstat by procstat_reattach() before the segment is used  */
static zbx_dshm_ref_t	procstat_ref;
83
/* the header located at the very beginning of the procstat shared memory segment */
typedef struct
{
	/* a linked list of active queries (offset of the first active query), */
	/* PROCSTAT_NULL_OFFSET when there are no queries                      */
	int	queries;

	/* the total size of the allocated queries and strings; starts at the   */
	/* aligned header size and is used as the offset of the next allocation */
	int	size_allocated;

	/* the total shared memory segment size */
	size_t	size;
}
zbx_procstat_header_t;
96
/* the offset value that stands for "no data" (the counterpart of a NULL pointer) */
#define PROCSTAT_NULL_OFFSET		0

/* the header size after it has been passed through ZBX_SIZE_T_ALIGN8() */
#define PROCSTAT_ALIGNED_HEADER_SIZE	ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_header_t))

/* Address helpers.                                                               */
/* All macro arguments are parenthesized so that compound expressions such as     */
/* 'ptr + 1' or 'a & b' can be passed safely. Note that PROCSTAT_PTR_NULL() (and  */
/* the macros built on it) evaluate their arguments more than once - do not pass  */
/* expressions with side effects.                                                 */

/* converts an offset from the beginning of the segment to a (char *) address */
#define PROCSTAT_PTR(base, offset)	((char *)(base) + (offset))

/* same as PROCSTAT_PTR(), but PROCSTAT_NULL_OFFSET is converted to NULL */
#define PROCSTAT_PTR_NULL(base, offset)									\
		(PROCSTAT_NULL_OFFSET == (offset) ? NULL : PROCSTAT_PTR(base, offset))

/* the first query in the active query list or NULL if the list is empty */
#define PROCSTAT_QUERY_FIRST(base)									\
		((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, ((zbx_procstat_header_t *)(base))->queries))

/* the query following the specified one or NULL if it was the last query */
#define PROCSTAT_QUERY_NEXT(base, query)								\
		((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, (query)->next))

/* converts an address inside the segment to an offset from the segment beginning */
#define PROCSTAT_OFFSET(base, ptr)	((char *)(ptr) - (char *)(base))

/* maximum number of active procstat queries */
#define PROCSTAT_MAX_QUERIES	1024

/* the time period after which inactive queries (not accessed during this period) can be removed */
#define PROCSTAT_MAX_INACTIVITY_PERIOD	SEC_PER_DAY

/* the time interval between compressing (inactive query removal) attempts */
#define PROCSTAT_COMPRESS_PERIOD	SEC_PER_DAY
122
/* data sample collected every second for the process cpu utilization queries; */
/* samples are kept in the h_data ring buffer of zbx_procstat_query_t          */
typedef struct
{
	/* NOTE(review): utime/stime appear to be running totals (a new sample is built */
	/* on top of the previous slot when statistics are updated) - confirm the units */
	zbx_uint64_t	utime;
	zbx_uint64_t	stime;

	/* the time the sample was taken */
	zbx_timespec_t	timestamp;
}
zbx_procstat_data_t;
131
/* process cpu utilization query (lives inside the procstat shared memory segment) */
typedef struct
{
	/* the process attributes: offsets (from segment beginning) of the strings */
	/* allocated with procstat_strdup(), PROCSTAT_NULL_OFFSET if not specified */
	size_t			procname;
	size_t			username;
	size_t			cmdline;

	/* platform specific flags */
	zbx_uint64_t		flags;

	/* the index of first (oldest) entry in the history data */
	int			h_first;

	/* the number of entries in the history data */
	int			h_count;

	/* the last access time (request from server) */
	int			last_accessed;

	/* increasing id for every data collection run, used to      */
	/* identify queries that are processed during data collection */
	int			runid;

	/* error code (copied from the local query data when statistics are updated) */
	int			error;

	/* offset (from segment beginning) of the next process query, */
	/* PROCSTAT_NULL_OFFSET for the last query in the list        */
	int			next;

	/* the cpu utilization history data (ring buffer) */
	zbx_procstat_data_t	h_data[MAX_COLLECTOR_HISTORY];
}
zbx_procstat_query_t;
164
/* process cpu utilization query data - the collector's local working copy of */
/* a query, allocated with zbx_malloc() for one data gathering cycle          */
typedef struct
{
	/* process attributes; these point directly into the shared memory segment */
	/* (NULL if the attribute is not set) and are not owned by this structure  */
	const char		*procname;
	const char		*username;
	const char		*cmdline;
	zbx_uint64_t		flags;

	/* error code */
	int			error;

	/* process cpu utilization */
	zbx_uint64_t		utime;
	zbx_uint64_t		stime;

	/* vector of pids matching the process attributes */
	zbx_vector_uint64_t	pids;
}
zbx_procstat_query_data_t;
185
/* the process cpu utilization snapshot; it is searched by pid with zbx_bsearch(), */
/* NOTE(review): so it is presumably kept sorted by pid - confirm where it is saved */
static zbx_procstat_util_t	*procstat_snapshot;
/* the number of processes in process cpu utilization snapshot */
static int			procstat_snapshot_num;

/* external functions used by procstat collector (implemented outside of this file) */

/* NOTE(review): presumably fills 'processes' with the running processes - confirm */
int	zbx_proc_get_processes(zbx_vector_ptr_t *processes, unsigned int flags);

/* stores in 'pids' the pids of the processes matching the specified attributes */
void	zbx_proc_get_matching_pids(const zbx_vector_ptr_t *processes, const char *procname, const char *username,
		const char *cmdline, zbx_uint64_t flags, zbx_vector_uint64_t *pids);

/* reads the cpu utilization for the processes whose pids are set in the 'procs' array */
void	zbx_proc_get_process_stats(zbx_procstat_util_t *procs, int procs_num);

/* NOTE(review): presumably releases what zbx_proc_get_processes() returned - confirm */
void	zbx_proc_free_processes(zbx_vector_ptr_t *processes);
200
201 /******************************************************************************
202 * *
203 * Function: procstat_dshm_has_enough_space *
204 * *
205 * Purpose: check if the procstat shared memory segment has at least *
206 * the specified amount of free bytes in the segment *
207 * *
208 * Parameters: base - [IN] the procstat shared memory segment *
209 * size - [IN] number of free bytes needed *
210 * *
211 * Return value: SUCCEED - sufficient amount of bytes are available *
212 * FAIL - otherwise *
213 * *
214 ******************************************************************************/
procstat_dshm_has_enough_space(void * base,size_t size)215 static int procstat_dshm_has_enough_space(void *base, size_t size)
216 {
217 zbx_procstat_header_t *header = (zbx_procstat_header_t *)base;
218
219 if (header->size >= size + header->size_allocated)
220 return SUCCEED;
221
222 return FAIL;
223 }
224
225 /******************************************************************************
226 * *
227 * Function: procstat_dshm_used_size *
228 * *
229 * Purpose: calculate the actual shared memory size used by procstat *
230 * *
231 * Parameters: base - [IN] the procstat shared memory segment *
232 * *
233 * Return value: The number of bytes required to store current procstat data. *
234 * *
235 ******************************************************************************/
procstat_dshm_used_size(void * base)236 static size_t procstat_dshm_used_size(void *base)
237 {
238 const zbx_procstat_query_t *query;
239 size_t size;
240
241 if (NULL == base)
242 return 0;
243
244 size = PROCSTAT_ALIGNED_HEADER_SIZE;
245
246 for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
247 {
248 if (PROCSTAT_NULL_OFFSET != query->procname)
249 size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->procname)) + 1);
250
251 if (PROCSTAT_NULL_OFFSET != query->username)
252 size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->username)) + 1);
253
254 if (PROCSTAT_NULL_OFFSET != query->cmdline)
255 size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->cmdline)) + 1);
256
257 size += ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));
258 }
259
260 return size;
261 }
262
263 /******************************************************************************
264 * *
265 * Function: procstat_queries_num *
266 * *
267 * Purpose: calculate the number of active queries *
268 * *
269 * Parameters: base - [IN] the procstat shared memory segment *
270 * *
271 * Return value: The number of active queries. *
272 * *
273 ******************************************************************************/
procstat_queries_num(void * base)274 static int procstat_queries_num(void *base)
275 {
276 const zbx_procstat_query_t *query;
277 int queries_num;
278
279 if (NULL == base)
280 return 0;
281
282 queries_num = 0;
283
284 for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
285 queries_num++;
286
287 return queries_num;
288 }
289
290 /******************************************************************************
291 * *
292 * Function: procstat_alloc *
293 * *
294 * Purpose: allocates memory in the shared memory segment, *
295 * calls exit() if segment is too small *
296 * *
297 * Parameters: base - [IN] the procstat shared memory segment *
298 * size - [IN] the number of bytes to allocate *
299 * *
300 * Return value: The offset of allocated data from the beginning of segment *
301 * (positive value). *
302 * *
303 ******************************************************************************/
procstat_alloc(void * base,size_t size)304 static int procstat_alloc(void *base, size_t size)
305 {
306 zbx_procstat_header_t *header = (zbx_procstat_header_t *)base;
307 int offset;
308
309 size = ZBX_SIZE_T_ALIGN8(size);
310
311 if (FAIL == procstat_dshm_has_enough_space(header, size))
312 {
313 THIS_SHOULD_NEVER_HAPPEN;
314 exit(EXIT_FAILURE);
315 }
316
317 offset = header->size_allocated;
318 header->size_allocated += size;
319
320 return offset;
321 }
322
323 /******************************************************************************
324 * *
325 * Function: procstat_strdup *
326 * *
327 * Purpose: allocates required memory in procstat memory segment and copies *
328 * the specified string (calls exit() if segment is too small) *
329 * *
330 * Parameters: base - [IN] the procstat shared memory segment *
331 * str - [IN] the string to copy *
332 * *
333 * Return value: The offset to allocated data counting from the beginning *
334 * of data segment. *
335 * 0 if the source string is NULL or the shared memory segment *
336 * does not have enough free space. *
337 * *
338 ******************************************************************************/
procstat_strdup(void * base,const char * str)339 static size_t procstat_strdup(void *base, const char *str)
340 {
341 size_t len, offset;
342
343 if (NULL == str)
344 return PROCSTAT_NULL_OFFSET;
345
346 len = strlen(str) + 1;
347
348 offset = procstat_alloc(base, len);
349 memcpy(PROCSTAT_PTR(base, offset), str, len);
350
351 return offset;
352 }
353
354 /******************************************************************************
355 * *
356 * Function: procstat_reattach *
357 * *
358 * Purpose: reattaches the procstat_ref to the shared memory segment if it *
359 * was 'resized' (a new segment created and the old data copied) by *
360 * other process. *
361 * *
362 * Comments: This function logs critical error and exits in the case of *
363 * shared memory segement operation failure. *
364 * *
365 ******************************************************************************/
procstat_reattach(void)366 static void procstat_reattach(void)
367 {
368 char *errmsg = NULL;
369
370 if (FAIL == zbx_dshm_validate_ref(&collector->procstat, &procstat_ref, &errmsg))
371 {
372 zabbix_log(LOG_LEVEL_CRIT, "cannot validate process data collector reference: %s", errmsg);
373 zbx_free(errmsg);
374 exit(EXIT_FAILURE);
375 }
376 }
377
378 /******************************************************************************
379 * *
380 * Function: procstat_copy_data *
381 * *
382 * Purpose: copies procstat data to a new shared memory segment *
383 * *
384 * Parameters: dst - [OUT] the destination segment *
385 * size_dst - [IN] the size of destination segment *
386 * src - [IN] the source segment *
387 * *
388 ******************************************************************************/
procstat_copy_data(void * dst,size_t size_dst,const void * src)389 static void procstat_copy_data(void *dst, size_t size_dst, const void *src)
390 {
391 const char *__function_name = "procstat_copy_data";
392
393 int offset, *query_offset;
394 zbx_procstat_header_t *hdst = (zbx_procstat_header_t *)dst;
395 zbx_procstat_query_t *qsrc, *qdst = NULL;
396
397 zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);
398
399 hdst->size = size_dst;
400 hdst->size_allocated = PROCSTAT_ALIGNED_HEADER_SIZE;
401 hdst->queries = PROCSTAT_NULL_OFFSET;
402
403 if (NULL != src)
404 {
405 query_offset = &hdst->queries;
406
407 /* copy queries */
408 for (qsrc = PROCSTAT_QUERY_FIRST(src); NULL != qsrc; qsrc = PROCSTAT_QUERY_NEXT(src, qsrc))
409 {
410 /* the new shared memory segment must have enough space */
411 offset = procstat_alloc(dst, sizeof(zbx_procstat_query_t));
412
413 qdst = (zbx_procstat_query_t *)PROCSTAT_PTR(dst, offset);
414
415 memcpy(qdst, qsrc, sizeof(zbx_procstat_query_t));
416
417 qdst->procname = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->procname));
418 qdst->username = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->username));
419 qdst->cmdline = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->cmdline));
420
421 *query_offset = offset;
422 query_offset = &qdst->next;
423 }
424 }
425
426 zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
427 }
428
429 /******************************************************************************
430 * *
431 * Function: procstat_running *
432 * *
433 * Purpose: checks if processor statistics collector is running (at least one *
434 * one process statistics query has been made). *
435 * *
436 ******************************************************************************/
procstat_running(void)437 static int procstat_running(void)
438 {
439 if (ZBX_NONEXISTENT_SHMID == collector->procstat.shmid)
440 return FAIL;
441
442 return SUCCEED;
443 }
444
445 /******************************************************************************
446 * *
447 * Function: procstat_get_query *
448 * *
449 * Purpose: get process statistics query based on process name, user name *
450 * and command line *
451 * *
452 * Parameters: base - [IN] the procstat shared memory segment *
453 * procname - [IN] the process name *
454 * username - [IN] the user name *
455 * cmdline - [IN] the command line *
456 * flags - [IN] platform specific flags *
457 * *
458 * Return value: The process statistics query for the specified parameters or *
459 * NULL if the statistics are not being gathered for the *
460 * specified parameters. *
461 * *
462 ******************************************************************************/
procstat_get_query(void * base,const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags)463 static zbx_procstat_query_t *procstat_get_query(void *base, const char *procname, const char *username,
464 const char *cmdline, zbx_uint64_t flags)
465 {
466 zbx_procstat_query_t *query;
467
468 if (SUCCEED != procstat_running())
469 return NULL;
470
471 for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
472 {
473 if (0 == zbx_strcmp_null(procname, PROCSTAT_PTR_NULL(base, query->procname)) &&
474 0 == zbx_strcmp_null(username, PROCSTAT_PTR_NULL(base, query->username)) &&
475 0 == zbx_strcmp_null(cmdline, PROCSTAT_PTR_NULL(base, query->cmdline)) &&
476 flags == query->flags)
477 {
478 return query;
479 }
480 }
481
482 return NULL;
483 }
484
485 /******************************************************************************
486 * *
487 * Function: procstat_add *
488 * *
489 * Purpose: adds a new query to process statistics collector *
490 * *
491 * Parameters: procname - [IN] the process name *
492 * username - [IN] the user name *
493 * cmdline - [IN] the command line *
494 * flags - [IN] platform specific flags *
495 * *
496 * Return value: *
497 * This function calls exit() on shared memory errors. *
498 * *
499 ******************************************************************************/
procstat_add(const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags)500 static void procstat_add(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags)
501 {
502 const char *__function_name = "procstat_add";
503 char *errmsg = NULL;
504 size_t size = 0;
505 zbx_procstat_query_t *query;
506 zbx_procstat_header_t *header;
507 int query_offset;
508
509 zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);
510
511 /* when allocating a new collection reserve space for procstat header */
512 if (0 == collector->procstat.size)
513 size += PROCSTAT_ALIGNED_HEADER_SIZE;
514
515 /* reserve space for process attributes */
516 if (NULL != procname)
517 size += ZBX_SIZE_T_ALIGN8(strlen(procname) + 1);
518
519 if (NULL != username)
520 size += ZBX_SIZE_T_ALIGN8(strlen(username) + 1);
521
522 if (NULL != cmdline)
523 size += ZBX_SIZE_T_ALIGN8(strlen(cmdline) + 1);
524
525 /* procstat_add() is called when the shared memory reference has already been validated - */
526 /* no need to call procstat_reattach() */
527
528 /* reserve space for query container */
529 size += ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));
530
531 if (NULL == procstat_ref.addr || FAIL == procstat_dshm_has_enough_space(procstat_ref.addr, size))
532 {
533 /* recalculate the space required to store existing data + new query */
534 size += procstat_dshm_used_size(procstat_ref.addr);
535
536 if (FAIL == zbx_dshm_realloc(&collector->procstat, size, &errmsg))
537 {
538 zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", errmsg);
539 zbx_free(errmsg);
540 zbx_dshm_unlock(&collector->procstat);
541
542 exit(EXIT_FAILURE);
543 }
544
545 /* header initialised in procstat_copy_data() which is called back from zbx_dshm_realloc() */
546 procstat_reattach();
547 }
548
549 header = (zbx_procstat_header_t *)procstat_ref.addr;
550
551 query_offset = procstat_alloc(procstat_ref.addr, sizeof(zbx_procstat_query_t));
552
553 /* initialize the created query */
554 query = (zbx_procstat_query_t *)PROCSTAT_PTR_NULL(procstat_ref.addr, query_offset);
555
556 memset(query, 0, sizeof(zbx_procstat_query_t));
557
558 query->procname = procstat_strdup(procstat_ref.addr, procname);
559 query->username = procstat_strdup(procstat_ref.addr, username);
560 query->cmdline = procstat_strdup(procstat_ref.addr, cmdline);
561 query->flags = flags;
562 query->last_accessed = time(NULL);
563 query->next = header->queries;
564 header->queries = query_offset;
565
566 zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
567 }
568
569 /******************************************************************************
570 * *
571 * Function: procstat_free_query_data *
572 * *
573 * Purpose: frees the query data structure used to store queries locally *
574 * *
575 ******************************************************************************/
procstat_free_query_data(zbx_procstat_query_data_t * data)576 static void procstat_free_query_data(zbx_procstat_query_data_t *data)
577 {
578 zbx_vector_uint64_destroy(&data->pids);
579 zbx_free(data);
580 }
581
582 /******************************************************************************
583 * *
584 * Function: procstat_try_compress *
585 * *
586 * Purpose: try to compress (remove inactive queries) the procstat shared *
587 * memory segment once per day *
588 * *
589 * Parameters: base - [IN] the procstat shared memory segment *
590 * *
591 ******************************************************************************/
procstat_try_compress(void * base)592 static void procstat_try_compress(void *base)
593 {
594 static int collector_iteration = 0;
595
596 /* The iteration counter ~ the number seconds collector has been running */
597 /* because collector data gathering is done once per second. */
598 /* This approximation is done to avoid calling time() function if there */
599 /* are no defined queries. */
600 if (0 == (++collector_iteration % PROCSTAT_COMPRESS_PERIOD))
601 {
602 zbx_procstat_header_t *header = (zbx_procstat_header_t *)procstat_ref.addr;
603 size_t size;
604 char *errmsg = NULL;
605
606 size = procstat_dshm_used_size(base);
607
608 if (size < header->size && FAIL == zbx_dshm_realloc(&collector->procstat, size, &errmsg))
609 {
610 zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", errmsg);
611 zbx_free(errmsg);
612 zbx_dshm_unlock(&collector->procstat);
613
614 exit(EXIT_FAILURE);
615 }
616 }
617 }
618
619 /******************************************************************************
620 * *
621 * Function: procstat_build_local_query_vector *
622 * *
623 * Purpose: builds a local copy of the process cpu utilization queries and *
624 * removes expired (not used during last 24 hours) queries *
625 * *
626 * Parameters: queries_ptr - [OUT] local copy of queries copied from queries *
627 * in shared memory segment *
628 * runid - [IN] marker for queries to be processed in the *
629 * current collector iteration *
630 * *
631 * Return value: The flags defining the process properties to be retrieved. *
632 * See ZBX_SYSINFO_PROC_ defines. *
633 * *
634 * Comments: updates queries (runid) in shared memory segment *
635 * *
636 ******************************************************************************/
procstat_build_local_query_vector(zbx_vector_ptr_t * queries_ptr,int runid)637 static int procstat_build_local_query_vector(zbx_vector_ptr_t *queries_ptr, int runid)
638 {
639 zbx_procstat_header_t *header;
640 time_t now;
641 zbx_procstat_query_t *query;
642 zbx_procstat_query_data_t *qdata;
643 int flags = ZBX_SYSINFO_PROC_NONE, *pnext_query;
644
645 zbx_dshm_lock(&collector->procstat);
646
647 procstat_reattach();
648
649 header = (zbx_procstat_header_t *)procstat_ref.addr;
650
651 if (PROCSTAT_NULL_OFFSET == header->queries)
652 goto out;
653
654 flags = ZBX_SYSINFO_PROC_PID;
655
656 now = time(NULL);
657 pnext_query = &header->queries;
658
659 for (query = PROCSTAT_QUERY_FIRST(procstat_ref.addr); NULL != query;
660 query = PROCSTAT_QUERY_NEXT(procstat_ref.addr, query))
661 {
662 /* remove unused queries, the data is still allocated until the next resize */
663 if (PROCSTAT_MAX_INACTIVITY_PERIOD < now - query->last_accessed)
664 {
665 *pnext_query = query->next;
666 continue;
667 }
668
669 qdata = (zbx_procstat_query_data_t *)zbx_malloc(NULL, sizeof(zbx_procstat_query_data_t));
670 zbx_vector_uint64_create(&qdata->pids);
671
672 /* store the reference to query attributes, which is guaranteed to be */
673 /* valid until we call process_reattach() */
674 if (NULL != (qdata->procname = PROCSTAT_PTR_NULL(procstat_ref.addr, query->procname)))
675 flags |= ZBX_SYSINFO_PROC_NAME;
676
677 if (NULL != (qdata->username = PROCSTAT_PTR_NULL(procstat_ref.addr, query->username)))
678 flags |= ZBX_SYSINFO_PROC_USER;
679
680 if (NULL != (qdata->cmdline = PROCSTAT_PTR_NULL(procstat_ref.addr, query->cmdline)))
681 flags |= ZBX_SYSINFO_PROC_CMDLINE;
682
683 qdata->flags = query->flags;
684 qdata->utime = 0;
685 qdata->stime = 0;
686 qdata->error = 0;
687
688 zbx_vector_ptr_append(queries_ptr, qdata);
689
690 /* The order of queries can be changed only by collector itself (when removing old */
691 /* queries), but during statistics gathering the shared memory is unlocked and other */
692 /* processes might insert queries at the beginning of active queries list. */
693 /* Mark the queries being processed by current data gathering cycle with id that */
694 /* is incremented at the end of every data gathering cycle. We can be sure that */
695 /* our local copy will match the queries in shared memory having the same runid. */
696 query->runid = runid;
697
698 pnext_query = &query->next;
699 }
700
701 out:
702 procstat_try_compress(procstat_ref.addr);
703
704 zbx_dshm_unlock(&collector->procstat);
705
706 return flags;
707 }
708
709 /******************************************************************************
710 * *
711 * Function: procstat_scan_query_pids *
712 * *
713 * Purpose: for every query gets the pids of processes matching query *
714 * attributes *
715 * *
716 * Parameters: queries - [IN/OUT] fills pids and error for each query *
717 * *
718 * Return value: total number of pids saved in all queries *
719 * *
720 ******************************************************************************/
procstat_scan_query_pids(zbx_vector_ptr_t * queries,const zbx_vector_ptr_t * processes)721 static int procstat_scan_query_pids(zbx_vector_ptr_t *queries, const zbx_vector_ptr_t *processes)
722 {
723 zbx_procstat_query_data_t *qdata;
724 int i, pids_num = 0;
725
726 for (i = 0; i < queries->values_num; i++)
727 {
728 qdata = (zbx_procstat_query_data_t *)queries->values[i];
729
730 zbx_proc_get_matching_pids(processes, qdata->procname, qdata->username, qdata->cmdline, qdata->flags,
731 &qdata->pids);
732
733 pids_num += qdata->pids.values_num;
734 }
735
736 return pids_num;
737 }
738
739 /******************************************************************************
740 * *
741 * Function: procstat_get_monitored_pids *
742 * *
743 * Purpose: creates a list of unique pids that are monitored by current data *
744 * gathering cycle *
745 * *
746 * Parameters: pids - [OUT] a sorted vector of unique pids *
747 * queries - [IN] local, working copy of queries *
748 * pids_num - [IN] the total number of pids monitored by queries *
749 * (might contain duplicated pids) *
750 * *
751 ******************************************************************************/
procstat_get_monitored_pids(zbx_vector_uint64_t * pids,const zbx_vector_ptr_t * queries,int pids_num)752 static void procstat_get_monitored_pids(zbx_vector_uint64_t *pids, const zbx_vector_ptr_t *queries, int pids_num)
753 {
754 zbx_procstat_query_data_t *qdata;
755 int i;
756
757 zbx_vector_uint64_reserve(pids, pids_num);
758
759 for (i = 0; i < queries->values_num; i++)
760 {
761 qdata = (zbx_procstat_query_data_t *)queries->values[i];
762
763 if (SUCCEED != qdata->error)
764 continue;
765
766 memcpy(pids->values + pids->values_num, qdata->pids.values,
767 sizeof(zbx_uint64_t) * qdata->pids.values_num);
768 pids->values_num += qdata->pids.values_num;
769 }
770
771 zbx_vector_uint64_sort(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
772 zbx_vector_uint64_uniq(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
773 }
774
775 /******************************************************************************
776 * *
777 * Function: procstat_get_cpu_util_snapshot_for_pids *
778 * *
779 * Purpose: gets cpu utilization data snapshot for the monitored processes *
780 * *
781 * Parameters: stats - [OUT] current reading of the per-pid cpu usage *
782 * statistics (array, items correspond to pids) *
783 * pids - [IN] pids (unique) for which to collect data in this *
784 * iteration *
785 * *
786 * Return value: timestamp of the snapshot *
787 * *
788 ******************************************************************************/
procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t * stats,zbx_vector_uint64_t * pids)789 static zbx_timespec_t procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t *stats,
790 zbx_vector_uint64_t *pids)
791 {
792 zbx_timespec_t snapshot_timestamp;
793 int i;
794
795 for (i = 0; i < pids->values_num; i++)
796 stats[i].pid = pids->values[i];
797
798 zbx_proc_get_process_stats(stats, pids->values_num);
799
800 zbx_timespec(&snapshot_timestamp);
801
802 return snapshot_timestamp;
803 }
804
805 /******************************************************************************
806 * *
807 * Function: procstat_util_compare *
808 * *
809 * Purpose: compare process utilization data by their pids *
810 * *
811 ******************************************************************************/
procstat_util_compare(const void * d1,const void * d2)812 static int procstat_util_compare(const void *d1, const void *d2)
813 {
814 const zbx_procstat_util_t *u1 = (zbx_procstat_util_t *)d1;
815 const zbx_procstat_util_t *u2 = (zbx_procstat_util_t *)d2;
816
817 ZBX_RETURN_IF_NOT_EQUAL(u1->pid, u2->pid);
818
819 return 0;
820 }
821
822 /******************************************************************************
823 * *
824 * Function: procstat_calculate_cpu_util_for_queries *
825 * *
826 * Purpose: calculates the cpu utilization for queries since the previous *
827 * snapshot *
828 * *
829 * Parameters: queries - [IN/OUT] local, working copy of queries, saving *
830 * utime, stime and error *
831 * pids - [IN] pids (unique) for which to collect data in *
832 * this iteration *
833 * stats - [IN] current reading of the per-pid cpu usage *
834 * statistics (array, items correspond to pids) *
835 * *
836 ******************************************************************************/
static void	procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t *queries,
		zbx_vector_uint64_t *pids, const zbx_procstat_util_t *stats)
{
	int	query_idx;

	for (query_idx = 0; query_idx < queries->values_num; query_idx++)
	{
		zbx_procstat_query_data_t	*qdata = (zbx_procstat_query_data_t *)queries->values[query_idx];
		int				pid_idx;

		/* add up the cpu time consumed since the last snapshot by every pid */
		/* of the query that is found in both the current and last snapshot  */
		for (pid_idx = 0; pid_idx < qdata->pids.values_num; pid_idx++)
		{
			const zbx_procstat_util_t	*entry;
			zbx_procstat_util_t		key;
			zbx_uint64_t			utime_now, stime_now, starttime_now;

			/* only the pid of the search key is set; presumably */
			/* procstat_util_compare looks at the pid alone      */
			key.pid = qdata->pids.values[pid_idx];

			/* current reading; stats holds one item per entry of pids */
			entry = (const zbx_procstat_util_t *)zbx_bsearch(&key, stats, pids->values_num,
					sizeof(zbx_procstat_util_t), procstat_util_compare);

			if (NULL == entry || SUCCEED != entry->error)
				continue;

			utime_now = entry->utime;
			stime_now = entry->stime;
			starttime_now = entry->starttime;

			/* reading stored by the previous collector iteration */
			entry = (const zbx_procstat_util_t *)zbx_bsearch(&key, procstat_snapshot,
					procstat_snapshot_num, sizeof(zbx_procstat_util_t), procstat_util_compare);

			/* a different start time means that the two readings with the */
			/* same pid cannot be subtracted from each other               */
			if (NULL == entry || SUCCEED != entry->error || entry->starttime != starttime_now)
				continue;

			qdata->utime += utime_now - entry->utime;
			qdata->stime += stime_now - entry->stime;
		}
	}
}
881
882 /******************************************************************************
883 * *
884 * Function: procstat_update_query_statistics *
885 * *
886 * Purpose: updates cpu utilization and saves the new snapshot for queries in *
887 * shared memory segment *
888 * *
889 * Parameters: queries - [IN] local, working copy of queries (utime, stime *
890 * and error must be set) *
891 * runid - [IN] marker for queries to be processed in the *
892 * current collector iteration *
893 * snapshot_timestamp - [IN] timestamp of the current snapshot *
894 * *
895 * Comments: updates header (pids_num) and queries (h_data, h_count, h_first) *
896 * in shared memory segment, writes stats at the end of the shared *
897 * memory segment *
898 * *
899 ******************************************************************************/
procstat_update_query_statistics(zbx_vector_ptr_t * queries,int runid,const zbx_timespec_t * snapshot_timestamp)900 static void procstat_update_query_statistics(zbx_vector_ptr_t *queries, int runid,
901 const zbx_timespec_t *snapshot_timestamp)
902 {
903 zbx_procstat_query_t *query;
904 zbx_procstat_query_data_t *qdata;
905 int index;
906 int i;
907
908 zbx_dshm_lock(&collector->procstat);
909
910 procstat_reattach();
911
912 for (query = PROCSTAT_QUERY_FIRST(procstat_ref.addr), i = 0; NULL != query;
913 query = PROCSTAT_QUERY_NEXT(procstat_ref.addr, query))
914 {
915 if (runid != query->runid)
916 continue;
917
918 if (i >= queries->values_num)
919 {
920 THIS_SHOULD_NEVER_HAPPEN;
921 break;
922 }
923
924 qdata = (zbx_procstat_query_data_t *)queries->values[i++];
925
926 if (SUCCEED != (query->error = qdata->error))
927 continue;
928
929 /* find the next history data slot */
930 if (0 < query->h_count)
931 {
932 if (MAX_COLLECTOR_HISTORY <= (index = query->h_first + query->h_count - 1))
933 index -= MAX_COLLECTOR_HISTORY;
934
935 qdata->utime += query->h_data[index].utime;
936 qdata->stime += query->h_data[index].stime;
937
938 if (MAX_COLLECTOR_HISTORY <= ++index)
939 index -= MAX_COLLECTOR_HISTORY;
940 }
941 else
942 index = 0;
943
944 if (MAX_COLLECTOR_HISTORY == query->h_count)
945 {
946 if (MAX_COLLECTOR_HISTORY <= ++query->h_first)
947 query->h_first = 0;
948 }
949 else
950 query->h_count++;
951
952 query->h_data[index].utime = qdata->utime;
953 query->h_data[index].stime = qdata->stime;
954 query->h_data[index].timestamp = *snapshot_timestamp;
955 }
956
957 zbx_dshm_unlock(&collector->procstat);
958 }
959
960 /*
961 * Public API
962 */
963
964 /******************************************************************************
965 * *
966 * Function: zbx_procstat_collector_started *
967 * *
968 * Purpose: checks if processor statistics collector is enabled (the main *
969 * collector has been initialized) *
970 * *
971 ******************************************************************************/
zbx_procstat_collector_started(void)972 int zbx_procstat_collector_started(void)
973 {
974 if (NULL == collector)
975 return FAIL;
976
977 return SUCCEED;
978 }
979
980 /******************************************************************************
981 * *
982 * Function: zbx_procstat_init *
983 * *
984 * Purpose: initializes process statistics collector *
985 * *
986 * Return value: This function calls exit() on shared memory errors. *
987 * *
988 ******************************************************************************/
zbx_procstat_init(void)989 void zbx_procstat_init(void)
990 {
991 char *errmsg = NULL;
992
993 if (SUCCEED != zbx_dshm_create(&collector->procstat, ZBX_IPC_COLLECTOR_PROC_ID, 0, ZBX_MUTEX_PROCSTAT,
994 procstat_copy_data, &errmsg))
995 {
996 zabbix_log(LOG_LEVEL_CRIT, "cannot initialize process data collector: %s", errmsg);
997 zbx_free(errmsg);
998 exit(EXIT_FAILURE);
999 }
1000
1001 procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
1002 procstat_ref.addr = NULL;
1003 }
1004
1005 /******************************************************************************
1006 * *
1007 * Function: zbx_procstat_destroy *
1008 * *
1009 * Purpose: destroys process statistics collector *
1010 * *
1011 ******************************************************************************/
zbx_procstat_destroy(void)1012 void zbx_procstat_destroy(void)
1013 {
1014 char *errmsg = NULL;
1015
1016 if (SUCCEED != zbx_dshm_destroy(&collector->procstat, &errmsg))
1017 {
1018 zabbix_log(LOG_LEVEL_CRIT, "cannot free resources allocated by process data collector: %s", errmsg);
1019 zbx_free(errmsg);
1020 }
1021
1022 procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
1023 procstat_ref.addr = NULL;
1024 }
1025
1026 /******************************************************************************
1027 * *
1028 * Function: zbx_procstat_get_util *
1029 * *
1030 * Purpose: gets process cpu utilization *
1031 * *
1032 * Parameters: procname - [IN] the process name, NULL - all *
1033 * username - [IN] the user name, NULL - all *
1034 * cmdline - [IN] the command line, NULL - all *
1035 * collector_func - [IN] the callback function to use for process *
1036 * statistics gathering *
1037 * period - [IN] the time period *
1038 * type - [IN] the cpu utilization type, see *
1039 * ZBX_PROCSTAT_CPU_* defines *
1040 * value - [OUT] the utilization in % *
1041 * errmsg - [OUT] the error message *
1042 * *
1043 * Return value: *
1044 * SUCCEED - the utime value was retrieved successfully *
1045 * FAIL - either collector does not have at least two data samples *
1046 * required to calculate the statistics, or an error occurred *
1047 * during the collection process. In the second case the errmsg *
1048 * will contain an error message. *
1049 * This function calls exit() on shared memory errors. *
1050 * *
1051 ******************************************************************************/
zbx_procstat_get_util(const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags,int period,int type,double * value,char ** errmsg)1052 int zbx_procstat_get_util(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags,
1053 int period, int type, double *value, char **errmsg)
1054 {
1055 int ret = FAIL, current, start;
1056 zbx_procstat_query_t *query;
1057 zbx_uint64_t ticks_diff = 0, time_diff;
1058
1059 zbx_dshm_lock(&collector->procstat);
1060
1061 procstat_reattach();
1062
1063 if (NULL == (query = procstat_get_query(procstat_ref.addr, procname, username, cmdline, flags)))
1064 {
1065 if (procstat_queries_num(procstat_ref.addr) == PROCSTAT_MAX_QUERIES)
1066 *errmsg = zbx_strdup(*errmsg, "Maximum number of queries reached.");
1067 else
1068 procstat_add(procname, username, cmdline, flags);
1069
1070 goto out;
1071 }
1072
1073 query->last_accessed = time(NULL);
1074
1075 if (0 != query->error)
1076 {
1077 *errmsg = zbx_dsprintf(*errmsg, "Cannot read cpu utilization data: %s", zbx_strerror(-query->error));
1078 goto out;
1079 }
1080
1081 if (1 >= query->h_count)
1082 goto out;
1083
1084 if (period >= query->h_count)
1085 period = query->h_count - 1;
1086
1087 if (MAX_COLLECTOR_HISTORY <= (current = query->h_first + query->h_count - 1))
1088 current -= MAX_COLLECTOR_HISTORY;
1089
1090 if (0 > (start = current - period))
1091 start += MAX_COLLECTOR_HISTORY;
1092
1093 if (0 != (type & ZBX_PROCSTAT_CPU_USER))
1094 ticks_diff += query->h_data[current].utime - query->h_data[start].utime;
1095
1096 if (0 != (type & ZBX_PROCSTAT_CPU_SYSTEM))
1097 ticks_diff += query->h_data[current].stime - query->h_data[start].stime;
1098
1099 time_diff = (zbx_uint64_t)(query->h_data[current].timestamp.sec - query->h_data[start].timestamp.sec) *
1100 1000000000 + query->h_data[current].timestamp.ns - query->h_data[start].timestamp.ns;
1101
1102 /* 1e9 (nanoseconds) * 1e2 (percent) * 1e1 (one digit decimal place) */
1103 ticks_diff *= __UINT64_C(1000000000000);
1104 #ifdef HAVE_ROUND
1105 *value = round((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK))) / 10;
1106 #else
1107 *value = (int)((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK)) + 0.5) / 10.0;
1108 #endif
1109
1110 ret = SUCCEED;
1111 out:
1112 zbx_dshm_unlock(&collector->procstat);
1113
1114 return ret;
1115 }
1116
1117 /******************************************************************************
1118 * *
1119 * Function: zbx_procstat_collect *
1120 * *
1121 * Purpose: performs process statistics collection *
1122 * *
1123 ******************************************************************************/
zbx_procstat_collect(void)1124 void zbx_procstat_collect(void)
1125 {
1126 /* identifies current collection iteration */
1127 static int runid = 1;
1128
1129 /* number of (non-unique) pids that match queries */
1130 int pids_num = 0;
1131
1132 /* flags specifying what process properties must be retrieved */
1133 int flags;
1134
1135 /* local, working copy of queries */
1136 zbx_vector_ptr_t queries;
1137
1138 /* data about all processes on system */
1139 zbx_vector_ptr_t processes;
1140
1141 /* pids (unique) for which to collect data in this iteration */
1142 zbx_vector_uint64_t pids;
1143
1144 /* current reading of the per-pid cpu usage statistics (array, items correspond to pids) */
1145 zbx_procstat_util_t *stats;
1146
1147 /* time of the per-pid usage statistics collection */
1148 zbx_timespec_t snapshot_timestamp;
1149
1150 if (FAIL == zbx_procstat_collector_started() || FAIL == procstat_running())
1151 goto out;
1152
1153 zbx_vector_ptr_create(&queries);
1154 zbx_vector_ptr_create(&processes);
1155 zbx_vector_uint64_create(&pids);
1156
1157 if (ZBX_SYSINFO_PROC_NONE == (flags = procstat_build_local_query_vector(&queries, runid)))
1158 goto clean;
1159
1160 if (SUCCEED != zbx_proc_get_processes(&processes, flags))
1161 goto clean;
1162
1163 pids_num = procstat_scan_query_pids(&queries, &processes);
1164
1165 procstat_get_monitored_pids(&pids, &queries, pids_num);
1166
1167 stats = (zbx_procstat_util_t *)zbx_malloc(NULL, sizeof(zbx_procstat_util_t) * pids.values_num);
1168 snapshot_timestamp = procstat_get_cpu_util_snapshot_for_pids(stats, &pids);
1169
1170 procstat_calculate_cpu_util_for_queries(&queries, &pids, stats);
1171
1172 procstat_update_query_statistics(&queries, runid, &snapshot_timestamp);
1173
1174 /* replace the current snapshot with the new stats */
1175 zbx_free(procstat_snapshot);
1176 procstat_snapshot = stats;
1177 procstat_snapshot_num = pids.values_num;
1178 clean:
1179 zbx_vector_uint64_destroy(&pids);
1180
1181 zbx_proc_free_processes(&processes);
1182 zbx_vector_ptr_destroy(&processes);
1183
1184 zbx_vector_ptr_clear_ext(&queries, (zbx_mem_free_func_t)procstat_free_query_data);
1185 zbx_vector_ptr_destroy(&queries);
1186 out:
1187 runid++;
1188 }
1189
1190 #endif
1191