1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18 **/
19 
20 #include "common.h"
21 #include "log.h"
22 #include "mutexs.h"
23 #include "stats.h"
24 #include "ipc.h"
25 #include "procstat.h"
26 
27 #ifdef ZBX_PROCSTAT_COLLECTOR
28 
29 /*
30  * The process CPU statistics are stored using the following memory layout.
31  *
32  *  .--------------------------------------.
33  *  | header                               |
34  *  | ------------------------------------ |
35  *  | process cpu utilization queries      |
36  *  | and historical data                  |
37  *  | ------------------------------------ |
38  *  | free space                           |
39  *  '--------------------------------------'
40  *
 * Because the shared memory can be resized by other processes, memory offsets
 * from the beginning of the shared memory segment are used instead of pointers
 * (when allocating strings, building singly linked lists).
 * A 0 offset is interpreted similarly to a NULL pointer.
45  *
46  * Currently integer values are used to store offsets to internally allocated
47  * memory which leads to 2GB total size limit.
48  *
49  * During every data collection cycle collector does the following:
50  * 1) acquires list of all processes running on system
51  * 2) builds a list of processes monitored by queries
52  * 3) reads total cpu utilization snapshot for the monitored processes
53  * 4) calculates cpu utilization difference by comparing with previous snapshot
54  * 5) updates cpu utilization values for queries.
55  * 6) saves the last cpu utilization snapshot
56  *
57  * Initialisation.
58  * * zbx_procstat_init() initialises procstat dshm structure but doesn't allocate memory from the system
59  *   (zbx_dshm_create() called with size 0).
60  * * the first call of procstat_add() allocates the shared memory for the header and the first query
61  *   via call to zbx_dshm_realloc().
62  * * The header is initialised in procstat_copy_data() which is called back from zbx_dshm_realloc().
63  *
64  * Memory allocation within dshm.
65  * * Ensure that memory segment has enough free space with procstat_dshm_has_enough_space() before
66  *   allocating space within segment with procstat_alloc() or functions that use it.
67  * * Check how much of the allocated dshm is actually used by procstat by procstat_dshm_used_size().
68  * * Change the dshm size with zbx_dshm_realloc().
69  *
70  * Synchronisation.
71  * * agentd processes share a single instance of ZBX_COLLECTOR_DATA (*collector) containing reference
72  *   to shared procstat memory segment.
73  * * Each agentd process also holds local reference to procstat shared memory segment.
74  * * The system keeps the shared memory segment until the last process detaches from it.
75  * * Synchronise both references with procstat_reattach() before using procstat shared memory segment.
76  */
77 
/* the main collector data (defined elsewhere); collector->procstat is the shared */
/* reference to the procstat shared memory segment used by the functions below    */
extern ZBX_COLLECTOR_DATA	*collector;

/* local (per process) reference to the procstat shared memory, brought in sync */
/* with collector->procstat by procstat_reattach()                              */
static zbx_dshm_ref_t	procstat_ref;
83 
/* the header located at the very beginning of the procstat shared memory segment */
typedef struct
{
	/* a linked list of active queries (offset of the first active query), */
	/* PROCSTAT_NULL_OFFSET when there are no active queries               */
	int	queries;

	/* the number of bytes handed out from the beginning of the segment: it starts */
	/* at PROCSTAT_ALIGNED_HEADER_SIZE and grows with every allocated query and    */
	/* string, so it is also the offset at which the next allocation is placed     */
	int	size_allocated;

	/* the total shared memory segment size */
	size_t	size;
}
zbx_procstat_header_t;
96 
/* the offset value reserved to represent "no data" (the counterpart of a NULL pointer) */
#define PROCSTAT_NULL_OFFSET		0

/* the procstat header size aligned with ZBX_SIZE_T_ALIGN8() */
#define PROCSTAT_ALIGNED_HEADER_SIZE	ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_header_t))

/* converts an offset from the segment beginning into a pointer (no NULL offset handling) */
#define PROCSTAT_PTR(base, offset)	((char *)(base) + (offset))

/* converts an offset into a pointer, PROCSTAT_NULL_OFFSET is converted to NULL; */
/* note that the offset argument is evaluated twice                              */
#define PROCSTAT_PTR_NULL(base, offset)									\
		(PROCSTAT_NULL_OFFSET == (offset) ? NULL : PROCSTAT_PTR(base, offset))

/* returns the first active query of the segment or NULL if there are no active queries */
#define PROCSTAT_QUERY_FIRST(base)									\
		((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, ((zbx_procstat_header_t *)(base))->queries))

/* returns the query following the specified one or NULL if it was the last query */
#define PROCSTAT_QUERY_NEXT(base, query)								\
		((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, (query)->next))

/* converts a pointer into the segment to its offset (in bytes) from the segment beginning */
#define PROCSTAT_OFFSET(base, ptr)	((char *)(ptr) - (char *)(base))

/* maximum number of active procstat queries */
#define PROCSTAT_MAX_QUERIES	1024

/* the time period after which inactive queries (not accessed during this period) can be removed */
#define PROCSTAT_MAX_INACTIVITY_PERIOD	SEC_PER_DAY

/* the time interval between compressing (inactive query removal) attempts */
#define PROCSTAT_COMPRESS_PERIOD	SEC_PER_DAY
122 
/* data sample collected every second for the process cpu utilization queries; */
/* samples are stored in the per query history ring buffer (h_data)            */
typedef struct
{
	/* cpu time counters of the sample - presumably user (utime) and system (stime) */
	/* time, the units are not visible in this part of the file (TODO confirm)      */
	zbx_uint64_t	utime;
	zbx_uint64_t	stime;

	/* presumably the time when the sample was taken - verify against the collector code */
	zbx_timespec_t	timestamp;
}
zbx_procstat_data_t;
131 
/* process cpu utilization query as stored in the procstat shared memory segment */
typedef struct
{
	/* the process attributes: offsets (from the segment beginning) of zero terminated */
	/* strings copied into the segment with procstat_strdup(), PROCSTAT_NULL_OFFSET    */
	/* if the corresponding attribute is not set                                       */
	size_t				procname;
	size_t				username;
	size_t				cmdline;

	/* platform specific flags, compared together with the attributes when looking up a query */
	zbx_uint64_t			flags;

	/* the index of first (oldest) entry in the history data */
	int				h_first;

	/* the number of entries in the history data */
	int				h_count;

	/* the last access time (request from server), initialised with time() when the */
	/* query is added and checked against PROCSTAT_MAX_INACTIVITY_PERIOD to detect   */
	/* unused queries                                                                */
	int				last_accessed;

	/* increasing id for every data collection run, used to       */
	/* identify queries that are processed during data collection */
	int				runid;

	/* error code */
	int				error;

	/* offset (from segment beginning) of the next process query, */
	/* PROCSTAT_NULL_OFFSET for the last query in the list        */
	int				next;

	/* the cpu utilization history data (ring buffer) */
	zbx_procstat_data_t		h_data[MAX_COLLECTOR_HISTORY];
}
zbx_procstat_query_t;
164 
/* process cpu utilization query data - a local (heap allocated) working copy of a query, */
/* created by procstat_build_local_query_vector() and released by                          */
/* procstat_free_query_data()                                                              */
typedef struct
{
	/* process attributes; these are not copies but pointers into the attached */
	/* procstat shared memory segment (NULL if the attribute is not set)       */
	const char		*procname;
	const char		*username;
	const char		*cmdline;
	zbx_uint64_t		flags;

	/* error code */
	int			error;

	/* process cpu utilization */
	zbx_uint64_t		utime;
	zbx_uint64_t		stime;

	/* vector of pids matching the process attributes */
	zbx_vector_uint64_t	pids;
}
zbx_procstat_query_data_t;
185 
/* the process cpu utilization snapshot; presumably the snapshot saved at the end of a   */
/* collection cycle for comparison during the next one (steps 4 and 6 of the collection  */
/* cycle described at the top of this file) - confirm against the collection code        */
static zbx_procstat_util_t	*procstat_snapshot;
/* the number of processes in process cpu utilization snapshot */
static int			procstat_snapshot_num;
190 
191 /******************************************************************************
192  *                                                                            *
193  * Function: procstat_dshm_has_enough_space                                   *
194  *                                                                            *
195  * Purpose: check if the procstat shared memory segment has at least          *
196  *          the specified amount of free bytes in the segment                 *
197  *                                                                            *
198  * Parameters: base - [IN] the procstat shared memory segment                 *
199  *             size - [IN] number of free bytes needed                        *
200  *                                                                            *
201  * Return value: SUCCEED - sufficient amount of bytes are available           *
202  *               FAIL    - otherwise                                          *
203  *                                                                            *
204  ******************************************************************************/
procstat_dshm_has_enough_space(void * base,size_t size)205 static int	procstat_dshm_has_enough_space(void *base, size_t size)
206 {
207 	zbx_procstat_header_t	*header = (zbx_procstat_header_t *)base;
208 
209 	if (header->size >= size + header->size_allocated)
210 		return SUCCEED;
211 
212 	return FAIL;
213 }
214 
215 /******************************************************************************
216  *                                                                            *
217  * Function: procstat_dshm_used_size                                          *
218  *                                                                            *
219  * Purpose: calculate the actual shared memory size used by procstat          *
220  *                                                                            *
221  * Parameters: base - [IN] the procstat shared memory segment                 *
222  *                                                                            *
223  * Return value: The number of bytes required to store current procstat data. *
224  *                                                                            *
225  ******************************************************************************/
procstat_dshm_used_size(void * base)226 static size_t	procstat_dshm_used_size(void *base)
227 {
228 	const zbx_procstat_query_t	*query;
229 	size_t				size;
230 
231 	if (NULL == base)
232 		return 0;
233 
234 	size = PROCSTAT_ALIGNED_HEADER_SIZE;
235 
236 	for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
237 	{
238 		if (PROCSTAT_NULL_OFFSET != query->procname)
239 			size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->procname)) + 1);
240 
241 		if (PROCSTAT_NULL_OFFSET != query->username)
242 			size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->username)) + 1);
243 
244 		if (PROCSTAT_NULL_OFFSET != query->cmdline)
245 			size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->cmdline)) + 1);
246 
247 		size += ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));
248 	}
249 
250 	return size;
251 }
252 
253 /******************************************************************************
254  *                                                                            *
255  * Function: procstat_queries_num                                             *
256  *                                                                            *
257  * Purpose: calculate the number of active queries                            *
258  *                                                                            *
259  * Parameters: base - [IN] the procstat shared memory segment                 *
260  *                                                                            *
261  * Return value: The number of active queries.                                *
262  *                                                                            *
263  ******************************************************************************/
procstat_queries_num(void * base)264 static int	procstat_queries_num(void *base)
265 {
266 	const zbx_procstat_query_t	*query;
267 	int				queries_num;
268 
269 	if (NULL == base)
270 		return 0;
271 
272 	queries_num = 0;
273 
274 	for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
275 		queries_num++;
276 
277 	return queries_num;
278 }
279 
280 /******************************************************************************
281  *                                                                            *
282  * Function: procstat_alloc                                                   *
283  *                                                                            *
284  * Purpose: allocates memory in the shared memory segment,                    *
285  *          calls exit() if segment is too small                              *
286  *                                                                            *
287  * Parameters: base - [IN] the procstat shared memory segment                 *
288  *             size - [IN] the number of bytes to allocate                    *
289  *                                                                            *
290  * Return value: The offset of allocated data from the beginning of segment   *
291  *               (positive value).                                            *
292  *                                                                            *
293  ******************************************************************************/
procstat_alloc(void * base,size_t size)294 static int	procstat_alloc(void *base, size_t size)
295 {
296 	zbx_procstat_header_t	*header = (zbx_procstat_header_t *)base;
297 	int			offset;
298 
299 	size = ZBX_SIZE_T_ALIGN8(size);
300 
301 	if (FAIL == procstat_dshm_has_enough_space(header, size))
302 	{
303 		THIS_SHOULD_NEVER_HAPPEN;
304 		exit(EXIT_FAILURE);
305 	}
306 
307 	offset = header->size_allocated;
308 	header->size_allocated += size;
309 
310 	return offset;
311 }
312 
313 /******************************************************************************
314  *                                                                            *
315  * Function: procstat_strdup                                                  *
316  *                                                                            *
317  * Purpose: allocates required memory in procstat memory segment and copies   *
318  *          the specified string (calls exit() if segment is too small)       *
319  *                                                                            *
320  * Parameters: base - [IN] the procstat shared memory segment                 *
321  *             str  - [IN] the string to copy                                 *
322  *                                                                            *
323  * Return value: The offset to allocated data counting from the beginning     *
324  *               of data segment.                                             *
 *               0 if the source string is NULL. On insufficient free space   *
 *               the process exits instead of returning.                      *
327  *                                                                            *
328  ******************************************************************************/
procstat_strdup(void * base,const char * str)329 static size_t	procstat_strdup(void *base, const char *str)
330 {
331 	size_t	len, offset;
332 
333 	if (NULL == str)
334 		return PROCSTAT_NULL_OFFSET;
335 
336 	len = strlen(str) + 1;
337 
338 	offset = procstat_alloc(base, len);
339 		memcpy(PROCSTAT_PTR(base, offset), str, len);
340 
341 	return offset;
342 }
343 
344 /******************************************************************************
345  *                                                                            *
346  * Function: procstat_reattach                                                *
347  *                                                                            *
348  * Purpose: reattaches the procstat_ref to the shared memory segment if it    *
349  *          was 'resized' (a new segment created and the old data copied) by  *
350  *          other process.                                                    *
351  *                                                                            *
352  * Comments: This function logs critical error and exits in the case of       *
353  *           shared memory segment operation failure.                         *
354  *                                                                            *
355  ******************************************************************************/
procstat_reattach(void)356 static void	procstat_reattach(void)
357 {
358 	char	*errmsg = NULL;
359 
360 	if (FAIL == zbx_dshm_validate_ref(&collector->procstat, &procstat_ref, &errmsg))
361 	{
362 		zabbix_log(LOG_LEVEL_CRIT, "cannot validate process data collector reference: %s", errmsg);
363 		zbx_free(errmsg);
364 		exit(EXIT_FAILURE);
365 	}
366 }
367 
368 /******************************************************************************
369  *                                                                            *
370  * Function: procstat_copy_data                                               *
371  *                                                                            *
372  * Purpose: copies procstat data to a new shared memory segment               *
373  *                                                                            *
374  * Parameters: dst      - [OUT] the destination segment                       *
375  *             size_dst - [IN] the size of destination segment                *
376  *             src      - [IN] the source segment                             *
377  *                                                                            *
378  ******************************************************************************/
procstat_copy_data(void * dst,size_t size_dst,const void * src)379 static void	procstat_copy_data(void *dst, size_t size_dst, const void *src)
380 {
381 	int			offset, *query_offset;
382 	zbx_procstat_header_t	*hdst = (zbx_procstat_header_t *)dst;
383 	zbx_procstat_query_t	*qsrc, *qdst = NULL;
384 
385 	zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __func__);
386 
387 	hdst->size = size_dst;
388 	hdst->size_allocated = PROCSTAT_ALIGNED_HEADER_SIZE;
389 	hdst->queries = PROCSTAT_NULL_OFFSET;
390 
391 	if (NULL != src)
392 	{
393 		query_offset = &hdst->queries;
394 
395 		/* copy queries */
396 		for (qsrc = PROCSTAT_QUERY_FIRST(src); NULL != qsrc; qsrc = PROCSTAT_QUERY_NEXT(src, qsrc))
397 		{
398 			/* the new shared memory segment must have enough space */
399 			offset = procstat_alloc(dst, sizeof(zbx_procstat_query_t));
400 
401 			qdst = (zbx_procstat_query_t *)PROCSTAT_PTR(dst, offset);
402 
403 			memcpy(qdst, qsrc, sizeof(zbx_procstat_query_t));
404 
405 			qdst->procname = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->procname));
406 			qdst->username = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->username));
407 			qdst->cmdline = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->cmdline));
408 
409 			*query_offset = offset;
410 			query_offset = &qdst->next;
411 		}
412 	}
413 
414 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __func__);
415 }
416 
417 /******************************************************************************
418  *                                                                            *
419  * Function: procstat_running                                                 *
420  *                                                                            *
 * Purpose: checks if process statistics collector is running (at least one   *
 *          process statistics query has been made).                          *
423  *                                                                            *
424  ******************************************************************************/
procstat_running(void)425 static int	procstat_running(void)
426 {
427 	if (ZBX_NONEXISTENT_SHMID == collector->procstat.shmid)
428 		return FAIL;
429 
430 	return SUCCEED;
431 }
432 
433 /******************************************************************************
434  *                                                                            *
435  * Function: procstat_get_query                                               *
436  *                                                                            *
437  * Purpose: get process statistics query based on process name, user name     *
438  *          and command line                                                  *
439  *                                                                            *
440  * Parameters: base     - [IN] the procstat shared memory segment             *
441  *             procname - [IN] the process name                               *
442  *             username - [IN] the user name                                  *
443  *             cmdline  - [IN] the command line                               *
444  *             flags    - [IN] platform specific flags                        *
445  *                                                                            *
446  * Return value: The process statistics query for the specified parameters or *
447  *               NULL if the statistics are not being gathered for the        *
448  *               specified parameters.                                        *
449  *                                                                            *
450  ******************************************************************************/
procstat_get_query(void * base,const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags)451 static	zbx_procstat_query_t	*procstat_get_query(void *base, const char *procname, const char *username,
452 		const char *cmdline, zbx_uint64_t flags)
453 {
454 	zbx_procstat_query_t	*query;
455 
456 	if (SUCCEED != procstat_running())
457 		return NULL;
458 
459 	for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
460 	{
461 		if (0 == zbx_strcmp_null(procname, PROCSTAT_PTR_NULL(base, query->procname)) &&
462 				0 == zbx_strcmp_null(username, PROCSTAT_PTR_NULL(base, query->username)) &&
463 				0 == zbx_strcmp_null(cmdline, PROCSTAT_PTR_NULL(base, query->cmdline)) &&
464 				flags == query->flags)
465 		{
466 			return query;
467 		}
468 	}
469 
470 	return NULL;
471 }
472 
473 /******************************************************************************
474  *                                                                            *
475  * Function: procstat_add                                                     *
476  *                                                                            *
477  * Purpose: adds a new query to process statistics collector                  *
478  *                                                                            *
479  * Parameters: procname - [IN] the process name                               *
480  *             username - [IN] the user name                                  *
481  *             cmdline  - [IN] the command line                               *
482  *             flags    - [IN] platform specific flags                        *
483  *                                                                            *
484  * Return value:                                                              *
485  *     This function calls exit() on shared memory errors.                    *
486  *                                                                            *
487  ******************************************************************************/
static void	procstat_add(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags)
{
	const char		*attrs[3];
	char			*error = NULL;
	size_t			required;
	zbx_procstat_header_t	*hdr;
	zbx_procstat_query_t	*q;
	int			i, q_offset;

	zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __func__);

	/* a new query always needs space for the query container ... */
	required = ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));

	/* ... for a copy of every process attribute that is set ... */
	attrs[0] = procname;
	attrs[1] = username;
	attrs[2] = cmdline;

	for (i = 0; i < 3; i++)
	{
		if (NULL != attrs[i])
			required += ZBX_SIZE_T_ALIGN8(strlen(attrs[i]) + 1);
	}

	/* ... and for the procstat header when a new collection is being allocated */
	if (0 == collector->procstat.size)
		required += PROCSTAT_ALIGNED_HEADER_SIZE;

	/* procstat_add() is called when the shared memory reference has already been validated - */
	/* no need to call procstat_reattach()                                                    */

	if (NULL == procstat_ref.addr || FAIL == procstat_dshm_has_enough_space(procstat_ref.addr, required))
	{
		/* the segment must be resized to hold the existing data + the new query */
		required += procstat_dshm_used_size(procstat_ref.addr);

		if (FAIL == zbx_dshm_realloc(&collector->procstat, required, &error))
		{
			zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", error);
			zbx_free(error);
			zbx_dshm_unlock(&collector->procstat);

			exit(EXIT_FAILURE);
		}

		/* header initialised in procstat_copy_data() which is called back from zbx_dshm_realloc(), */
		/* the local reference must be switched to the resized segment before using it              */
		procstat_reattach();
	}

	/* allocate the query container first, followed by the attribute strings */
	q_offset = procstat_alloc(procstat_ref.addr, sizeof(zbx_procstat_query_t));
	q = (zbx_procstat_query_t *)PROCSTAT_PTR_NULL(procstat_ref.addr, q_offset);

	/* zero filling leaves the history empty and the run id, error code unset */
	memset(q, 0, sizeof(zbx_procstat_query_t));

	q->procname = procstat_strdup(procstat_ref.addr, procname);
	q->username = procstat_strdup(procstat_ref.addr, username);
	q->cmdline = procstat_strdup(procstat_ref.addr, cmdline);
	q->flags = flags;
	q->last_accessed = time(NULL);

	/* insert the new query at the beginning of the active query list */
	hdr = (zbx_procstat_header_t *)procstat_ref.addr;
	q->next = hdr->queries;
	hdr->queries = q_offset;

	zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __func__);
}
555 
556 /******************************************************************************
557  *                                                                            *
558  * Function: procstat_free_query_data                                         *
559  *                                                                            *
560  * Purpose: frees the query data structure used to store queries locally      *
561  *                                                                            *
562  ******************************************************************************/
static void	procstat_free_query_data(zbx_procstat_query_data_t *data)
{
	/* release the pid vector embedded in the structure before the structure itself; */
	/* the attribute strings are not freed here - as assigned in                     */
	/* procstat_build_local_query_vector() they point into the shared memory segment */
	zbx_vector_uint64_destroy(&data->pids);
	zbx_free(data);
}
568 
569 /******************************************************************************
570  *                                                                            *
571  * Function: procstat_try_compress                                            *
572  *                                                                            *
573  * Purpose: try to compress (remove inactive queries) the procstat shared     *
574  *          memory segment once per day                                       *
575  *                                                                            *
576  * Parameters: base - [IN] the procstat shared memory segment                 *
577  *                                                                            *
578  ******************************************************************************/
procstat_try_compress(void * base)579 static void	procstat_try_compress(void *base)
580 {
581 	static int	collector_iteration = 0;
582 
583 	/* The iteration counter ~ the number seconds collector has been running */
584 	/* because collector data gathering is done once per second.             */
585 	/* This approximation is done to avoid calling time() function if there  */
586 	/* are no defined queries.                                               */
587 	if (0 == (++collector_iteration % PROCSTAT_COMPRESS_PERIOD))
588 	{
589 		zbx_procstat_header_t	*header = (zbx_procstat_header_t *)procstat_ref.addr;
590 		size_t			size;
591 		char			*errmsg = NULL;
592 
593 		size = procstat_dshm_used_size(base);
594 
595 		if (size < header->size && FAIL == zbx_dshm_realloc(&collector->procstat, size, &errmsg))
596 		{
597 			zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", errmsg);
598 			zbx_free(errmsg);
599 			zbx_dshm_unlock(&collector->procstat);
600 
601 			exit(EXIT_FAILURE);
602 		}
603 	}
604 }
605 
606 /******************************************************************************
607  *                                                                            *
608  * Function: procstat_build_local_query_vector                                *
609  *                                                                            *
610  * Purpose: builds a local copy of the process cpu utilization queries and    *
611  *          removes expired (not used during last 24 hours) queries           *
612  *                                                                            *
613  * Parameters: queries_ptr - [OUT] local copy of queries copied from queries  *
614  *                                 in shared memory segment                   *
615  *             runid       - [IN] marker for queries to be processed in the   *
616  *                                current collector iteration                 *
617  *                                                                            *
618  * Return value: The flags defining the process properties to be retrieved.   *
619  *               See ZBX_SYSINFO_PROC_ defines.                               *
620  *                                                                            *
621  * Comments: updates queries (runid) in shared memory segment                 *
622  *                                                                            *
623  ******************************************************************************/
static int	procstat_build_local_query_vector(zbx_vector_ptr_t *queries_ptr, int runid)
{
	zbx_procstat_header_t		*hdr;
	zbx_procstat_query_t		*q;
	zbx_procstat_query_data_t	*local;
	time_t				now;
	int				*link, flags = ZBX_SYSINFO_PROC_NONE;

	zbx_dshm_lock(&collector->procstat);

	/* make sure the local reference points at the current shared memory segment */
	procstat_reattach();

	hdr = (zbx_procstat_header_t *)procstat_ref.addr;

	if (PROCSTAT_NULL_OFFSET != hdr->queries)
	{
		/* pids are needed as soon as there is at least one query */
		flags = ZBX_SYSINFO_PROC_PID;

		now = time(NULL);

		/* the location holding the offset of the query being checked, */
		/* used to unlink expired queries from the active query list   */
		link = &hdr->queries;

		for (q = PROCSTAT_QUERY_FIRST(procstat_ref.addr); NULL != q;
				q = PROCSTAT_QUERY_NEXT(procstat_ref.addr, q))
		{
			/* remove unused queries, the data is still allocated until the next resize */
			if (PROCSTAT_MAX_INACTIVITY_PERIOD < now - q->last_accessed)
			{
				*link = q->next;
				continue;
			}

			local = (zbx_procstat_query_data_t *)zbx_malloc(NULL, sizeof(zbx_procstat_query_data_t));
			zbx_vector_uint64_create(&local->pids);

			local->flags = q->flags;
			local->utime = 0;
			local->stime = 0;
			local->error = 0;

			/* store the reference to query attributes, which is guaranteed to be */
			/* valid until we call procstat_reattach()                            */
			local->procname = PROCSTAT_PTR_NULL(procstat_ref.addr, q->procname);
			local->username = PROCSTAT_PTR_NULL(procstat_ref.addr, q->username);
			local->cmdline = PROCSTAT_PTR_NULL(procstat_ref.addr, q->cmdline);

			/* request only the process properties that are used by at least one query */
			if (NULL != local->procname)
				flags |= ZBX_SYSINFO_PROC_NAME;

			if (NULL != local->username)
				flags |= ZBX_SYSINFO_PROC_USER;

			if (NULL != local->cmdline)
				flags |= ZBX_SYSINFO_PROC_CMDLINE;

			zbx_vector_ptr_append(queries_ptr, local);

			/* The order of queries can be changed only by collector itself (when removing old    */
			/* queries), but during statistics gathering the shared memory is unlocked and other  */
			/* processes might insert queries at the beginning of active queries list.            */
			/* Mark the queries being processed by current data gathering cycle with id that      */
			/* is incremented at the end of every data gathering cycle. We can be sure that       */
			/* our local copy will match the queries in shared memory having the same runid.      */
			q->runid = runid;

			link = &q->next;
		}
	}

	/* the compression attempt is made even when there are no active queries */
	procstat_try_compress(procstat_ref.addr);

	zbx_dshm_unlock(&collector->procstat);

	return flags;
}
695 
696 /******************************************************************************
697  *                                                                            *
698  * Function: procstat_scan_query_pids                                         *
699  *                                                                            *
700  * Purpose: for every query gets the pids of processes matching query         *
701  *          attributes                                                        *
702  *                                                                            *
703  * Parameters: queries - [IN/OUT] fills pids and error for each query         *
704  *                                                                            *
705  * Return value: total number of pids saved in all queries                    *
706  *                                                                            *
707  ******************************************************************************/
procstat_scan_query_pids(zbx_vector_ptr_t * queries,const zbx_vector_ptr_t * processes)708 static int	procstat_scan_query_pids(zbx_vector_ptr_t *queries, const zbx_vector_ptr_t *processes)
709 {
710 	zbx_procstat_query_data_t	*qdata;
711 	int				i, pids_num = 0;
712 
713 	for (i = 0; i < queries->values_num; i++)
714 	{
715 		qdata = (zbx_procstat_query_data_t *)queries->values[i];
716 
717 		zbx_proc_get_matching_pids(processes, qdata->procname, qdata->username, qdata->cmdline, qdata->flags,
718 				&qdata->pids);
719 
720 		pids_num += qdata->pids.values_num;
721 	}
722 
723 	return pids_num;
724 }
725 
726 /******************************************************************************
727  *                                                                            *
728  * Function: procstat_get_monitored_pids                                      *
729  *                                                                            *
730  * Purpose: creates a list of unique pids that are monitored by current data  *
731  *          gathering cycle                                                   *
732  *                                                                            *
733  * Parameters: pids     - [OUT] a sorted vector of unique pids                *
734  *             queries  - [IN] local, working copy of queries                 *
735  *             pids_num - [IN] the total number of pids monitored by queries  *
736  *                             (might contain duplicated pids)                *
737  *                                                                            *
738  ******************************************************************************/
procstat_get_monitored_pids(zbx_vector_uint64_t * pids,const zbx_vector_ptr_t * queries,int pids_num)739 static void	procstat_get_monitored_pids(zbx_vector_uint64_t *pids, const zbx_vector_ptr_t *queries, int pids_num)
740 {
741 	zbx_procstat_query_data_t	*qdata;
742 	int				i;
743 
744 	zbx_vector_uint64_reserve(pids, pids_num);
745 
746 	for (i = 0; i < queries->values_num; i++)
747 	{
748 		qdata = (zbx_procstat_query_data_t *)queries->values[i];
749 
750 		if (SUCCEED != qdata->error)
751 			continue;
752 
753 		memcpy(pids->values + pids->values_num, qdata->pids.values,
754 				sizeof(zbx_uint64_t) * qdata->pids.values_num);
755 		pids->values_num += qdata->pids.values_num;
756 	}
757 
758 	zbx_vector_uint64_sort(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
759 	zbx_vector_uint64_uniq(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
760 }
761 
762 /******************************************************************************
763  *                                                                            *
764  * Function: procstat_get_cpu_util_snapshot_for_pids                          *
765  *                                                                            *
766  * Purpose: gets cpu utilization data snapshot for the monitored processes    *
767  *                                                                            *
768  * Parameters: stats - [OUT] current reading of the per-pid cpu usage         *
769  *                               statistics (array, items correspond to pids) *
770  *             pids  - [IN]  pids (unique) for which to collect data in this  *
771  *                               iteration                                    *
772  *                                                                            *
773  * Return value: timestamp of the snapshot                                    *
774  *                                                                            *
775  ******************************************************************************/
procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t * stats,zbx_vector_uint64_t * pids)776 static zbx_timespec_t	procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t *stats,
777 				zbx_vector_uint64_t *pids)
778 {
779 	zbx_timespec_t	snapshot_timestamp;
780 	int		i;
781 
782 	for (i = 0; i < pids->values_num; i++)
783 		stats[i].pid = pids->values[i];
784 
785 	zbx_proc_get_process_stats(stats, pids->values_num);
786 
787 	zbx_timespec(&snapshot_timestamp);
788 
789 	return snapshot_timestamp;
790 }
791 
792 /******************************************************************************
793  *                                                                            *
794  * Function: procstat_util_compare                                            *
795  *                                                                            *
796  * Purpose: compare process utilization data by their pids                    *
797  *                                                                            *
798  ******************************************************************************/
procstat_util_compare(const void * d1,const void * d2)799 static int	procstat_util_compare(const void *d1, const void *d2)
800 {
801 	const zbx_procstat_util_t	*u1 = (zbx_procstat_util_t *)d1;
802 	const zbx_procstat_util_t	*u2 = (zbx_procstat_util_t *)d2;
803 
804 	ZBX_RETURN_IF_NOT_EQUAL(u1->pid, u2->pid);
805 
806 	return 0;
807 }
808 
809 /******************************************************************************
810  *                                                                            *
811  * Function: procstat_calculate_cpu_util_for_queries                          *
812  *                                                                            *
813  * Purpose: calculates the cpu utilization for queries since the previous     *
814  *          snapshot                                                          *
815  *                                                                            *
816  * Parameters: queries - [IN/OUT] local, working copy of queries, saving      *
817  *                                utime, stime and error                      *
818  *             pids    - [IN] pids (unique) for which to collect data in      *
819  *                            this iteration                                  *
820  *             stats   - [IN] current reading of the per-pid cpu usage        *
821  *                            statistics (array, items correspond to pids)    *
822  *                                                                            *
823  ******************************************************************************/
procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t * queries,zbx_vector_uint64_t * pids,const zbx_procstat_util_t * stats)824 static void	procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t *queries,
825 			zbx_vector_uint64_t *pids, const zbx_procstat_util_t *stats)
826 {
827 	zbx_procstat_query_data_t	*qdata;
828 	zbx_procstat_util_t		*putil;
829 	int				j, i;
830 
831 	for (j = 0; j < queries->values_num; j++)
832 	{
833 		qdata = (zbx_procstat_query_data_t *)queries->values[j];
834 
835 		/* sum the cpu utilization for processes that are present in current */
836 		/* and last process cpu utilization snapshot                         */
837 		for (i = 0; i < qdata->pids.values_num; i++)
838 		{
839 			zbx_uint64_t		starttime, utime, stime;
840 			zbx_procstat_util_t	util_local;
841 
842 			util_local.pid = qdata->pids.values[i];
843 
844 			/* find the process utilization data in current snapshot */
845 			putil = (zbx_procstat_util_t *)zbx_bsearch(&util_local, stats, pids->values_num,
846 					sizeof(zbx_procstat_util_t), procstat_util_compare);
847 
848 			if (NULL == putil || SUCCEED != putil->error)
849 				continue;
850 
851 			utime = putil->utime;
852 			stime = putil->stime;
853 
854 			starttime = putil->starttime;
855 
856 			/* find the process utilization data in last snapshot */
857 			putil = (zbx_procstat_util_t *)zbx_bsearch(&util_local, procstat_snapshot, procstat_snapshot_num,
858 					sizeof(zbx_procstat_util_t), procstat_util_compare);
859 
860 			if (NULL == putil || SUCCEED != putil->error || putil->starttime != starttime)
861 				continue;
862 
863 			qdata->utime += utime - putil->utime;
864 			qdata->stime += stime - putil->stime;
865 		}
866 	}
867 }
868 
869 /******************************************************************************
870  *                                                                            *
871  * Function: procstat_update_query_statistics                                 *
872  *                                                                            *
873  * Purpose: updates cpu utilization and saves the new snapshot for queries in *
874  *          shared memory segment                                             *
875  *                                                                            *
876  * Parameters: queries - [IN] local, working copy of queries (utime, stime    *
877  *                            and error must be set)                          *
878  *             runid   - [IN] marker for queries to be processed in the       *
879  *                            current collector iteration                     *
880  *             snapshot_timestamp - [IN] timestamp of the current snapshot    *
881  *                                                                            *
882  * Comments: updates header (pids_num) and queries (h_data, h_count, h_first) *
883  *           in shared memory segment, writes stats at the end of the shared  *
884  *           memory segment                                                   *
885  *                                                                            *
886  ******************************************************************************/
procstat_update_query_statistics(zbx_vector_ptr_t * queries,int runid,const zbx_timespec_t * snapshot_timestamp)887 static void	procstat_update_query_statistics(zbx_vector_ptr_t *queries, int runid,
888 		const zbx_timespec_t *snapshot_timestamp)
889 {
890 	zbx_procstat_query_t		*query;
891 	zbx_procstat_query_data_t	*qdata;
892 	int				index;
893 	int				i;
894 
895 	zbx_dshm_lock(&collector->procstat);
896 
897 	procstat_reattach();
898 
899 	for (query = PROCSTAT_QUERY_FIRST(procstat_ref.addr), i = 0; NULL != query;
900 			query = PROCSTAT_QUERY_NEXT(procstat_ref.addr, query))
901 	{
902 		if (runid != query->runid)
903 			continue;
904 
905 		if (i >= queries->values_num)
906 		{
907 			THIS_SHOULD_NEVER_HAPPEN;
908 			break;
909 		}
910 
911 		qdata = (zbx_procstat_query_data_t *)queries->values[i++];
912 
913 		if (SUCCEED != (query->error = qdata->error))
914 			continue;
915 
916 		/* find the next history data slot */
917 		if (0 < query->h_count)
918 		{
919 			if (MAX_COLLECTOR_HISTORY <= (index = query->h_first + query->h_count - 1))
920 				index -= MAX_COLLECTOR_HISTORY;
921 
922 			qdata->utime += query->h_data[index].utime;
923 			qdata->stime += query->h_data[index].stime;
924 
925 			if (MAX_COLLECTOR_HISTORY <= ++index)
926 				index -= MAX_COLLECTOR_HISTORY;
927 		}
928 		else
929 			index = 0;
930 
931 		if (MAX_COLLECTOR_HISTORY == query->h_count)
932 		{
933 			if (MAX_COLLECTOR_HISTORY <= ++query->h_first)
934 				query->h_first = 0;
935 		}
936 		else
937 			query->h_count++;
938 
939 		query->h_data[index].utime = qdata->utime;
940 		query->h_data[index].stime = qdata->stime;
941 		query->h_data[index].timestamp = *snapshot_timestamp;
942 	}
943 
944 	zbx_dshm_unlock(&collector->procstat);
945 }
946 
947 /*
948  * Public API
949  */
950 
951 /******************************************************************************
952  *                                                                            *
953  * Function: zbx_procstat_collector_started                                   *
954  *                                                                            *
955  * Purpose: checks if processor statistics collector is enabled (the main     *
956  *          collector has been initialized)                                   *
957  *                                                                            *
958  ******************************************************************************/
zbx_procstat_collector_started(void)959 int	zbx_procstat_collector_started(void)
960 {
961 	if (NULL == collector)
962 		return FAIL;
963 
964 	return SUCCEED;
965 }
966 
967 /******************************************************************************
968  *                                                                            *
969  * Function: zbx_procstat_init                                                *
970  *                                                                            *
971  * Purpose: initializes process statistics collector                          *
972  *                                                                            *
973  * Return value: This function calls exit() on shared memory errors.          *
974  *                                                                            *
975  ******************************************************************************/
zbx_procstat_init(void)976 void	zbx_procstat_init(void)
977 {
978 	char	*errmsg = NULL;
979 
980 	if (SUCCEED != zbx_dshm_create(&collector->procstat, 0, ZBX_MUTEX_PROCSTAT,
981 			procstat_copy_data, &errmsg))
982 	{
983 		zabbix_log(LOG_LEVEL_CRIT, "cannot initialize process data collector: %s", errmsg);
984 		zbx_free(errmsg);
985 		exit(EXIT_FAILURE);
986 	}
987 
988 	procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
989 	procstat_ref.addr = NULL;
990 }
991 
992 /******************************************************************************
993  *                                                                            *
994  * Function: zbx_procstat_destroy                                             *
995  *                                                                            *
996  * Purpose: destroys process statistics collector                             *
997  *                                                                            *
998  ******************************************************************************/
zbx_procstat_destroy(void)999 void	zbx_procstat_destroy(void)
1000 {
1001 	char	*errmsg = NULL;
1002 
1003 	if (SUCCEED != zbx_dshm_destroy(&collector->procstat, &errmsg))
1004 	{
1005 		zabbix_log(LOG_LEVEL_CRIT, "cannot free resources allocated by process data collector: %s", errmsg);
1006 		zbx_free(errmsg);
1007 	}
1008 
1009 	procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
1010 	procstat_ref.addr = NULL;
1011 }
1012 
1013 /******************************************************************************
1014  *                                                                            *
1015  * Function: zbx_procstat_get_util                                            *
1016  *                                                                            *
1017  * Purpose: gets process cpu utilization                                      *
1018  *                                                                            *
1019  * Parameters: procname       - [IN] the process name, NULL - all             *
1020  *             username       - [IN] the user name, NULL - all                *
1021  *             cmdline        - [IN] the command line, NULL - all             *
1022  *             collector_func - [IN] the callback function to use for process *
1023  *                              statistics gathering                          *
1024  *             period         - [IN] the time period                          *
1025  *             type           - [IN] the cpu utilization type, see            *
1026  *                              ZBX_PROCSTAT_CPU_* defines                    *
1027  *             value          - [OUT] the utilization in %                    *
1028  *             errmsg         - [OUT] the error message                       *
1029  *                                                                            *
1030  * Return value:                                                              *
1031  *     SUCCEED - the utime value was retrieved successfully                   *
1032  *     FAIL    - either collector does not have at least two data samples     *
1033  *               required to calculate the statistics, or an error occurred   *
1034  *               during the collection process. In the second case the errmsg *
1035  *               will contain an error message.                               *
1036  *     This function calls exit() on shared memory errors.                    *
1037  *                                                                            *
1038  ******************************************************************************/
zbx_procstat_get_util(const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags,int period,int type,double * value,char ** errmsg)1039 int	zbx_procstat_get_util(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags,
1040 		int period, int type, double *value, char **errmsg)
1041 {
1042 	int			ret = FAIL, current, start;
1043 	zbx_procstat_query_t	*query;
1044 	zbx_uint64_t		ticks_diff = 0, time_diff;
1045 
1046 	zbx_dshm_lock(&collector->procstat);
1047 
1048 	procstat_reattach();
1049 
1050 	if (NULL == (query = procstat_get_query(procstat_ref.addr, procname, username, cmdline, flags)))
1051 	{
1052 		if (procstat_queries_num(procstat_ref.addr) == PROCSTAT_MAX_QUERIES)
1053 			*errmsg = zbx_strdup(*errmsg, "Maximum number of queries reached.");
1054 		else
1055 			procstat_add(procname, username, cmdline, flags);
1056 
1057 		goto out;
1058 	}
1059 
1060 	query->last_accessed = time(NULL);
1061 
1062 	if (0 != query->error)
1063 	{
1064 		*errmsg = zbx_dsprintf(*errmsg, "Cannot read cpu utilization data: %s", zbx_strerror(-query->error));
1065 		goto out;
1066 	}
1067 
1068 	if (1 >= query->h_count)
1069 		goto out;
1070 
1071 	if (period >= query->h_count)
1072 		period = query->h_count - 1;
1073 
1074 	if (MAX_COLLECTOR_HISTORY <= (current = query->h_first + query->h_count - 1))
1075 		current -= MAX_COLLECTOR_HISTORY;
1076 
1077 	if (0 > (start = current - period))
1078 		start += MAX_COLLECTOR_HISTORY;
1079 
1080 	if (0 != (type & ZBX_PROCSTAT_CPU_USER))
1081 		ticks_diff += query->h_data[current].utime - query->h_data[start].utime;
1082 
1083 	if (0 != (type & ZBX_PROCSTAT_CPU_SYSTEM))
1084 		ticks_diff += query->h_data[current].stime - query->h_data[start].stime;
1085 
1086 	time_diff = (zbx_uint64_t)(query->h_data[current].timestamp.sec - query->h_data[start].timestamp.sec) *
1087 			1000000000 + query->h_data[current].timestamp.ns - query->h_data[start].timestamp.ns;
1088 
1089 	/* 1e9 (nanoseconds) * 1e2 (percent) * 1e1 (one digit decimal place) */
1090 	ticks_diff *= __UINT64_C(1000000000000);
1091 #ifdef HAVE_ROUND
1092 	*value = round((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK))) / 10;
1093 #else
1094 	*value = (int)((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK)) + 0.5) / 10.0;
1095 #endif
1096 
1097 	ret = SUCCEED;
1098 out:
1099 	zbx_dshm_unlock(&collector->procstat);
1100 
1101 	return ret;
1102 }
1103 
1104 /******************************************************************************
1105  *                                                                            *
1106  * Function: zbx_procstat_collect                                             *
1107  *                                                                            *
1108  * Purpose: performs process statistics collection                            *
1109  *                                                                            *
1110  ******************************************************************************/
zbx_procstat_collect(void)1111 void	zbx_procstat_collect(void)
1112 {
1113 	/* identifies current collection iteration */
1114 	static int			runid = 1;
1115 
1116 	/* number of (non-unique) pids that match queries */
1117 	int				pids_num = 0;
1118 
1119 	/* flags specifying what process properties must be retrieved */
1120 	int				flags;
1121 
1122 	/* local, working copy of queries */
1123 	zbx_vector_ptr_t		queries;
1124 
1125 	/* data about all processes on system */
1126 	zbx_vector_ptr_t		processes;
1127 
1128 	/* pids (unique) for which to collect data in this iteration */
1129 	zbx_vector_uint64_t		pids;
1130 
1131 	/* current reading of the per-pid cpu usage statistics (array, items correspond to pids) */
1132 	zbx_procstat_util_t		*stats;
1133 
1134 	/* time of the per-pid usage statistics collection */
1135 	zbx_timespec_t			snapshot_timestamp;
1136 
1137 	if (FAIL == zbx_procstat_collector_started() || FAIL == procstat_running())
1138 		goto out;
1139 
1140 	zbx_vector_ptr_create(&queries);
1141 	zbx_vector_ptr_create(&processes);
1142 	zbx_vector_uint64_create(&pids);
1143 
1144 	if (ZBX_SYSINFO_PROC_NONE == (flags = procstat_build_local_query_vector(&queries, runid)))
1145 		goto clean;
1146 
1147 	if (SUCCEED != zbx_proc_get_processes(&processes, flags))
1148 		goto clean;
1149 
1150 	pids_num = procstat_scan_query_pids(&queries, &processes);
1151 
1152 	procstat_get_monitored_pids(&pids, &queries, pids_num);
1153 
1154 	stats = (zbx_procstat_util_t *)zbx_malloc(NULL, sizeof(zbx_procstat_util_t) * pids.values_num);
1155 	snapshot_timestamp = procstat_get_cpu_util_snapshot_for_pids(stats, &pids);
1156 
1157 	procstat_calculate_cpu_util_for_queries(&queries, &pids, stats);
1158 
1159 	procstat_update_query_statistics(&queries, runid, &snapshot_timestamp);
1160 
1161 	/* replace the current snapshot with the new stats */
1162 	zbx_free(procstat_snapshot);
1163 	procstat_snapshot = stats;
1164 	procstat_snapshot_num = pids.values_num;
1165 clean:
1166 	zbx_vector_uint64_destroy(&pids);
1167 
1168 	zbx_proc_free_processes(&processes);
1169 	zbx_vector_ptr_destroy(&processes);
1170 
1171 	zbx_vector_ptr_clear_ext(&queries, (zbx_mem_free_func_t)procstat_free_query_data);
1172 	zbx_vector_ptr_destroy(&queries);
1173 out:
1174 	runid++;
1175 }
1176 
1177 #endif
1178