1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18 **/
19 
20 #include "common.h"
21 #include "log.h"
22 #include "mutexs.h"
23 #include "stats.h"
24 #include "ipc.h"
25 #include "procstat.h"
26 
27 #ifdef ZBX_PROCSTAT_COLLECTOR
28 
29 /*
30  * The process CPU statistics are stored using the following memory layout.
31  *
32  *  .--------------------------------------.
33  *  | header                               |
34  *  | ------------------------------------ |
35  *  | process cpu utilization queries      |
36  *  | and historical data                  |
37  *  | ------------------------------------ |
38  *  | free space                           |
39  *  '--------------------------------------'
40  *
 * Because the shared memory can be resized by other processes, memory offsets
 * from the beginning of the shared memory segment are used instead of pointers
 * (when allocating strings, building singly linked lists).
 * A 0 offset is interpreted similarly to a NULL pointer.
45  *
46  * Currently integer values are used to store offsets to internally allocated
47  * memory which leads to 2GB total size limit.
48  *
49  * During every data collection cycle collector does the following:
50  * 1) acquires list of all processes running on system
51  * 2) builds a list of processes monitored by queries
52  * 3) reads total cpu utilization snapshot for the monitored processes
53  * 4) calculates cpu utilization difference by comparing with previous snapshot
54  * 5) updates cpu utilization values for queries.
55  * 6) saves the last cpu utilization snapshot
56  *
57  * Initialisation.
58  * * zbx_procstat_init() initialises procstat dshm structure but doesn't allocate memory from the system
59  *   (zbx_dshm_create() called with size 0).
60  * * the first call of procstat_add() allocates the shared memory for the header and the first query
61  *   via call to zbx_dshm_realloc().
62  * * The header is initialised in procstat_copy_data() which is called back from zbx_dshm_realloc().
63  *
64  * Memory allocation within dshm.
65  * * Ensure that memory segment has enough free space with procstat_dshm_has_enough_space() before
66  *   allocating space within segment with procstat_alloc() or functions that use it.
67  * * Check how much of the allocated dshm is actually used by procstat by procstat_dshm_used_size().
68  * * Change the dshm size with zbx_dshm_realloc().
69  *
70  * Synchronisation.
71  * * agentd processes share a single instance of ZBX_COLLECTOR_DATA (*collector) containing reference
72  *   to shared procstat memory segment.
73  * * Each agentd process also holds local reference to procstat shared memory segment.
74  * * The system keeps the shared memory segment until the last process detaches from it.
75  * * Synchronise both references with procstat_reattach() before using procstat shared memory segment.
76  */
77 
/* the main collector data (defined elsewhere); its 'procstat' member is the */
/* shared memory descriptor used throughout this file                        */
extern ZBX_COLLECTOR_DATA	*collector;

/* local (per process) reference to the procstat shared memory;             */
/* procstat_ref.addr is the base address used to resolve segment offsets    */
static zbx_dshm_ref_t	procstat_ref;
83 
/* header stored at the very beginning of the procstat shared memory segment */
typedef struct
{
	/* a linked list of active queries (offset of the first active query, */
	/* PROCSTAT_NULL_OFFSET when the list is empty)                        */
	int	queries;

	/* the number of bytes handed out so far, header included; it also    */
	/* serves as the offset of the next allocation (see procstat_alloc()) */
	int	size_allocated;

	/* the total shared memory segment size */
	size_t	size;
}
zbx_procstat_header_t;
96 
/* the offset value that plays the role of a NULL pointer inside the segment */
#define PROCSTAT_NULL_OFFSET		0

/* the header size passed through ZBX_SIZE_T_ALIGN8 (8 byte alignment, judging by the macro name) */
#define PROCSTAT_ALIGNED_HEADER_SIZE	ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_header_t))

/* converts an offset from the segment beginning into an address */
#define PROCSTAT_PTR(base, offset)	((char *)(base) + (offset))

/* same as PROCSTAT_PTR(), but maps PROCSTAT_NULL_OFFSET to NULL;        */
/* note that 'offset' is evaluated twice - do not pass side effects here */
#define PROCSTAT_PTR_NULL(base, offset)									\
		(PROCSTAT_NULL_OFFSET == (offset) ? NULL : PROCSTAT_PTR(base, offset))

/* the first active query of the segment or NULL if there are no queries */
#define PROCSTAT_QUERY_FIRST(base)									\
		((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, ((zbx_procstat_header_t *)(base))->queries))

/* the query following 'query' in the active query list or NULL at the list end */
#define PROCSTAT_QUERY_NEXT(base, query)								\
		((zbx_procstat_query_t *)PROCSTAT_PTR_NULL(base, (query)->next))

/* converts an address inside the segment into an offset from the segment beginning; */
/* both arguments are parenthesized so that pointer expressions of any type work     */
#define PROCSTAT_OFFSET(base, ptr)	((char *)(ptr) - (char *)(base))

/* maximum number of active procstat queries */
#define PROCSTAT_MAX_QUERIES	1024

/* the time period after which inactive queries (not accessed during this period) can be removed */
#define PROCSTAT_MAX_INACTIVITY_PERIOD	SEC_PER_DAY

/* the time interval between compressing (inactive query removal) attempts */
#define PROCSTAT_COMPRESS_PERIOD	SEC_PER_DAY
122 
/* data sample collected every second for the process cpu utilization queries; */
/* one element of the zbx_procstat_query_t history ring buffer                 */
typedef struct
{
	/* cpu time counters of the sample (presumably user and system time - */
	/* the units are not visible in this part of the file)                */
	zbx_uint64_t	utime;
	zbx_uint64_t	stime;

	/* the time the sample was taken */
	zbx_timespec_t	timestamp;
}
zbx_procstat_data_t;
131 
/* process cpu utilization query as stored inside the shared memory segment */
typedef struct
{
	/* the process attributes: offsets (from segment beginning) of strings */
	/* allocated with procstat_strdup(), PROCSTAT_NULL_OFFSET if not set   */
	size_t				procname;
	size_t				username;
	size_t				cmdline;

	/* platform specific flags, compared verbatim when looking up a query */
	zbx_uint64_t			flags;

	/* the index of first (oldest) entry in the history data */
	int				h_first;

	/* the number of entries in the history data */
	int				h_count;

	/* the last access time (request from server) */
	int				last_accessed;

	/* increasing id for every data collection run, used to       */
	/* identify queries that are processed during data collection */
	int				runid;

	/* error code */
	int				error;

	/* offset (from segment beginning) of the next process query, */
	/* PROCSTAT_NULL_OFFSET for the last query in the list        */
	int				next;

	/* the cpu utilization history data (ring buffer) */
	zbx_procstat_data_t		h_data[MAX_COLLECTOR_HISTORY];
}
zbx_procstat_query_t;
164 
/* process cpu utilization query data - the process local (heap allocated) */
/* counterpart of a zbx_procstat_query_t stored in shared memory           */
typedef struct
{
	/* process attributes; these point into the attached shared memory */
	/* segment and are not owned by this structure                     */
	const char		*procname;
	const char		*username;
	const char		*cmdline;
	zbx_uint64_t		flags;

	/* error code */
	int			error;

	/* process cpu utilization */
	zbx_uint64_t		utime;
	zbx_uint64_t		stime;

	/* vector of pids matching the process attributes */
	/* (destroyed in procstat_free_query_data())      */
	zbx_vector_uint64_t	pids;
}
zbx_procstat_query_data_t;
185 
/* the process cpu utilization snapshot (array of procstat_snapshot_num elements) */
static zbx_procstat_util_t	*procstat_snapshot;
/* the number of processes in process cpu utilization snapshot */
static int			procstat_snapshot_num;

/* external functions used by procstat collector (implemented outside of this file) */

/* fills 'processes' with the running processes; 'flags' selects the properties to read */
/* (presumably the ZBX_SYSINFO_PROC_ flags - confirm against the implementation)        */
int	zbx_proc_get_processes(zbx_vector_ptr_t *processes, unsigned int flags);

/* stores in 'pids' the pids of 'processes' entries matching the specified attributes */
void	zbx_proc_get_matching_pids(const zbx_vector_ptr_t *processes, const char *procname, const char *username,
		const char *cmdline, zbx_uint64_t flags, zbx_vector_uint64_t *pids);

/* reads the cpu utilization statistics for the 'procs_num' processes in 'procs' */
void	zbx_proc_get_process_stats(zbx_procstat_util_t *procs, int procs_num);

/* releases the process list obtained from zbx_proc_get_processes() */
void	zbx_proc_free_processes(zbx_vector_ptr_t *processes);
200 
201 /******************************************************************************
202  *                                                                            *
203  * Function: procstat_dshm_has_enough_space                                   *
204  *                                                                            *
205  * Purpose: check if the procstat shared memory segment has at least          *
206  *          the specified amount of free bytes in the segment                 *
207  *                                                                            *
208  * Parameters: base - [IN] the procstat shared memory segment                 *
209  *             size - [IN] number of free bytes needed                        *
210  *                                                                            *
211  * Return value: SUCCEED - sufficient amount of bytes are available           *
212  *               FAIL    - otherwise                                          *
213  *                                                                            *
214  ******************************************************************************/
procstat_dshm_has_enough_space(void * base,size_t size)215 static int	procstat_dshm_has_enough_space(void *base, size_t size)
216 {
217 	zbx_procstat_header_t	*header = (zbx_procstat_header_t *)base;
218 
219 	if (header->size >= size + header->size_allocated)
220 		return SUCCEED;
221 
222 	return FAIL;
223 }
224 
225 /******************************************************************************
226  *                                                                            *
227  * Function: procstat_dshm_used_size                                          *
228  *                                                                            *
229  * Purpose: calculate the actual shared memory size used by procstat          *
230  *                                                                            *
231  * Parameters: base - [IN] the procstat shared memory segment                 *
232  *                                                                            *
233  * Return value: The number of bytes required to store current procstat data. *
234  *                                                                            *
235  ******************************************************************************/
procstat_dshm_used_size(void * base)236 static size_t	procstat_dshm_used_size(void *base)
237 {
238 	const zbx_procstat_query_t	*query;
239 	size_t				size;
240 
241 	if (NULL == base)
242 		return 0;
243 
244 	size = PROCSTAT_ALIGNED_HEADER_SIZE;
245 
246 	for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
247 	{
248 		if (PROCSTAT_NULL_OFFSET != query->procname)
249 			size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->procname)) + 1);
250 
251 		if (PROCSTAT_NULL_OFFSET != query->username)
252 			size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->username)) + 1);
253 
254 		if (PROCSTAT_NULL_OFFSET != query->cmdline)
255 			size += ZBX_SIZE_T_ALIGN8(strlen(PROCSTAT_PTR(base, query->cmdline)) + 1);
256 
257 		size += ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));
258 	}
259 
260 	return size;
261 }
262 
263 /******************************************************************************
264  *                                                                            *
265  * Function: procstat_queries_num                                             *
266  *                                                                            *
267  * Purpose: calculate the number of active queries                            *
268  *                                                                            *
269  * Parameters: base - [IN] the procstat shared memory segment                 *
270  *                                                                            *
271  * Return value: The number of active queries.                                *
272  *                                                                            *
273  ******************************************************************************/
procstat_queries_num(void * base)274 static int	procstat_queries_num(void *base)
275 {
276 	const zbx_procstat_query_t	*query;
277 	int				queries_num;
278 
279 	if (NULL == base)
280 		return 0;
281 
282 	queries_num = 0;
283 
284 	for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
285 		queries_num++;
286 
287 	return queries_num;
288 }
289 
290 /******************************************************************************
291  *                                                                            *
292  * Function: procstat_alloc                                                   *
293  *                                                                            *
294  * Purpose: allocates memory in the shared memory segment,                    *
295  *          calls exit() if segment is too small                              *
296  *                                                                            *
297  * Parameters: base - [IN] the procstat shared memory segment                 *
298  *             size - [IN] the number of bytes to allocate                    *
299  *                                                                            *
300  * Return value: The offset of allocated data from the beginning of segment   *
301  *               (positive value).                                            *
302  *                                                                            *
303  ******************************************************************************/
procstat_alloc(void * base,size_t size)304 static int	procstat_alloc(void *base, size_t size)
305 {
306 	zbx_procstat_header_t	*header = (zbx_procstat_header_t *)base;
307 	int			offset;
308 
309 	size = ZBX_SIZE_T_ALIGN8(size);
310 
311 	if (FAIL == procstat_dshm_has_enough_space(header, size))
312 	{
313 		THIS_SHOULD_NEVER_HAPPEN;
314 		exit(EXIT_FAILURE);
315 	}
316 
317 	offset = header->size_allocated;
318 	header->size_allocated += size;
319 
320 	return offset;
321 }
322 
323 /******************************************************************************
324  *                                                                            *
325  * Function: procstat_strdup                                                  *
326  *                                                                            *
327  * Purpose: allocates required memory in procstat memory segment and copies   *
328  *          the specified string (calls exit() if segment is too small)       *
329  *                                                                            *
330  * Parameters: base - [IN] the procstat shared memory segment                 *
331  *             str  - [IN] the string to copy                                 *
332  *                                                                            *
333  * Return value: The offset to allocated data counting from the beginning     *
334  *               of data segment.                                             *
 *               0 (PROCSTAT_NULL_OFFSET) if the source string is NULL.       *
 *               If the segment is too small the process exits instead.       *
337  *                                                                            *
338  ******************************************************************************/
procstat_strdup(void * base,const char * str)339 static size_t	procstat_strdup(void *base, const char *str)
340 {
341 	size_t	len, offset;
342 
343 	if (NULL == str)
344 		return PROCSTAT_NULL_OFFSET;
345 
346 	len = strlen(str) + 1;
347 
348 	offset = procstat_alloc(base, len);
349 		memcpy(PROCSTAT_PTR(base, offset), str, len);
350 
351 	return offset;
352 }
353 
354 /******************************************************************************
355  *                                                                            *
356  * Function: procstat_reattach                                                *
357  *                                                                            *
358  * Purpose: reattaches the procstat_ref to the shared memory segment if it    *
359  *          was 'resized' (a new segment created and the old data copied) by  *
360  *          other process.                                                    *
361  *                                                                            *
 * Comments: This function logs a critical error and exits in the case of     *
 *           shared memory segment operation failure.                         *
364  *                                                                            *
365  ******************************************************************************/
procstat_reattach(void)366 static void	procstat_reattach(void)
367 {
368 	char	*errmsg = NULL;
369 
370 	if (FAIL == zbx_dshm_validate_ref(&collector->procstat, &procstat_ref, &errmsg))
371 	{
372 		zabbix_log(LOG_LEVEL_CRIT, "cannot validate process data collector reference: %s", errmsg);
373 		zbx_free(errmsg);
374 		exit(EXIT_FAILURE);
375 	}
376 }
377 
378 /******************************************************************************
379  *                                                                            *
380  * Function: procstat_copy_data                                               *
381  *                                                                            *
382  * Purpose: copies procstat data to a new shared memory segment               *
383  *                                                                            *
384  * Parameters: dst      - [OUT] the destination segment                       *
385  *             size_dst - [IN] the size of destination segment                *
386  *             src      - [IN] the source segment                             *
387  *                                                                            *
388  ******************************************************************************/
procstat_copy_data(void * dst,size_t size_dst,const void * src)389 static void	procstat_copy_data(void *dst, size_t size_dst, const void *src)
390 {
391 	const char		*__function_name = "procstat_copy_data";
392 
393 	int			offset, *query_offset;
394 	zbx_procstat_header_t	*hdst = (zbx_procstat_header_t *)dst;
395 	zbx_procstat_query_t	*qsrc, *qdst = NULL;
396 
397 	zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);
398 
399 	hdst->size = size_dst;
400 	hdst->size_allocated = PROCSTAT_ALIGNED_HEADER_SIZE;
401 	hdst->queries = PROCSTAT_NULL_OFFSET;
402 
403 	if (NULL != src)
404 	{
405 		query_offset = &hdst->queries;
406 
407 		/* copy queries */
408 		for (qsrc = PROCSTAT_QUERY_FIRST(src); NULL != qsrc; qsrc = PROCSTAT_QUERY_NEXT(src, qsrc))
409 		{
410 			/* the new shared memory segment must have enough space */
411 			offset = procstat_alloc(dst, sizeof(zbx_procstat_query_t));
412 
413 			qdst = (zbx_procstat_query_t *)PROCSTAT_PTR(dst, offset);
414 
415 			memcpy(qdst, qsrc, sizeof(zbx_procstat_query_t));
416 
417 			qdst->procname = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->procname));
418 			qdst->username = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->username));
419 			qdst->cmdline = procstat_strdup(dst, PROCSTAT_PTR_NULL(src, qsrc->cmdline));
420 
421 			*query_offset = offset;
422 			query_offset = &qdst->next;
423 		}
424 	}
425 
426 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
427 }
428 
429 /******************************************************************************
430  *                                                                            *
431  * Function: procstat_running                                                 *
432  *                                                                            *
 * Purpose: checks if process statistics collector is running (at least one   *
 *          process statistics query has been made).                          *
435  *                                                                            *
436  ******************************************************************************/
procstat_running(void)437 static int	procstat_running(void)
438 {
439 	if (ZBX_NONEXISTENT_SHMID == collector->procstat.shmid)
440 		return FAIL;
441 
442 	return SUCCEED;
443 }
444 
445 /******************************************************************************
446  *                                                                            *
447  * Function: procstat_get_query                                               *
448  *                                                                            *
449  * Purpose: get process statistics query based on process name, user name     *
450  *          and command line                                                  *
451  *                                                                            *
452  * Parameters: base     - [IN] the procstat shared memory segment             *
453  *             procname - [IN] the process name                               *
454  *             username - [IN] the user name                                  *
455  *             cmdline  - [IN] the command line                               *
456  *             flags    - [IN] platform specific flags                        *
457  *                                                                            *
458  * Return value: The process statistics query for the specified parameters or *
459  *               NULL if the statistics are not being gathered for the        *
460  *               specified parameters.                                        *
461  *                                                                            *
462  ******************************************************************************/
procstat_get_query(void * base,const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags)463 static	zbx_procstat_query_t	*procstat_get_query(void *base, const char *procname, const char *username,
464 		const char *cmdline, zbx_uint64_t flags)
465 {
466 	zbx_procstat_query_t	*query;
467 
468 	if (SUCCEED != procstat_running())
469 		return NULL;
470 
471 	for (query = PROCSTAT_QUERY_FIRST(base); NULL != query; query = PROCSTAT_QUERY_NEXT(base, query))
472 	{
473 		if (0 == zbx_strcmp_null(procname, PROCSTAT_PTR_NULL(base, query->procname)) &&
474 				0 == zbx_strcmp_null(username, PROCSTAT_PTR_NULL(base, query->username)) &&
475 				0 == zbx_strcmp_null(cmdline, PROCSTAT_PTR_NULL(base, query->cmdline)) &&
476 				flags == query->flags)
477 		{
478 			return query;
479 		}
480 	}
481 
482 	return NULL;
483 }
484 
485 /******************************************************************************
486  *                                                                            *
487  * Function: procstat_add                                                     *
488  *                                                                            *
489  * Purpose: adds a new query to process statistics collector                  *
490  *                                                                            *
491  * Parameters: procname - [IN] the process name                               *
492  *             username - [IN] the user name                                  *
493  *             cmdline  - [IN] the command line                               *
494  *             flags    - [IN] platform specific flags                        *
495  *                                                                            *
 * Comments: This function calls exit() on shared memory errors.              *
498  *                                                                            *
499  ******************************************************************************/
static void	procstat_add(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags)
{
	const char		*__function_name = "procstat_add";
	char			*error = NULL;
	size_t			required;
	zbx_procstat_query_t	*entry;
	zbx_procstat_header_t	*hdr;
	int			entry_offset;

	zabbix_log(LOG_LEVEL_DEBUG, "In %s()", __function_name);

	/* the query container is always needed */
	required = ZBX_SIZE_T_ALIGN8(sizeof(zbx_procstat_query_t));

	/* a new collection additionally needs space for the procstat header */
	if (0 == collector->procstat.size)
		required += PROCSTAT_ALIGNED_HEADER_SIZE;

	/* every process attribute that is set is stored as a separately aligned string */
	required += (NULL == procname ? 0 : ZBX_SIZE_T_ALIGN8(strlen(procname) + 1));
	required += (NULL == username ? 0 : ZBX_SIZE_T_ALIGN8(strlen(username) + 1));
	required += (NULL == cmdline ? 0 : ZBX_SIZE_T_ALIGN8(strlen(cmdline) + 1));

	/* procstat_add() is called when the shared memory reference has already been validated - */
	/* no need to call procstat_reattach() before inspecting the segment                      */

	if (NULL == procstat_ref.addr || FAIL == procstat_dshm_has_enough_space(procstat_ref.addr, required))
	{
		/* the segment is missing or too small: request a segment large enough */
		/* to hold the data currently in use plus the new query                */
		required += procstat_dshm_used_size(procstat_ref.addr);

		if (FAIL == zbx_dshm_realloc(&collector->procstat, required, &error))
		{
			zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", error);
			zbx_free(error);
			zbx_dshm_unlock(&collector->procstat);

			exit(EXIT_FAILURE);
		}

		/* the header has been initialised in procstat_copy_data() which is called back */
		/* from zbx_dshm_realloc(); switch the local reference to the new segment       */
		procstat_reattach();
	}

	/* must be read after the possible reattach above, which can change procstat_ref.addr */
	hdr = (zbx_procstat_header_t *)procstat_ref.addr;

	/* allocate and reset the query container */
	entry_offset = procstat_alloc(procstat_ref.addr, sizeof(zbx_procstat_query_t));
	entry = (zbx_procstat_query_t *)PROCSTAT_PTR_NULL(procstat_ref.addr, entry_offset);

	memset(entry, 0, sizeof(zbx_procstat_query_t));

	/* fill in the query attributes */
	entry->procname = procstat_strdup(procstat_ref.addr, procname);
	entry->username = procstat_strdup(procstat_ref.addr, username);
	entry->cmdline = procstat_strdup(procstat_ref.addr, cmdline);
	entry->flags = flags;
	entry->last_accessed = time(NULL);

	/* insert the query at the beginning of the active query list */
	entry->next = hdr->queries;
	hdr->queries = entry_offset;

	zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
}
568 
569 /******************************************************************************
570  *                                                                            *
571  * Function: procstat_free_query_data                                         *
572  *                                                                            *
573  * Purpose: frees the query data structure used to store queries locally      *
574  *                                                                            *
575  ******************************************************************************/
static void	procstat_free_query_data(zbx_procstat_query_data_t *data)
{
	zbx_vector_uint64_t	*matched_pids = &data->pids;

	/* the pid vector is embedded in the structure, so it has to be */
	/* destroyed before the structure itself is released            */
	zbx_vector_uint64_destroy(matched_pids);

	/* the attribute strings are not freed here */
	zbx_free(data);
}
581 
582 /******************************************************************************
583  *                                                                            *
584  * Function: procstat_try_compress                                            *
585  *                                                                            *
586  * Purpose: try to compress (remove inactive queries) the procstat shared     *
587  *          memory segment once per day                                       *
588  *                                                                            *
589  * Parameters: base - [IN] the procstat shared memory segment                 *
590  *                                                                            *
591  ******************************************************************************/
procstat_try_compress(void * base)592 static void	procstat_try_compress(void *base)
593 {
594 	static int	collector_iteration = 0;
595 
596 	/* The iteration counter ~ the number seconds collector has been running */
597 	/* because collector data gathering is done once per second.             */
598 	/* This approximation is done to avoid calling time() function if there  */
599 	/* are no defined queries.                                               */
600 	if (0 == (++collector_iteration % PROCSTAT_COMPRESS_PERIOD))
601 	{
602 		zbx_procstat_header_t	*header = (zbx_procstat_header_t *)procstat_ref.addr;
603 		size_t			size;
604 		char			*errmsg = NULL;
605 
606 		size = procstat_dshm_used_size(base);
607 
608 		if (size < header->size && FAIL == zbx_dshm_realloc(&collector->procstat, size, &errmsg))
609 		{
610 			zabbix_log(LOG_LEVEL_CRIT, "cannot reallocate memory in process data collector: %s", errmsg);
611 			zbx_free(errmsg);
612 			zbx_dshm_unlock(&collector->procstat);
613 
614 			exit(EXIT_FAILURE);
615 		}
616 	}
617 }
618 
619 /******************************************************************************
620  *                                                                            *
621  * Function: procstat_build_local_query_vector                                *
622  *                                                                            *
623  * Purpose: builds a local copy of the process cpu utilization queries and    *
624  *          removes expired (not used during last 24 hours) queries           *
625  *                                                                            *
626  * Parameters: queries_ptr - [OUT] local copy of queries copied from queries  *
627  *                                 in shared memory segment                   *
628  *             runid       - [IN] marker for queries to be processed in the   *
629  *                                current collector iteration                 *
630  *                                                                            *
631  * Return value: The flags defining the process properties to be retrieved.   *
632  *               See ZBX_SYSINFO_PROC_ defines.                               *
633  *                                                                            *
634  * Comments: updates queries (runid) in shared memory segment                 *
635  *                                                                            *
636  ******************************************************************************/
static int	procstat_build_local_query_vector(zbx_vector_ptr_t *queries_ptr, int runid)
{
	zbx_procstat_header_t		*hdr;
	zbx_procstat_query_t		*q;
	zbx_procstat_query_data_t	*item;
	time_t				ts;
	int				*link, proc_flags = ZBX_SYSINFO_PROC_NONE;

	zbx_dshm_lock(&collector->procstat);

	procstat_reattach();

	hdr = (zbx_procstat_header_t *)procstat_ref.addr;

	if (PROCSTAT_NULL_OFFSET != hdr->queries)
	{
		/* pids are required as soon as there is at least one query in the list */
		proc_flags = ZBX_SYSINFO_PROC_PID;

		ts = time(NULL);

		/* 'link' refers to the offset field pointing at the query being examined, */
		/* which allows to unlink the query by overwriting that field              */
		link = &hdr->queries;

		q = PROCSTAT_QUERY_FIRST(procstat_ref.addr);

		while (NULL != q)
		{
			if (PROCSTAT_MAX_INACTIVITY_PERIOD < ts - q->last_accessed)
			{
				/* remove the unused query from the list, */
				/* the data is still allocated until the next resize */
				*link = q->next;
			}
			else
			{
				item = (zbx_procstat_query_data_t *)zbx_malloc(NULL,
						sizeof(zbx_procstat_query_data_t));
				zbx_vector_uint64_create(&item->pids);

				/* store the references to query attributes, which are guaranteed */
				/* to be valid until we call procstat_reattach()                  */
				item->procname = PROCSTAT_PTR_NULL(procstat_ref.addr, q->procname);
				item->username = PROCSTAT_PTR_NULL(procstat_ref.addr, q->username);
				item->cmdline = PROCSTAT_PTR_NULL(procstat_ref.addr, q->cmdline);

				/* request only the process properties used by at least one query */
				if (NULL != item->procname)
					proc_flags |= ZBX_SYSINFO_PROC_NAME;

				if (NULL != item->username)
					proc_flags |= ZBX_SYSINFO_PROC_USER;

				if (NULL != item->cmdline)
					proc_flags |= ZBX_SYSINFO_PROC_CMDLINE;

				item->flags = q->flags;
				item->error = 0;
				item->utime = 0;
				item->stime = 0;

				zbx_vector_ptr_append(queries_ptr, item);

				/* The order of queries can be changed only by collector itself (when   */
				/* removing old queries), but during statistics gathering the shared    */
				/* memory is unlocked and other processes might insert queries at the   */
				/* beginning of active queries list.                                    */
				/* Mark the queries being processed by current data gathering cycle     */
				/* with id that is incremented at the end of every data gathering       */
				/* cycle. We can be sure that our local copy will match the queries in  */
				/* shared memory having the same runid.                                 */
				q->runid = runid;

				link = &q->next;
			}

			q = PROCSTAT_QUERY_NEXT(procstat_ref.addr, q);
		}
	}

	/* the compression attempt is made on every call, also when there are no queries */
	procstat_try_compress(procstat_ref.addr);

	zbx_dshm_unlock(&collector->procstat);

	return proc_flags;
}
708 
709 /******************************************************************************
710  *                                                                            *
711  * Function: procstat_scan_query_pids                                         *
712  *                                                                            *
713  * Purpose: for every query gets the pids of processes matching query         *
714  *          attributes                                                        *
715  *                                                                            *
716  * Parameters: queries - [IN/OUT] fills pids and error for each query         *
717  *                                                                            *
718  * Return value: total number of pids saved in all queries                    *
719  *                                                                            *
720  ******************************************************************************/
procstat_scan_query_pids(zbx_vector_ptr_t * queries,const zbx_vector_ptr_t * processes)721 static int	procstat_scan_query_pids(zbx_vector_ptr_t *queries, const zbx_vector_ptr_t *processes)
722 {
723 	zbx_procstat_query_data_t	*qdata;
724 	int				i, pids_num = 0;
725 
726 	for (i = 0; i < queries->values_num; i++)
727 	{
728 		qdata = (zbx_procstat_query_data_t *)queries->values[i];
729 
730 		zbx_proc_get_matching_pids(processes, qdata->procname, qdata->username, qdata->cmdline, qdata->flags,
731 				&qdata->pids);
732 
733 		pids_num += qdata->pids.values_num;
734 	}
735 
736 	return pids_num;
737 }
738 
739 /******************************************************************************
740  *                                                                            *
741  * Function: procstat_get_monitored_pids                                      *
742  *                                                                            *
743  * Purpose: creates a list of unique pids that are monitored by current data  *
744  *          gathering cycle                                                   *
745  *                                                                            *
746  * Parameters: pids     - [OUT] a sorted vector of unique pids                *
747  *             queries  - [IN] local, working copy of queries                 *
748  *             pids_num - [IN] the total number of pids monitored by queries  *
749  *                             (might contain duplicated pids)                *
750  *                                                                            *
751  ******************************************************************************/
procstat_get_monitored_pids(zbx_vector_uint64_t * pids,const zbx_vector_ptr_t * queries,int pids_num)752 static void	procstat_get_monitored_pids(zbx_vector_uint64_t *pids, const zbx_vector_ptr_t *queries, int pids_num)
753 {
754 	zbx_procstat_query_data_t	*qdata;
755 	int				i;
756 
757 	zbx_vector_uint64_reserve(pids, pids_num);
758 
759 	for (i = 0; i < queries->values_num; i++)
760 	{
761 		qdata = (zbx_procstat_query_data_t *)queries->values[i];
762 
763 		if (SUCCEED != qdata->error)
764 			continue;
765 
766 		memcpy(pids->values + pids->values_num, qdata->pids.values,
767 				sizeof(zbx_uint64_t) * qdata->pids.values_num);
768 		pids->values_num += qdata->pids.values_num;
769 	}
770 
771 	zbx_vector_uint64_sort(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
772 	zbx_vector_uint64_uniq(pids, ZBX_DEFAULT_UINT64_COMPARE_FUNC);
773 }
774 
775 /******************************************************************************
776  *                                                                            *
777  * Function: procstat_get_cpu_util_snapshot_for_pids                          *
778  *                                                                            *
779  * Purpose: gets cpu utilization data snapshot for the monitored processes    *
780  *                                                                            *
781  * Parameters: stats - [OUT] current reading of the per-pid cpu usage         *
782  *                               statistics (array, items correspond to pids) *
783  *             pids  - [IN]  pids (unique) for which to collect data in this  *
784  *                               iteration                                    *
785  *                                                                            *
786  * Return value: timestamp of the snapshot                                    *
787  *                                                                            *
788  ******************************************************************************/
procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t * stats,zbx_vector_uint64_t * pids)789 static zbx_timespec_t	procstat_get_cpu_util_snapshot_for_pids(zbx_procstat_util_t *stats,
790 				zbx_vector_uint64_t *pids)
791 {
792 	zbx_timespec_t	snapshot_timestamp;
793 	int		i;
794 
795 	for (i = 0; i < pids->values_num; i++)
796 		stats[i].pid = pids->values[i];
797 
798 	zbx_proc_get_process_stats(stats, pids->values_num);
799 
800 	zbx_timespec(&snapshot_timestamp);
801 
802 	return snapshot_timestamp;
803 }
804 
805 /******************************************************************************
806  *                                                                            *
807  * Function: procstat_util_compare                                            *
808  *                                                                            *
809  * Purpose: compare process utilization data by their pids                    *
810  *                                                                            *
811  ******************************************************************************/
procstat_util_compare(const void * d1,const void * d2)812 static int	procstat_util_compare(const void *d1, const void *d2)
813 {
814 	const zbx_procstat_util_t	*u1 = (zbx_procstat_util_t *)d1;
815 	const zbx_procstat_util_t	*u2 = (zbx_procstat_util_t *)d2;
816 
817 	ZBX_RETURN_IF_NOT_EQUAL(u1->pid, u2->pid);
818 
819 	return 0;
820 }
821 
822 /******************************************************************************
823  *                                                                            *
824  * Function: procstat_calculate_cpu_util_for_queries                          *
825  *                                                                            *
826  * Purpose: calculates the cpu utilization for queries since the previous     *
827  *          snapshot                                                          *
828  *                                                                            *
829  * Parameters: queries - [IN/OUT] local, working copy of queries, saving      *
830  *                                utime, stime and error                      *
831  *             pids    - [IN] pids (unique) for which to collect data in      *
832  *                            this iteration                                  *
833  *             stats   - [IN] current reading of the per-pid cpu usage        *
834  *                            statistics (array, items correspond to pids)    *
835  *                                                                            *
836  ******************************************************************************/
procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t * queries,zbx_vector_uint64_t * pids,const zbx_procstat_util_t * stats)837 static void	procstat_calculate_cpu_util_for_queries(zbx_vector_ptr_t *queries,
838 			zbx_vector_uint64_t *pids, const zbx_procstat_util_t *stats)
839 {
840 	zbx_procstat_query_data_t	*qdata;
841 	zbx_procstat_util_t		*putil;
842 	int				j, i;
843 
844 	for (j = 0; j < queries->values_num; j++)
845 	{
846 		qdata = (zbx_procstat_query_data_t *)queries->values[j];
847 
848 		/* sum the cpu utilization for processes that are present in current */
849 		/* and last process cpu utilization snapshot                         */
850 		for (i = 0; i < qdata->pids.values_num; i++)
851 		{
852 			zbx_uint64_t		starttime, utime, stime;
853 			zbx_procstat_util_t	util_local;
854 
855 			util_local.pid = qdata->pids.values[i];
856 
857 			/* find the process utilization data in current snapshot */
858 			putil = (zbx_procstat_util_t *)zbx_bsearch(&util_local, stats, pids->values_num,
859 					sizeof(zbx_procstat_util_t), procstat_util_compare);
860 
861 			if (NULL == putil || SUCCEED != putil->error)
862 				continue;
863 
864 			utime = putil->utime;
865 			stime = putil->stime;
866 
867 			starttime = putil->starttime;
868 
869 			/* find the process utilization data in last snapshot */
870 			putil = (zbx_procstat_util_t *)zbx_bsearch(&util_local, procstat_snapshot, procstat_snapshot_num,
871 					sizeof(zbx_procstat_util_t), procstat_util_compare);
872 
873 			if (NULL == putil || SUCCEED != putil->error || putil->starttime != starttime)
874 				continue;
875 
876 			qdata->utime += utime - putil->utime;
877 			qdata->stime += stime - putil->stime;
878 		}
879 	}
880 }
881 
882 /******************************************************************************
883  *                                                                            *
884  * Function: procstat_update_query_statistics                                 *
885  *                                                                            *
886  * Purpose: updates cpu utilization and saves the new snapshot for queries in *
887  *          shared memory segment                                             *
888  *                                                                            *
889  * Parameters: queries - [IN] local, working copy of queries (utime, stime    *
890  *                            and error must be set)                          *
891  *             runid   - [IN] marker for queries to be processed in the       *
892  *                            current collector iteration                     *
893  *             snapshot_timestamp - [IN] timestamp of the current snapshot    *
894  *                                                                            *
895  * Comments: updates header (pids_num) and queries (h_data, h_count, h_first) *
896  *           in shared memory segment, writes stats at the end of the shared  *
897  *           memory segment                                                   *
898  *                                                                            *
899  ******************************************************************************/
procstat_update_query_statistics(zbx_vector_ptr_t * queries,int runid,const zbx_timespec_t * snapshot_timestamp)900 static void	procstat_update_query_statistics(zbx_vector_ptr_t *queries, int runid,
901 		const zbx_timespec_t *snapshot_timestamp)
902 {
903 	zbx_procstat_query_t		*query;
904 	zbx_procstat_query_data_t	*qdata;
905 	int				index;
906 	int				i;
907 
908 	zbx_dshm_lock(&collector->procstat);
909 
910 	procstat_reattach();
911 
912 	for (query = PROCSTAT_QUERY_FIRST(procstat_ref.addr), i = 0; NULL != query;
913 			query = PROCSTAT_QUERY_NEXT(procstat_ref.addr, query))
914 	{
915 		if (runid != query->runid)
916 			continue;
917 
918 		if (i >= queries->values_num)
919 		{
920 			THIS_SHOULD_NEVER_HAPPEN;
921 			break;
922 		}
923 
924 		qdata = (zbx_procstat_query_data_t *)queries->values[i++];
925 
926 		if (SUCCEED != (query->error = qdata->error))
927 			continue;
928 
929 		/* find the next history data slot */
930 		if (0 < query->h_count)
931 		{
932 			if (MAX_COLLECTOR_HISTORY <= (index = query->h_first + query->h_count - 1))
933 				index -= MAX_COLLECTOR_HISTORY;
934 
935 			qdata->utime += query->h_data[index].utime;
936 			qdata->stime += query->h_data[index].stime;
937 
938 			if (MAX_COLLECTOR_HISTORY <= ++index)
939 				index -= MAX_COLLECTOR_HISTORY;
940 		}
941 		else
942 			index = 0;
943 
944 		if (MAX_COLLECTOR_HISTORY == query->h_count)
945 		{
946 			if (MAX_COLLECTOR_HISTORY <= ++query->h_first)
947 				query->h_first = 0;
948 		}
949 		else
950 			query->h_count++;
951 
952 		query->h_data[index].utime = qdata->utime;
953 		query->h_data[index].stime = qdata->stime;
954 		query->h_data[index].timestamp = *snapshot_timestamp;
955 	}
956 
957 	zbx_dshm_unlock(&collector->procstat);
958 }
959 
960 /*
961  * Public API
962  */
963 
964 /******************************************************************************
965  *                                                                            *
966  * Function: zbx_procstat_collector_started                                   *
967  *                                                                            *
968  * Purpose: checks if processor statistics collector is enabled (the main     *
969  *          collector has been initialized)                                   *
970  *                                                                            *
971  ******************************************************************************/
zbx_procstat_collector_started(void)972 int	zbx_procstat_collector_started(void)
973 {
974 	if (NULL == collector)
975 		return FAIL;
976 
977 	return SUCCEED;
978 }
979 
980 /******************************************************************************
981  *                                                                            *
982  * Function: zbx_procstat_init                                                *
983  *                                                                            *
984  * Purpose: initializes process statistics collector                          *
985  *                                                                            *
986  * Return value: This function calls exit() on shared memory errors.          *
987  *                                                                            *
988  ******************************************************************************/
zbx_procstat_init(void)989 void	zbx_procstat_init(void)
990 {
991 	char	*errmsg = NULL;
992 
993 	if (SUCCEED != zbx_dshm_create(&collector->procstat, ZBX_IPC_COLLECTOR_PROC_ID, 0, ZBX_MUTEX_PROCSTAT,
994 			procstat_copy_data, &errmsg))
995 	{
996 		zabbix_log(LOG_LEVEL_CRIT, "cannot initialize process data collector: %s", errmsg);
997 		zbx_free(errmsg);
998 		exit(EXIT_FAILURE);
999 	}
1000 
1001 	procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
1002 	procstat_ref.addr = NULL;
1003 }
1004 
1005 /******************************************************************************
1006  *                                                                            *
1007  * Function: zbx_procstat_destroy                                             *
1008  *                                                                            *
1009  * Purpose: destroys process statistics collector                             *
1010  *                                                                            *
1011  ******************************************************************************/
zbx_procstat_destroy(void)1012 void	zbx_procstat_destroy(void)
1013 {
1014 	char	*errmsg = NULL;
1015 
1016 	if (SUCCEED != zbx_dshm_destroy(&collector->procstat, &errmsg))
1017 	{
1018 		zabbix_log(LOG_LEVEL_CRIT, "cannot free resources allocated by process data collector: %s", errmsg);
1019 		zbx_free(errmsg);
1020 	}
1021 
1022 	procstat_ref.shmid = ZBX_NONEXISTENT_SHMID;
1023 	procstat_ref.addr = NULL;
1024 }
1025 
1026 /******************************************************************************
1027  *                                                                            *
1028  * Function: zbx_procstat_get_util                                            *
1029  *                                                                            *
1030  * Purpose: gets process cpu utilization                                      *
1031  *                                                                            *
1032  * Parameters: procname       - [IN] the process name, NULL - all             *
1033  *             username       - [IN] the user name, NULL - all                *
1034  *             cmdline        - [IN] the command line, NULL - all             *
1035  *             collector_func - [IN] the callback function to use for process *
1036  *                              statistics gathering                          *
1037  *             period         - [IN] the time period                          *
1038  *             type           - [IN] the cpu utilization type, see            *
1039  *                              ZBX_PROCSTAT_CPU_* defines                    *
1040  *             value          - [OUT] the utilization in %                    *
1041  *             errmsg         - [OUT] the error message                       *
1042  *                                                                            *
1043  * Return value:                                                              *
1044  *     SUCCEED - the utime value was retrieved successfully                   *
1045  *     FAIL    - either collector does not have at least two data samples     *
1046  *               required to calculate the statistics, or an error occurred   *
1047  *               during the collection process. In the second case the errmsg *
1048  *               will contain an error message.                               *
1049  *     This function calls exit() on shared memory errors.                    *
1050  *                                                                            *
1051  ******************************************************************************/
zbx_procstat_get_util(const char * procname,const char * username,const char * cmdline,zbx_uint64_t flags,int period,int type,double * value,char ** errmsg)1052 int	zbx_procstat_get_util(const char *procname, const char *username, const char *cmdline, zbx_uint64_t flags,
1053 		int period, int type, double *value, char **errmsg)
1054 {
1055 	int			ret = FAIL, current, start;
1056 	zbx_procstat_query_t	*query;
1057 	zbx_uint64_t		ticks_diff = 0, time_diff;
1058 
1059 	zbx_dshm_lock(&collector->procstat);
1060 
1061 	procstat_reattach();
1062 
1063 	if (NULL == (query = procstat_get_query(procstat_ref.addr, procname, username, cmdline, flags)))
1064 	{
1065 		if (procstat_queries_num(procstat_ref.addr) == PROCSTAT_MAX_QUERIES)
1066 			*errmsg = zbx_strdup(*errmsg, "Maximum number of queries reached.");
1067 		else
1068 			procstat_add(procname, username, cmdline, flags);
1069 
1070 		goto out;
1071 	}
1072 
1073 	query->last_accessed = time(NULL);
1074 
1075 	if (0 != query->error)
1076 	{
1077 		*errmsg = zbx_dsprintf(*errmsg, "Cannot read cpu utilization data: %s", zbx_strerror(-query->error));
1078 		goto out;
1079 	}
1080 
1081 	if (1 >= query->h_count)
1082 		goto out;
1083 
1084 	if (period >= query->h_count)
1085 		period = query->h_count - 1;
1086 
1087 	if (MAX_COLLECTOR_HISTORY <= (current = query->h_first + query->h_count - 1))
1088 		current -= MAX_COLLECTOR_HISTORY;
1089 
1090 	if (0 > (start = current - period))
1091 		start += MAX_COLLECTOR_HISTORY;
1092 
1093 	if (0 != (type & ZBX_PROCSTAT_CPU_USER))
1094 		ticks_diff += query->h_data[current].utime - query->h_data[start].utime;
1095 
1096 	if (0 != (type & ZBX_PROCSTAT_CPU_SYSTEM))
1097 		ticks_diff += query->h_data[current].stime - query->h_data[start].stime;
1098 
1099 	time_diff = (zbx_uint64_t)(query->h_data[current].timestamp.sec - query->h_data[start].timestamp.sec) *
1100 			1000000000 + query->h_data[current].timestamp.ns - query->h_data[start].timestamp.ns;
1101 
1102 	/* 1e9 (nanoseconds) * 1e2 (percent) * 1e1 (one digit decimal place) */
1103 	ticks_diff *= __UINT64_C(1000000000000);
1104 #ifdef HAVE_ROUND
1105 	*value = round((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK))) / 10;
1106 #else
1107 	*value = (int)((double)ticks_diff / (time_diff * sysconf(_SC_CLK_TCK)) + 0.5) / 10.0;
1108 #endif
1109 
1110 	ret = SUCCEED;
1111 out:
1112 	zbx_dshm_unlock(&collector->procstat);
1113 
1114 	return ret;
1115 }
1116 
1117 /******************************************************************************
1118  *                                                                            *
1119  * Function: zbx_procstat_collect                                             *
1120  *                                                                            *
1121  * Purpose: performs process statistics collection                            *
1122  *                                                                            *
1123  ******************************************************************************/
zbx_procstat_collect(void)1124 void	zbx_procstat_collect(void)
1125 {
1126 	/* identifies current collection iteration */
1127 	static int			runid = 1;
1128 
1129 	/* number of (non-unique) pids that match queries */
1130 	int				pids_num = 0;
1131 
1132 	/* flags specifying what process properties must be retrieved */
1133 	int				flags;
1134 
1135 	/* local, working copy of queries */
1136 	zbx_vector_ptr_t		queries;
1137 
1138 	/* data about all processes on system */
1139 	zbx_vector_ptr_t		processes;
1140 
1141 	/* pids (unique) for which to collect data in this iteration */
1142 	zbx_vector_uint64_t		pids;
1143 
1144 	/* current reading of the per-pid cpu usage statistics (array, items correspond to pids) */
1145 	zbx_procstat_util_t		*stats;
1146 
1147 	/* time of the per-pid usage statistics collection */
1148 	zbx_timespec_t			snapshot_timestamp;
1149 
1150 	if (FAIL == zbx_procstat_collector_started() || FAIL == procstat_running())
1151 		goto out;
1152 
1153 	zbx_vector_ptr_create(&queries);
1154 	zbx_vector_ptr_create(&processes);
1155 	zbx_vector_uint64_create(&pids);
1156 
1157 	if (ZBX_SYSINFO_PROC_NONE == (flags = procstat_build_local_query_vector(&queries, runid)))
1158 		goto clean;
1159 
1160 	if (SUCCEED != zbx_proc_get_processes(&processes, flags))
1161 		goto clean;
1162 
1163 	pids_num = procstat_scan_query_pids(&queries, &processes);
1164 
1165 	procstat_get_monitored_pids(&pids, &queries, pids_num);
1166 
1167 	stats = (zbx_procstat_util_t *)zbx_malloc(NULL, sizeof(zbx_procstat_util_t) * pids.values_num);
1168 	snapshot_timestamp = procstat_get_cpu_util_snapshot_for_pids(stats, &pids);
1169 
1170 	procstat_calculate_cpu_util_for_queries(&queries, &pids, stats);
1171 
1172 	procstat_update_query_statistics(&queries, runid, &snapshot_timestamp);
1173 
1174 	/* replace the current snapshot with the new stats */
1175 	zbx_free(procstat_snapshot);
1176 	procstat_snapshot = stats;
1177 	procstat_snapshot_num = pids.values_num;
1178 clean:
1179 	zbx_vector_uint64_destroy(&pids);
1180 
1181 	zbx_proc_free_processes(&processes);
1182 	zbx_vector_ptr_destroy(&processes);
1183 
1184 	zbx_vector_ptr_clear_ext(&queries, (zbx_mem_free_func_t)procstat_free_query_data);
1185 	zbx_vector_ptr_destroy(&queries);
1186 out:
1187 	runid++;
1188 }
1189 
1190 #endif
1191