1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18 **/
19 
20 #include "common.h"
21 #include "logfiles.h"
22 #include "log.h"
23 #include "sysinfo.h"
24 #include "persistent_state.h"
25 
26 #if defined(_WINDOWS) || defined(__MINGW32__)
27 #	include "symbols.h"
28 #	include "zbxtypes.h"	/* ssize_t */
29 #endif /* _WINDOWS */
30 
#define MAX_LEN_MD5	512	/* maximum size of the first and the last blocks of the file to calculate MD5 sum for */

/* outcome codes used when deciding whether two log file records describe the same file */
/* NOTE(review): ZBX_SAME_FILE_RETRY and ZBX_NO_FILE_ERROR are not used in this part of the file - */
/* confirm their exact meaning against the callers further down */
#define ZBX_SAME_FILE_ERROR	-1	/* the comparison itself failed */
#define ZBX_SAME_FILE_NO	0	/* two different files */
#define ZBX_SAME_FILE_YES	1	/* the files are (assumed to be) the same */
#define ZBX_SAME_FILE_RETRY	2
#define ZBX_NO_FILE_ERROR	3
#define ZBX_SAME_FILE_COPY	4	/* one file is a copy of the other */

/* outcome codes of compare_file_places() */
#define ZBX_FILE_PLACE_UNKNOWN	-1	/* cannot compare file device and inode numbers */
#define ZBX_FILE_PLACE_OTHER	0	/* both files have different device or inode numbers */
#define ZBX_FILE_PLACE_SAME	1	/* both files have the same device and inode numbers */

/* configuration variables defined outside of this file */
extern int	CONFIG_MAX_LINES_PER_SECOND;

extern ZBX_THREAD_LOCAL char	*CONFIG_HOSTNAME;
47 
48 /******************************************************************************
49  *                                                                            *
50  * Function: split_string                                                     *
51  *                                                                            *
52  * Purpose: separates given string to two parts by given delimiter in string  *
53  *                                                                            *
54  * Parameters:                                                                *
55  *     str -   [IN] a not-empty string to split                               *
56  *     del -   [IN] pointer to a character in the string                      *
57  *     part1 - [OUT] pointer to buffer for the first part with delimiter      *
58  *     part2 - [OUT] pointer to buffer for the second part                    *
59  *                                                                            *
60  * Return value: SUCCEED - on splitting without errors                        *
61  *               FAIL - on splitting with errors                              *
62  *                                                                            *
63  * Author: Dmitry Borovikov, Aleksandrs Saveljevs                             *
64  *                                                                            *
65  * Comments: Memory for "part1" and "part2" is allocated only on SUCCEED.     *
66  *                                                                            *
67  ******************************************************************************/
split_string(const char * str,const char * del,char ** part1,char ** part2)68 static int	split_string(const char *str, const char *del, char **part1, char **part2)
69 {
70 	size_t	str_length, part1_length, part2_length;
71 	int	ret = FAIL;
72 
73 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() str:'%s' del:'%s'", __func__, str, del);
74 
75 	str_length = strlen(str);
76 
77 	/* since the purpose of this function is to be used in split_filename(), we allow part1 to be */
78 	/* just *del (e.g., "/" - file system root), but we do not allow part2 (filename) to be empty */
79 	if (del < str || del >= (str + str_length - 1))
80 	{
81 		zabbix_log(LOG_LEVEL_DEBUG, "%s() cannot proceed: delimiter is out of range", __func__);
82 		goto out;
83 	}
84 
85 	part1_length = (size_t)(del - str + 1);
86 	part2_length = str_length - part1_length;
87 
88 	*part1 = (char *)zbx_malloc(*part1, part1_length + 1);
89 	zbx_strlcpy(*part1, str, part1_length + 1);
90 
91 	*part2 = (char *)zbx_malloc(*part2, part2_length + 1);
92 	zbx_strlcpy(*part2, str + part1_length, part2_length + 1);
93 
94 	ret = SUCCEED;
95 out:
96 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s part1:'%s' part2:'%s'", __func__, zbx_result_string(ret),
97 			ZBX_NULL2STR(*part1), ZBX_NULL2STR(*part2));
98 
99 	return ret;
100 }
101 
102 /******************************************************************************
103  *                                                                            *
104  * Function: split_filename                                                   *
105  *                                                                            *
106  * Purpose: separates full-path file name into directory and file name regexp *
107  *          parts                                                             *
108  *                                                                            *
109  * Parameters:                                                                *
110  *     filename        - [IN] first parameter of logrt[] or logrt.count[]     *
111  *                       item                                                 *
112  *     directory       - [IN/OUT] directory part of the 'filename'            *
113  *     filename_regexp - [IN/OUT] file name regular expression part           *
114  *     err_msg         - [IN/OUT] error message why an item became            *
115  *                       NOTSUPPORTED                                         *
116  *                                                                            *
117  * Return value: SUCCEED - on successful splitting                            *
118  *               FAIL - on unable to split sensibly                           *
119  *                                                                            *
120  * Author: Dmitry Borovikov                                                   *
121  *                                                                            *
122  * Comments: Allocates memory for "directory" and "filename_regexp" only on   *
123  *           SUCCEED. On FAIL memory, allocated for "directory" and           *
124  *           "filename_regexp" is freed.                                      *
125  *                                                                            *
126  *           Thread-safe                                                      *
127  *                                                                            *
128  ******************************************************************************/
split_filename(const char * filename,char ** directory,char ** filename_regexp,char ** err_msg)129 static int	split_filename(const char *filename, char **directory, char **filename_regexp, char **err_msg)
130 {
131 	const char	*separator = NULL;
132 	zbx_stat_t	buf;
133 	int		ret = FAIL;
134 #if defined(_WINDOWS) || defined(__MINGW32__)
135 	size_t		sz;
136 #endif
137 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s'", __func__, ZBX_NULL2STR(filename));
138 
139 	if (NULL == filename || '\0' == *filename)
140 	{
141 		*err_msg = zbx_strdup(*err_msg, "Cannot split empty path.");
142 		goto out;
143 	}
144 
145 #if defined(_WINDOWS) || defined(__MINGW32__)
146 	/* special processing for Windows, since directory name cannot be simply separated from file name regexp */
147 	for (sz = strlen(filename) - 1, separator = &filename[sz]; separator >= filename; separator--)
148 	{
149 		if (PATH_SEPARATOR != *separator)
150 			continue;
151 
152 		zabbix_log(LOG_LEVEL_DEBUG, "%s() %s", __func__, filename);
153 		zabbix_log(LOG_LEVEL_DEBUG, "%s() %*s", __func__, separator - filename + 1, "^");
154 
155 		/* separator must be relative delimiter of the original filename */
156 		if (FAIL == split_string(filename, separator, directory, filename_regexp))
157 		{
158 			*err_msg = zbx_dsprintf(*err_msg, "Cannot split path by \"%c\".", PATH_SEPARATOR);
159 			goto out;
160 		}
161 
162 		sz = strlen(*directory);
163 
164 		/* Windows world verification */
165 		if (sz + 1 > MAX_PATH)
166 		{
167 			*err_msg = zbx_strdup(*err_msg, "Directory path is too long.");
168 			zbx_free(*directory);
169 			zbx_free(*filename_regexp);
170 			goto out;
171 		}
172 
173 		/* Windows "stat" functions cannot get info about directories with '\' at the end of the path, */
174 		/* except for root directories 'x:\' */
175 		if (0 == zbx_stat(*directory, &buf) && S_ISDIR(buf.st_mode))
176 			break;
177 
178 		if (sz > 0 && PATH_SEPARATOR == (*directory)[sz - 1])
179 		{
180 			(*directory)[sz - 1] = '\0';
181 
182 			if (0 == zbx_stat(*directory, &buf) && S_ISDIR(buf.st_mode))
183 			{
184 				(*directory)[sz - 1] = PATH_SEPARATOR;
185 				break;
186 			}
187 		}
188 
189 		zabbix_log(LOG_LEVEL_DEBUG, "cannot find directory '%s'", *directory);
190 		zbx_free(*directory);
191 		zbx_free(*filename_regexp);
192 	}
193 
194 	if (separator < filename)
195 	{
196 		*err_msg = zbx_strdup(*err_msg, "Non-existing disk or directory.");
197 		goto out;
198 	}
199 #else	/* not _WINDOWS */
200 	if (NULL == (separator = strrchr(filename, PATH_SEPARATOR)))
201 	{
202 		*err_msg = zbx_dsprintf(*err_msg, "Cannot find separator \"%c\" in path.", PATH_SEPARATOR);
203 		goto out;
204 	}
205 
206 	if (SUCCEED != split_string(filename, separator, directory, filename_regexp))
207 	{
208 		*err_msg = zbx_dsprintf(*err_msg, "Cannot split path by \"%c\".", PATH_SEPARATOR);
209 		goto out;
210 	}
211 
212 	if (-1 == zbx_stat(*directory, &buf))
213 	{
214 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain directory information: %s", zbx_strerror(errno));
215 		zbx_free(*directory);
216 		zbx_free(*filename_regexp);
217 		goto out;
218 	}
219 
220 	if (0 == S_ISDIR(buf.st_mode))
221 	{
222 		*err_msg = zbx_dsprintf(*err_msg, "Base path \"%s\" is not a directory.", ZBX_NULL2STR(*directory));
223 		zbx_free(*directory);
224 		zbx_free(*filename_regexp);
225 		goto out;
226 	}
227 #endif	/* _WINDOWS */
228 
229 	ret = SUCCEED;
230 out:
231 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s directory:'%s' filename_regexp:'%s'", __func__,
232 			zbx_result_string(ret), ZBX_NULL2STR(*directory), ZBX_NULL2STR(*filename_regexp));
233 
234 	return ret;
235 }
236 
237 /******************************************************************************
238  *                                                                            *
239  * Function: file_part_md5                                                    *
240  *                                                                            *
241  * Purpose: calculate the MD5 sum of the specified part of the file           *
242  *                                                                            *
243  * Parameters:                                                                *
244  *     f        - [IN] file descriptor                                        *
245  *     offset   - [IN] start position of the part                             *
246  *     length   - [IN] length of the part in bytes. Maximum is 512 bytes.     *
247  *     md5buf   - [OUT] output buffer, MD5_DIGEST_SIZE-bytes long, where the  *
248  *                calculated MD5 sum is placed                                *
249  *     filename - [IN] file name, used in error logging                       *
250  *     err_msg  - [IN/OUT] error message why FAIL-ed                          *
251  *                                                                            *
252  * Return value: SUCCEED or FAIL                                              *
253  *                                                                            *
254  ******************************************************************************/
static int	file_part_md5(int f, size_t offset, int length, md5_byte_t *md5buf, const char *filename,
		char **err_msg)
{
	/* Reads exactly 'length' bytes starting at 'offset' from descriptor 'f' and stores their MD5 */
	/* sum in 'md5buf'. Returns FAIL and sets '*err_msg' if the length is out of range, the seek */
	/* fails or fewer than 'length' bytes can be read. */
	md5_state_t	state;
	char		buf[MAX_LEN_MD5];
	int		rc;

	/* a negative length would be converted into a huge size_t for read() and overflow 'buf' */
	if (0 > length)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Invalid MD5 fragment length %d.", length);
		return FAIL;
	}

	if (MAX_LEN_MD5 < length)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Length %d exceeds maximum MD5 fragment length %d.", length,
				MAX_LEN_MD5);
		return FAIL;
	}

	if ((zbx_offset_t)-1 == zbx_lseek(f, offset, SEEK_SET))
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_SIZE_T " for file \"%s\": %s",
				(zbx_fs_size_t)offset, filename, zbx_strerror(errno));
		return FAIL;
	}

	/* a short read is treated as a failure: the MD5 sum is only meaningful for the whole fragment */
	if (length != (rc = (int)read(f, buf, (size_t)length)))
	{
		if (-1 == rc)
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read %d bytes from file \"%s\": %s", length, filename,
					zbx_strerror(errno));
		}
		else
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read %d bytes from file \"%s\". Read %d bytes only.",
					length, filename, rc);
		}

		return FAIL;
	}

	zbx_md5_init(&state);
	zbx_md5_append(&state, (const md5_byte_t *)buf, length);
	zbx_md5_finish(&state, md5buf);

	return SUCCEED;
}
298 
299 #if defined(_WINDOWS) || defined(__MINGW32__)
300 /******************************************************************************
301  *                                                                            *
302  * Function: file_id                                                          *
303  *                                                                            *
304  * Purpose: get Microsoft Windows file device ID, 64-bit FileIndex or         *
305  *          128-bit FileId                                                    *
306  *                                                                            *
307  * Parameters:                                                                *
308  *     f        - [IN] file descriptor                                        *
309  *     use_ino  - [IN] how to use file IDs                                    *
310  *     dev      - [OUT] device ID                                             *
311  *     ino_lo   - [OUT] 64-bit nFileIndex or lower 64-bits of FileId          *
312  *     ino_hi   - [OUT] higher 64-bits of FileId                              *
313  *     filename - [IN] file name, used in error logging                       *
314  *     err_msg  - [IN/OUT] error message why an item became NOTSUPPORTED      *
315  *                                                                            *
316  * Return value: SUCCEED or FAIL                                              *
317  *                                                                            *
318  ******************************************************************************/
static int	file_id(int f, int use_ino, zbx_uint64_t *dev, zbx_uint64_t *ino_lo, zbx_uint64_t *ino_hi,
		const char *filename, char **err_msg)
{
	BY_HANDLE_FILE_INFORMATION	info;
	ZBX_FILE_ID_INFO		id_info;
	intptr_t			handle;	/* file HANDLE */

	handle = _get_osfhandle(f);

	if (-1 == handle)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain handle from descriptor of file \"%s\": %s",
				filename, zbx_strerror(errno));
		return FAIL;
	}

	switch (use_ino)
	{
		case 0:
		case 1:
			/* File indexes are not reliable file identifiers when use_ino = 0 (e.g. on FAT32, */
			/* exFAT) but they are copied anyway to keep debug logs meaningful. */
			if (0 == GetFileInformationByHandle((HANDLE)handle, &info))
			{
				*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain information for file \"%s\": %s",
						filename, strerror_from_system(GetLastError()));
				return FAIL;
			}

			*dev = info.dwVolumeSerialNumber;
			*ino_lo = (zbx_uint64_t)info.nFileIndexHigh << 32 | (zbx_uint64_t)info.nFileIndexLow;
			*ino_hi = 0;
			break;
		case 2:
			/* NOTE(review): when the extended API is not available the output values are left */
			/* untouched and SUCCEED is still returned - confirm callers expect that */
			if (NULL == zbx_GetFileInformationByHandleEx)
				break;

			if (0 == zbx_GetFileInformationByHandleEx((HANDLE)handle, zbx_FileIdInfo, &id_info,
					sizeof(id_info)))
			{
				*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain extended information for file"
						" \"%s\": %s", filename, strerror_from_system(GetLastError()));
				return FAIL;
			}

			*dev = id_info.VolumeSerialNumber;
			*ino_lo = id_info.FileId.LowPart;
			*ino_hi = id_info.FileId.HighPart;
			break;
		default:
			THIS_SHOULD_NEVER_HAPPEN;
			return FAIL;
	}

	return SUCCEED;
}
379 
380 /******************************************************************************
381  *                                                                            *
382  * Function: set_use_ino_by_fs_type                                           *
383  *                                                                            *
384  * Purpose: find file system type and set 'use_ino' parameter                 *
385  *                                                                            *
386  * Parameters:                                                                *
387  *     path     - [IN] directory or file name                                 *
388  *     use_ino  - [IN] how to use file IDs                                    *
389  *     err_msg  - [IN/OUT] error message why an item became NOTSUPPORTED      *
390  *                                                                            *
391  * Return value: SUCCEED or FAIL                                              *
392  *                                                                            *
393  ******************************************************************************/
set_use_ino_by_fs_type(const char * path,int * use_ino,char ** err_msg)394 static int	set_use_ino_by_fs_type(const char *path, int *use_ino, char **err_msg)
395 {
396 	char	*utf8;
397 	wchar_t	*path_uni, mount_point[MAX_PATH + 1], fs_type[MAX_PATH + 1];
398 
399 	path_uni = zbx_utf8_to_unicode(path);
400 
401 	/* get volume mount point */
402 	if (0 == GetVolumePathName(path_uni, mount_point,
403 			sizeof(mount_point) / sizeof(wchar_t)))
404 	{
405 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain volume mount point for file \"%s\": %s", path,
406 				strerror_from_system(GetLastError()));
407 		zbx_free(path_uni);
408 		return FAIL;
409 	}
410 
411 	zbx_free(path_uni);
412 
413 	/* Which file system type this directory resides on ? */
414 	if (0 == GetVolumeInformation(mount_point, NULL, 0, NULL, NULL, NULL, fs_type,
415 			sizeof(fs_type) / sizeof(wchar_t)))
416 	{
417 		utf8 = zbx_unicode_to_utf8(mount_point);
418 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain volume information for directory \"%s\": %s", utf8,
419 				strerror_from_system(GetLastError()));
420 		zbx_free(utf8);
421 		return FAIL;
422 	}
423 
424 	utf8 = zbx_unicode_to_utf8(fs_type);
425 
426 	if (0 == strcmp(utf8, "NTFS"))
427 		*use_ino = 1;			/* 64-bit FileIndex */
428 	else if (0 == strcmp(utf8, "ReFS"))
429 		*use_ino = 2;			/* 128-bit FileId */
430 	else
431 		*use_ino = 0;			/* cannot use inodes to identify files (e.g. FAT32) */
432 
433 	zabbix_log(LOG_LEVEL_DEBUG, "log files reside on '%s' file system", utf8);
434 	zbx_free(utf8);
435 
436 	return SUCCEED;
437 }
438 #endif
439 
440 /******************************************************************************
441  *                                                                            *
442  * Function: print_logfile_list                                               *
443  *                                                                            *
444  * Purpose: write logfile list into log for debugging                         *
445  *                                                                            *
446  * Parameters:                                                                *
447  *     logfiles     - [IN] array of logfiles                                  *
448  *     logfiles_num - [IN] number of elements in the array                    *
449  *                                                                            *
450  ******************************************************************************/
print_logfile_list(const struct st_logfile * logfiles,int logfiles_num)451 static void	print_logfile_list(const struct st_logfile *logfiles, int logfiles_num)
452 {
453 	if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
454 	{
455 		int	i;
456 
457 		for (i = 0; i < logfiles_num; i++)
458 		{
459 			char	first_buf[ZBX_MD5_PRINT_BUF_LEN], last_buf[ZBX_MD5_PRINT_BUF_LEN];
460 
461 			zbx_md5buf2str(logfiles[i].first_block_md5, first_buf);
462 			zbx_md5buf2str(logfiles[i].last_block_md5, last_buf);
463 
464 			zabbix_log(LOG_LEVEL_DEBUG, "   nr:%d filename:'%s' mtime:%d size:" ZBX_FS_UI64
465 					" processed_size:" ZBX_FS_UI64 " seq:%d copy_of:%d incomplete:%d dev:"
466 					ZBX_FS_UI64 " ino_hi:" ZBX_FS_UI64 " ino_lo:" ZBX_FS_UI64 " md5_block_size:%d"
467 					" first_block_md5:%s last_block_offset:" ZBX_FS_UI64 " last_block_md5:%s", i,
468 					logfiles[i].filename, logfiles[i].mtime, logfiles[i].size,
469 					logfiles[i].processed_size, logfiles[i].seq, logfiles[i].copy_of,
470 					logfiles[i].incomplete, logfiles[i].dev, logfiles[i].ino_hi, logfiles[i].ino_lo,
471 					logfiles[i].md5_block_size, first_buf, logfiles[i].last_block_offset, last_buf);
472 		}
473 	}
474 }
475 
476 /******************************************************************************
477  *                                                                            *
478  * Function: compare_file_places                                              *
479  *                                                                            *
480  * Purpose: compare device numbers and inode numbers of 2 files               *
481  *                                                                            *
482  * Parameters: old_file - [IN] details of the 1st log file                    *
483  *             new_file - [IN] details of the 2nd log file                    *
484  *             use_ino  - [IN] 0 - do not use inodes in comparison,           *
485  *                             1 - use up to 64-bit inodes in comparison,     *
486  *                             2 - use 128-bit inodes in comparison.          *
487  *                                                                            *
488  * Return value: ZBX_FILE_PLACE_SAME - both files have the same place         *
489  *               ZBX_FILE_PLACE_OTHER - files reside in different places      *
490  *               ZBX_FILE_PLACE_UNKNOWN - cannot compare places (no inodes)   *
491  *                                                                            *
492  ******************************************************************************/
compare_file_places(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino)493 static int	compare_file_places(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino)
494 {
495 	if (1 == use_ino || 2 == use_ino)
496 	{
497 		if (old_file->ino_lo != new_file->ino_lo || old_file->dev != new_file->dev ||
498 				(2 == use_ino && old_file->ino_hi != new_file->ino_hi))
499 		{
500 			return ZBX_FILE_PLACE_OTHER;
501 		}
502 		else
503 			return ZBX_FILE_PLACE_SAME;
504 	}
505 
506 	return ZBX_FILE_PLACE_UNKNOWN;
507 }
508 
509 /******************************************************************************
510  *                                                                            *
511  * Function: open_file_helper                                                 *
512  *                                                                            *
513  * Purpose: open specified file for reading                                   *
514  *                                                                            *
515  * Parameters: pathname - [IN] full pathname of file                          *
516  *             err_msg  - [IN/OUT] error message why file could not be opened *
517  *                                                                            *
518  * Return value: file descriptor on success or -1 on error                    *
519  *                                                                            *
520  ******************************************************************************/
static int	open_file_helper(const char *pathname, char **err_msg)
{
	int	descriptor = zbx_open(pathname, O_RDONLY);

	if (-1 != descriptor)
		return descriptor;

	/* opening failed: report the reason to the caller through 'err_msg' */
	*err_msg = zbx_dsprintf(*err_msg, "Cannot open file \"%s\": %s", pathname, zbx_strerror(errno));

	return -1;
}
530 
531 /******************************************************************************
532  *                                                                            *
533  * Function: close_file_helper                                                *
534  *                                                                            *
535  * Purpose: close specified file                                              *
536  *                                                                            *
537  * Parameters: fd       - [IN] file descriptor to close                       *
538  *             pathname - [IN] pathname of file, used for error reporting     *
539  *             err_msg  - [IN/OUT] error message why file could not be closed *
540  *                             unless an earlier error has been already       *
541  *                             reported                                       *
542  *                                                                            *
543  * Return value: SUCCEED or FAIL                                              *
544  *                                                                            *
545  ******************************************************************************/
close_file_helper(int fd,const char * pathname,char ** err_msg)546 static int	close_file_helper(int fd, const char *pathname, char **err_msg)
547 {
548 	if (0 == close(fd))
549 		return SUCCEED;
550 
551 	if (NULL == *err_msg)
552 		*err_msg = zbx_dsprintf(NULL, "Cannot close file \"%s\": %s", pathname, zbx_strerror(errno));
553 
554 	return FAIL;
555 }
556 
557 /******************************************************************************
558  *                                                                            *
559  * Function: examine_md5_and_place                                            *
560  *                                                                            *
561  * Purpose: from MD5 sums of blocks and places of 2 files make a conclusion   *
562  *          is it the same file, a pair 'original/copy' or 2 different files  *
563  *                                                                            *
564  * Parameters:  buf1          - [IN] MD5 sum of block in the 1st file         *
565  *              buf2          - [IN] MD5 sum of block in the 2nd file         *
566  *              size          - [IN] size of MD5 sum                          *
567  *              is_same_place - [IN] equality of file places                  *
568  *                                                                            *
569  * Return value: ZBX_SAME_FILE_NO - they are 2 different files                *
570  *               ZBX_SAME_FILE_YES - 2 files are (assumed) to be the same     *
571  *               ZBX_SAME_FILE_COPY - one file is copy of the other           *
572  *                                                                            *
573  * Comments: in case files places are unknown but MD5 sums of block pairs     *
574  *           match it is assumed to be the same file                          *
575  *                                                                            *
576  ******************************************************************************/
examine_md5_and_place(const md5_byte_t * buf1,const md5_byte_t * buf2,size_t size,int is_same_place)577 static int	examine_md5_and_place(const md5_byte_t *buf1, const md5_byte_t *buf2, size_t size, int is_same_place)
578 {
579 	if (0 == memcmp(buf1, buf2, size))
580 	{
581 		switch (is_same_place)
582 		{
583 			case ZBX_FILE_PLACE_UNKNOWN:
584 			case ZBX_FILE_PLACE_SAME:
585 				return ZBX_SAME_FILE_YES;
586 			case ZBX_FILE_PLACE_OTHER:
587 				return ZBX_SAME_FILE_COPY;
588 		}
589 	}
590 
591 	return ZBX_SAME_FILE_NO;
592 }
593 
594 /******************************************************************************
595  *                                                                            *
596  * Function: is_same_file_logcpt                                              *
597  *                                                                            *
598  * Purpose: find out if a file from the old list and a file from the new list *
599  *          could be the same file or copy in case of copy/truncate rotation  *
600  *                                                                            *
601  * Parameters:                                                                *
602  *          old_file - [IN] file from the old list                            *
603  *          new_file - [IN] file from the new list                            *
604  *          use_ino  - [IN] 0 - do not use inodes in comparison,              *
605  *                          1 - use up to 64-bit inodes in comparison,        *
606  *                          2 - use 128-bit inodes in comparison.             *
607  *         new_files - [IN] new file list                                     *
608  *          num_new  - [IN] number of elements in the new file list           *
609  *          err_msg  - [IN/OUT] error message why an item became              *
610  *                     NOTSUPPORTED                                           *
611  *                                                                            *
612  * Return value: ZBX_SAME_FILE_NO - it is not the same file                   *
613  *               ZBX_SAME_FILE_YES - it could be the same file                *
614  *               ZBX_SAME_FILE_COPY - it is a copy                            *
615  *               ZBX_SAME_FILE_ERROR - error                                  *
616  *                                                                            *
617  * Comments: In some cases we can say that it IS NOT the same file.           *
618  *           In other cases it COULD BE the same file or copy.                *
619  *                                                                            *
620  *           Thread-safe                                                      *
621  *                                                                            *
622  ******************************************************************************/
is_same_file_logcpt(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino,const struct st_logfile * new_files,int num_new,char ** err_msg)623 static int	is_same_file_logcpt(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino,
624 		const struct st_logfile *new_files, int num_new, char **err_msg)
625 {
626 	int	is_same_place, ret = ZBX_SAME_FILE_NO, found_matching_md5 = 0, same_name_in_new_list = 0, i, f;
627 
628 	if (old_file->mtime > new_file->mtime)
629 		return ZBX_SAME_FILE_NO;
630 
631 	if (-1 == old_file->md5_block_size || -1 == new_file->md5_block_size)
632 	{
633 		/* Cannot compare MD5 sums. Assume two different files - reporting twice is better than skipping. */
634 		return ZBX_SAME_FILE_NO;
635 	}
636 
637 	is_same_place = compare_file_places(old_file, new_file, use_ino);
638 
639 	if (old_file->md5_block_size == new_file->md5_block_size &&
640 			old_file->last_block_offset == new_file->last_block_offset)
641 	{
642 		if (ZBX_SAME_FILE_NO == (ret = examine_md5_and_place(old_file->first_block_md5,
643 				new_file->first_block_md5, sizeof(new_file->first_block_md5), is_same_place)))
644 		{
645 			return ret;
646 		}
647 
648 		return examine_md5_and_place(old_file->last_block_md5, new_file->last_block_md5,
649 				sizeof(new_file->last_block_md5), is_same_place);
650 	}
651 
652 	if (0 == old_file->md5_block_size || 0 == new_file->md5_block_size)
653 		return ZBX_SAME_FILE_NO;
654 
655 	/* MD5 sums have been calculated from blocks of different sizes or last blocks offsets differ */
656 
657 	if (old_file->md5_block_size < new_file->md5_block_size ||
658 			(old_file->md5_block_size == new_file->md5_block_size &&
659 			old_file->last_block_offset != new_file->last_block_offset))
660 	{
661 		md5_byte_t	md5tmp[MD5_DIGEST_SIZE];
662 
663 		if (-1 == (f = open_file_helper(new_file->filename, err_msg)))
664 			return ZBX_SAME_FILE_ERROR;
665 
666 		if (SUCCEED != file_part_md5(f, 0, old_file->md5_block_size, md5tmp, new_file->filename, err_msg))
667 		{
668 			ret = ZBX_SAME_FILE_ERROR;
669 			goto clean1;
670 		}
671 
672 		if (ZBX_SAME_FILE_NO == (ret = examine_md5_and_place(old_file->first_block_md5, md5tmp, sizeof(md5tmp),
673 				is_same_place)))
674 		{
675 			goto clean1;
676 		}
677 
678 		if (0 < old_file->last_block_offset)
679 		{
680 			if (SUCCEED != file_part_md5(f, old_file->last_block_offset, old_file->md5_block_size, md5tmp,
681 					new_file->filename, err_msg))
682 			{
683 				ret = ZBX_SAME_FILE_ERROR;
684 				goto clean1;
685 			}
686 
687 			ret = examine_md5_and_place(old_file->last_block_md5, md5tmp, sizeof(md5tmp), is_same_place);
688 		}
689 clean1:
690 		if (0 != close(f) && ZBX_SAME_FILE_ERROR != ret)
691 		{
692 			*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s",
693 					new_file->filename, zbx_strerror(errno));
694 			ret = ZBX_SAME_FILE_ERROR;
695 		}
696 
697 		return ret;
698 	}
699 
700 	/* Remaining case: old_file->md5_block_size > new_file->md5_block_size */
701 
702 	/* Now it is necessary to read the first 'new_file->md5_block_size' bytes */
703 	/* of the old file to calculate MD5 sum to compare. Unfortunately we  */
704 	/* cannot reliably use 'old_file->filename' to open the file because: */
705 	/*    - being from the old list it might be no longer available,      */
706 	/*    - it can have a different name in the new file list;            */
707 	/*    - 'old_file->filename' can be the same as 'new_file->filename'  */
708 	/*      (see ZBX-18883) making comparison pointless.                  */
709 
710 	for (i = 0; i < num_new; i++)
711 	{
712 		md5_byte_t	md5tmp[MD5_DIGEST_SIZE];
713 
714 		if ((zbx_uint64_t)new_file->md5_block_size > new_files[i].size)
715 			continue;
716 
717 		if (0 == strcmp(old_file->filename, new_file->filename) ||
718 				0 == strcmp(new_files[i].filename, new_file->filename))	/* do not compare with self */
719 		{
720 			same_name_in_new_list = 1;
721 			continue;
722 		}
723 
724 		if (-1 == (f = open_file_helper(new_files[i].filename, err_msg)))
725 			return ZBX_SAME_FILE_ERROR;
726 
727 		if (SUCCEED != file_part_md5(f, 0, new_file->md5_block_size, md5tmp, new_files[i].filename, err_msg))
728 		{
729 			ret = ZBX_SAME_FILE_ERROR;
730 			goto clean2;
731 		}
732 
733 		if (ZBX_SAME_FILE_NO == (ret = examine_md5_and_place(new_file->first_block_md5, md5tmp, sizeof(md5tmp),
734 				compare_file_places(old_file, new_files + i, use_ino))))
735 		{
736 			goto clean2;
737 		}
738 
739 		if (0 < new_file->last_block_offset)
740 		{
741 			if (SUCCEED != file_part_md5(f, new_file->last_block_offset, new_file->md5_block_size, md5tmp,
742 					new_files[i].filename, err_msg))
743 			{
744 				ret = ZBX_SAME_FILE_ERROR;
745 				goto clean2;
746 			}
747 
748 			ret = examine_md5_and_place(new_file->last_block_md5, md5tmp, sizeof(md5tmp),
749 					compare_file_places(old_file, new_files + i, use_ino));
750 		}
751 
752 		if (ZBX_SAME_FILE_YES == ret || ZBX_SAME_FILE_COPY == ret)
753 			found_matching_md5 = 1;
754 clean2:
755 		if (0 != close(f) && ZBX_SAME_FILE_ERROR != ret)
756 		{
757 			*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", new_files[i].filename,
758 					zbx_strerror(errno));
759 			ret = ZBX_SAME_FILE_ERROR;
760 		}
761 
762 		if (0 != found_matching_md5)
763 			break;
764 	}
765 
766 	if (0 == found_matching_md5 && 0 == same_name_in_new_list)
767 	{
768 		md5_byte_t	md5tmp[MD5_DIGEST_SIZE];
769 
770 		/* last try - opening file with the name from the old list */
771 
772 		if (-1 == (f = open_file_helper(old_file->filename, err_msg)))
773 			return ZBX_SAME_FILE_NO;	/* not an error if it is no longer available */
774 
775 		if (SUCCEED != file_part_md5(f, 0, new_file->md5_block_size, md5tmp, old_file->filename, err_msg))
776 		{
777 			ret = ZBX_SAME_FILE_NO;
778 			goto clean3;
779 		}
780 
781 		if (ZBX_SAME_FILE_NO == (ret = examine_md5_and_place(new_file->first_block_md5, md5tmp, sizeof(md5tmp),
782 				compare_file_places(old_file, new_file, use_ino))))
783 		{
784 			goto clean3;
785 		}
786 
787 		if (0 < new_file->last_block_offset)
788 		{
789 			if (SUCCEED != file_part_md5(f, new_file->last_block_offset, new_file->md5_block_size, md5tmp,
790 					old_file->filename, err_msg))
791 			{
792 				ret = ZBX_SAME_FILE_NO;
793 				goto clean3;
794 			}
795 
796 			ret = examine_md5_and_place(new_file->last_block_md5, md5tmp, sizeof(md5tmp),
797 					compare_file_places(old_file, new_file, use_ino));
798 		}
799 clean3:
800 		if (0 != close(f))
801 		{
802 			*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", old_file->filename,
803 					zbx_strerror(errno));
804 			ret = ZBX_SAME_FILE_ERROR;
805 		}
806 	}
807 
808 	return ret;
809 }
810 
811 /******************************************************************************
812  *                                                                            *
813  * Function: is_same_file_logrt                                               *
814  *                                                                            *
815  * Purpose: find out if a file from the old list and a file from the new list *
816  *          could be the same file in case of simple rotation                 *
817  *                                                                            *
818  * Parameters:                                                                *
819  *          old_file - [IN] file from the old list                            *
820  *          new_file - [IN] file from the new list                            *
821  *          use_ino  - [IN] 0 - do not use inodes in comparison,              *
822  *                          1 - use up to 64-bit inodes in comparison,        *
823  *                          2 - use 128-bit inodes in comparison.             *
824  *          options  - [IN] log rotation options                              *
825  *         new_files - [IN] new file list                                     *
826  *          num_new  - [IN] number of elements in the new file list           *
827  *          err_msg  - [IN/OUT] error message why an item became              *
828  *                     NOTSUPPORTED                                           *
829  *                                                                            *
830  * Return value: ZBX_SAME_FILE_NO - it is not the same file,                  *
831  *               ZBX_SAME_FILE_YES - it could be the same file,               *
832  *               ZBX_SAME_FILE_ERROR - error,                                 *
833  *               ZBX_SAME_FILE_RETRY - retry on the next check.               *
834  *                                                                            *
835  * Comments: In some cases we can say that it IS NOT the same file.           *
836  *           We can never say that it IS the same file and it has not been    *
837  *           truncated and replaced with a similar one.                       *
838  *                                                                            *
839  * Comments: Thread-safe                                                      *
840  *                                                                            *
841  ******************************************************************************/
is_same_file_logrt(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino,zbx_log_rotation_options_t options,const struct st_logfile * new_files,int num_new,char ** err_msg)842 static int	is_same_file_logrt(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino,
843 		zbx_log_rotation_options_t options, const struct st_logfile *new_files, int num_new, char **err_msg)
844 {
845 	if (ZBX_LOG_ROTATION_LOGCPT == options)
846 		return is_same_file_logcpt(old_file, new_file, use_ino, new_files, num_new, err_msg);
847 
848 	if (ZBX_FILE_PLACE_OTHER == compare_file_places(old_file, new_file, use_ino))
849 	{
850 		/* files cannot reside on different devices or occupy different inodes and be the same */
851 		return ZBX_SAME_FILE_NO;
852 	}
853 
854 	if (old_file->size > new_file->size || old_file->processed_size > new_file->size)
855 	{
856 		/* File size cannot decrease. Truncating or replacing a file with a smaller one */
857 		/* counts as 2 different files. */
858 		return ZBX_SAME_FILE_NO;
859 	}
860 
861 	/* the old file and the new file occupy the same device and inode (index), */
862 	/* the new file is not smaller than the old one */
863 
864 	if (old_file->size == new_file->size && old_file->mtime < new_file->mtime)
865 	{
866 		int	same_first_block = 0, same_last_block = 0;
867 
868 		if (0 < old_file->md5_block_size && old_file->md5_block_size == new_file->md5_block_size)
869 		{
870 			if (0 != memcmp(old_file->first_block_md5, new_file->first_block_md5,
871 					sizeof(new_file->first_block_md5)))
872 			{
873 				return ZBX_SAME_FILE_NO;
874 			}
875 
876 			same_first_block = 1;
877 
878 			if (old_file->last_block_offset == new_file->last_block_offset)
879 			{
880 				if (0 != memcmp(old_file->last_block_md5, new_file->last_block_md5,
881 						sizeof(new_file->last_block_md5)))
882 				{
883 					return ZBX_SAME_FILE_NO;
884 				}
885 
886 				same_last_block = 1;
887 			}
888 		}
889 
890 		/* There is one problematic case: log file size stays the same   */
891 		/* but its modification time (mtime) changes. This can be caused */
892 		/* by 3 scenarios:                                               */
893 		/*   1) the log file is rewritten with the same content at the   */
894 		/*     same location on disk. Very rare but possible.            */
895 		/*   2) depending on file system it's possible that stat() was   */
896 		/*     called between mtime and file size update. In this        */
897 		/*     situation the agent registers a file with the old size    */
898 		/*     and a new mtime.                                          */
899 		/*   3) application somehow "touch"-es the log file: mtime       */
900 		/*     increases, size does not.                                 */
901 		/*                                                               */
902 		/* Agent cannot distinguish between these cases. Only users      */
903 		/* familiar with their applications and log file rotation can    */
904 		/* know which scenario takes place with which log file.          */
905 		/* Most users would choose "noreread" option (it is not enabled  */
906 		/* by default!) to handle it as the same log file without no     */
907 		/* new records to report.                                        */
908 		/* Some users might want to handle it as a new log file (it is   */
909 		/* the default setting) (e.g. for log*.count[] purpose).         */
910 
911 		if (0 != same_first_block && 0 != same_last_block && ZBX_LOG_ROTATION_NO_REREAD == options)
912 			return ZBX_SAME_FILE_YES;
913 
914 		/* On the first try we assume it's the same file, just its size  */
915 		/* has not been changed yet.                                     */
916 		/* If the size has not changed on the next check, then we assume */
917 		/* that some tampering was done and to be safe we will treat it  */
918 		/* as a different file unless "noreread" option is specified.    */
919 		if (0 == old_file->retry)
920 		{
921 			if (ZBX_LOG_ROTATION_NO_REREAD != options)
922 			{
923 				zabbix_log(LOG_LEVEL_WARNING, "the modification time of log file \"%s\" has been"
924 						" updated without changing its size, try checking again later",
925 						old_file->filename);
926 			}
927 
928 			return ZBX_SAME_FILE_RETRY;
929 		}
930 
931 		if (ZBX_LOG_ROTATION_NO_REREAD == options)
932 		{
933 			zabbix_log(LOG_LEVEL_WARNING, "after changing modification time the size of log file \"%s\""
934 					" still has not been updated, consider it to be same file",
935 					old_file->filename);
936 			return ZBX_SAME_FILE_YES;
937 		}
938 
939 		zabbix_log(LOG_LEVEL_WARNING, "after changing modification time the size of log file \"%s\""
940 				" still has not been updated, consider it to be a new file", old_file->filename);
941 		return ZBX_SAME_FILE_NO;
942 	}
943 
944 	if (-1 == old_file->md5_block_size || -1 == new_file->md5_block_size)
945 	{
946 		/* Cannot compare MD5 sums. Assume two different files - reporting twice is better than skipping. */
947 		return ZBX_SAME_FILE_NO;
948 	}
949 
950 	if (old_file->md5_block_size > new_file->md5_block_size)
951 	{
952 		/* file initial block size from which MD5 sum is calculated cannot decrease */
953 		return ZBX_SAME_FILE_NO;
954 	}
955 
956 	if (old_file->md5_block_size == new_file->md5_block_size)
957 	{
958 		if (0 != memcmp(old_file->first_block_md5, new_file->first_block_md5,
959 				sizeof(new_file->first_block_md5)))
960 		{
961 			return ZBX_SAME_FILE_NO;
962 		}
963 
964 		if (old_file->last_block_offset == new_file->last_block_offset &&
965 				0 == memcmp(old_file->last_block_md5, new_file->last_block_md5,
966 				sizeof(new_file->last_block_md5)))
967 		{
968 			return ZBX_SAME_FILE_YES;
969 		}
970 	}
971 
972 	if (0 < old_file->md5_block_size)
973 	{
974 		/* MD5 for the old file has been calculated from a smaller block or */
975 		/* with a different offset than for the new file */
976 
977 		int		f, ret;
978 		md5_byte_t	md5tmp[MD5_DIGEST_SIZE];
979 
980 		if (-1 == (f = open_file_helper(new_file->filename, err_msg)))
981 			return ZBX_SAME_FILE_ERROR;
982 
983 		if (SUCCEED != file_part_md5(f, 0, old_file->md5_block_size, md5tmp, new_file->filename, err_msg))
984 		{
985 			ret = ZBX_SAME_FILE_ERROR;
986 			goto clean;
987 		}
988 
989 		if (0 != memcmp(old_file->first_block_md5, md5tmp, sizeof(md5tmp)))
990 		{
991 			ret = ZBX_SAME_FILE_NO;
992 			goto clean;
993 		}
994 
995 		if (0 == old_file->last_block_offset)
996 		{
997 			ret = ZBX_SAME_FILE_YES;
998 			goto clean;
999 		}
1000 
1001 		if (SUCCEED != file_part_md5(f, old_file->last_block_offset, old_file->md5_block_size, md5tmp,
1002 				new_file->filename, err_msg))
1003 		{
1004 			ret = ZBX_SAME_FILE_ERROR;
1005 			goto clean;
1006 		}
1007 
1008 		if (0 == memcmp(old_file->last_block_md5, md5tmp, sizeof(md5tmp)))
1009 			ret = ZBX_SAME_FILE_YES;
1010 		else
1011 			ret = ZBX_SAME_FILE_NO;
1012 clean:
1013 		if (0 != close(f) && ZBX_SAME_FILE_ERROR != ret)
1014 		{
1015 			*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", new_file->filename,
1016 					zbx_strerror(errno));
1017 			ret = ZBX_SAME_FILE_ERROR;
1018 		}
1019 
1020 		return ret;
1021 	}
1022 
1023 	return ZBX_SAME_FILE_YES;
1024 }
1025 
/******************************************************************************
 *                                                                            *
 * Function: cross_out                                                        *
 *                                                                            *
 * Purpose: fill the given row and column with '0' except the element at the  *
 *          cross point and protected columns and protected rows              *
 *                                                                            *
 * Parameters:                                                                *
 *          arr    - [IN/OUT] two dimensional array                           *
 *          n_rows - [IN] number of rows in the array                         *
 *          n_cols - [IN] number of columns in the array                      *
 *          row    - [IN] number of cross point row                           *
 *          col    - [IN] number of cross point column                        *
 *          p_rows - [IN] vector with 'n_rows' elements.                      *
 *                        Value '1' means protected row.                      *
 *          p_cols - [IN] vector with 'n_cols' elements.                      *
 *                        Value '1' means protected column.                   *
 *                                                                            *
 * Example:                                                                   *
 *     Given array                                                            *
 *                                                                            *
 *         1 1 1 1                                                            *
 *         1 1 1 1                                                            *
 *         1 1 1 1                                                            *
 *                                                                            *
 *     and row = 1, col = 2 and no protected rows and columns                 *
 *     the array is modified as                                               *
 *                                                                            *
 *         1 1 0 1                                                            *
 *         0 0 1 0                                                            *
 *         1 1 0 1                                                            *
 *                                                                            *
 ******************************************************************************/
static void	cross_out(char *arr, int n_rows, int n_cols, int row, int col, const char *p_rows, const char *p_cols)
{
	int	i;
	char	*p;

	/* the array is stored row by row: element (r, c) is located at arr[r * n_cols + c] */

	p = arr + row * n_cols;		/* point to the first element of the 'row' */

	for (i = 0; i < n_cols; i++)	/* process row */
	{
		/* keep elements in protected columns and the cross point itself */
		if ('1' != p_cols[i] && col != i)
			p[i] = '0';
	}

	p = arr + col;			/* point to the top element of the 'col' */

	for (i = 0; i < n_rows; i++)	/* process column */
	{
		/* keep elements in protected rows and the cross point itself */
		if ('1' != p_rows[i] && row != i)
			p[i * n_cols] = '0';
	}
}
1080 
/******************************************************************************
 *                                                                            *
 * Function: is_uniq_row                                                      *
 *                                                                            *
 * Purpose: check if there is only one element '1' or '2' in the given row    *
 *                                                                            *
 * Parameters:                                                                *
 *          arr    - [IN] two dimensional array                               *
 *          n_cols - [IN] number of columns in the array                      *
 *          row    - [IN] number of row to search                             *
 *                                                                            *
 * Return value: number of column where the element '1' or '2' was found or   *
 *               -1 if there are zero or multiple elements '1' or '2' in the  *
 *               row                                                          *
 *                                                                            *
 ******************************************************************************/
static int	is_uniq_row(const char * const arr, int n_cols, int row)
{
	int		i, mappings = 0, ret = -1;
	const char	*p;

	p = arr + row * n_cols;			/* point to the first element of the 'row' */

	for (i = 0; i < n_cols; i++)
	{
		if ('1' == *p || '2' == *p)
		{
			/* the second mapping found makes the row non-unique, no need to scan further */
			if (2 == ++mappings)
			{
				ret = -1;	/* non-unique mapping in the row */
				break;
			}

			ret = i;		/* remember the column of the first mapping */
		}

		p++;
	}

	return ret;
}
1122 
/******************************************************************************
 *                                                                            *
 * Function: is_uniq_col                                                      *
 *                                                                            *
 * Purpose: check if there is only one element '1' or '2' in the given column *
 *                                                                            *
 * Parameters:                                                                *
 *          arr    - [IN] two dimensional array                               *
 *          n_rows - [IN] number of rows in the array                         *
 *          n_cols - [IN] number of columns in the array                      *
 *          col    - [IN] number of column to search                          *
 *                                                                            *
 * Return value: number of row where the element '1' or '2' was found or      *
 *               -1 if there are zero or multiple elements '1' or '2' in the  *
 *               column                                                       *
 *                                                                            *
 ******************************************************************************/
static int	is_uniq_col(const char * const arr, int n_rows, int n_cols, int col)
{
	int		i, mappings = 0, ret = -1;
	const char	*p;

	p = arr + col;				/* point to the top element of the 'col' */

	for (i = 0; i < n_rows; i++)
	{
		if ('1' == *p || '2' == *p)
		{
			/* the second mapping found makes the column non-unique, no need to scan further */
			if (2 == ++mappings)
			{
				ret = -1;	/* non-unique mapping in the column */
				break;
			}

			ret = i;		/* remember the row of the first mapping */
		}

		p += n_cols;			/* step down to the same column of the next row */
	}

	return ret;
}
1165 
1166 /******************************************************************************
1167  *                                                                            *
1168  * Function: is_old2new_unique_mapping                                        *
1169  *                                                                            *
1170  * Purpose: check if 'old2new' array has only unique mappings                 *
1171  *                                                                            *
1172  * Parameters:                                                                *
1173  *          old2new - [IN] two dimensional array of possible mappings         *
1174  *          num_old - [IN] number of elements in the old file list            *
1175  *          num_new - [IN] number of elements in the new file list            *
1176  *                                                                            *
1177  * Return value: SUCCEED - all mappings are unique,                           *
1178  *               FAIL - there are non-unique mappings                         *
1179  *                                                                            *
1180  ******************************************************************************/
is_old2new_unique_mapping(const char * const old2new,int num_old,int num_new)1181 static int	is_old2new_unique_mapping(const char * const old2new, int num_old, int num_new)
1182 {
1183 	int	i;
1184 
1185 	/* Is there 1:1 mapping in both directions between files in the old and the new list ? */
1186 	/* In this case every row and column has not more than one element '1' or '2', others are '0'. */
1187 	/* This is expected on UNIX (using inode numbers) and MS Windows (using FileID on NTFS, ReFS) */
1188 	/* unless 'copytruncate' rotation type is combined with multiple log file copies. */
1189 
1190 	for (i = 0; i < num_old; i++)		/* loop over rows (old files) */
1191 	{
1192 		if (-1 == is_uniq_row(old2new, num_new, i))
1193 			return FAIL;
1194 	}
1195 
1196 	for (i = 0; i < num_new; i++)		/* loop over columns (new files) */
1197 	{
1198 		if (-1 == is_uniq_col(old2new, num_old, num_new, i))
1199 			return FAIL;
1200 	}
1201 
1202 	return SUCCEED;
1203 }
1204 
1205 /******************************************************************************
1206  *                                                                            *
1207  * Function: resolve_old2new                                                  *
1208  *                                                                            *
1209  * Purpose: resolve non-unique mappings                                       *
1210  *                                                                            *
1211  * Parameters:                                                                *
1212  *     old2new - [IN] two dimensional array of possible mappings              *
1213  *     num_old - [IN] number of elements in the old file list                 *
1214  *     num_new - [IN] number of elements in the new file list                 *
1215  *                                                                            *
1216  ******************************************************************************/
resolve_old2new(char * old2new,int num_old,int num_new)1217 static void	resolve_old2new(char *old2new, int num_old, int num_new)
1218 {
1219 	int	i;
1220 	char	*protected_rows = NULL, *protected_cols = NULL;
1221 
1222 	if (SUCCEED == is_old2new_unique_mapping(old2new, num_old, num_new))
1223 		return;
1224 
1225 	/* Non-unique mapping is expected: */
1226 	/*   - on MS Windows using FAT32 and other file systems where inodes or file indexes are either not */
1227 	/*     preserved if a file is renamed or are not applicable, */
1228 	/*   - in 'copytruncate' rotation mode if multiple copies of log files are present. */
1229 
1230 	zabbix_log(LOG_LEVEL_DEBUG, "resolve_old2new(): non-unique mapping");
1231 
1232 	/* protect unique mappings from further modifications */
1233 
1234 	protected_rows = (char *)zbx_calloc(protected_rows, (size_t)num_old, sizeof(char));
1235 	protected_cols = (char *)zbx_calloc(protected_cols, (size_t)num_new, sizeof(char));
1236 
1237 	for (i = 0; i < num_old; i++)
1238 	{
1239 		int	c;
1240 
1241 		if (-1 != (c = is_uniq_row(old2new, num_new, i)) && -1 != is_uniq_col(old2new, num_old, num_new, c))
1242 		{
1243 			protected_rows[i] = '1';
1244 			protected_cols[c] = '1';
1245 		}
1246 	}
1247 
1248 	/* resolve the remaining non-unique mappings - turn them into unique ones */
1249 
1250 	if (num_old <= num_new)				/* square or wide array */
1251 	{
1252 		/****************************************************************************************************
1253 		 *                                                                                                  *
1254 		 * Example for a wide array:                                                                        *
1255 		 *                                                                                                  *
1256 		 *            D.log C.log B.log A.log                                                               *
1257 		 *           ------------------------                                                               *
1258 		 *    3.log | <1>    1     1     1                                                                  *
1259 		 *    2.log |  1    <1>    1     1                                                                  *
1260 		 *    1.log |  1     1    <1>    1                                                                  *
1261 		 *                                                                                                  *
1262 		 * There are 3 files in the old log file list and 4 files in the new log file list.                 *
1263 		 * The mapping is totally non-unique: the old log file '3.log' could have become the new 'D.log' or *
1264 		 * 'C.log', or 'B.log', or 'A.log' - we don't know for sure.                                        *
1265 		 * We make an assumption that a reasonable solution will be to proceed as if '3.log' was renamed to *
1266 		 * 'D.log', '2.log' - to 'C.log' and '1.log' - to 'B.log'.                                          *
1267 		 * We modify the array according to this assumption:                                                *
1268 		 *                                                                                                  *
1269 		 *            D.log C.log B.log A.log                                                               *
1270 		 *           ------------------------                                                               *
1271 		 *    3.log | <1>    0     0     0                                                                  *
1272 		 *    2.log |  0    <1>    0     0                                                                  *
1273 		 *    1.log |  0     0    <1>    0                                                                  *
1274 		 *                                                                                                  *
1275 		 * Now the mapping is unique. The file 'A.log' is counted as a new file to be analyzed from the     *
1276 		 * start.                                                                                           *
1277 		 *                                                                                                  *
1278 		 ****************************************************************************************************/
1279 
1280 		for (i = 0; i < num_old; i++)		/* loop over rows from top-left corner */
1281 		{
1282 			char	*p;
1283 			int	j;
1284 
1285 			if ('1' == protected_rows[i])
1286 				continue;
1287 
1288 			p = old2new + i * num_new;	/* the first element of the current row */
1289 
1290 			for (j = 0; j < num_new; j++)
1291 			{
1292 				if (('1' == p[j] || '2' == p[j]) && '1' != protected_cols[j])
1293 				{
1294 					cross_out(old2new, num_old, num_new, i, j, protected_rows, protected_cols);
1295 					break;
1296 				}
1297 			}
1298 		}
1299 	}
1300 	else	/* tall array */
1301 	{
1302 		/****************************************************************************************************
1303 		 *                                                                                                  *
1304 		 * Example for a tall array:                                                                        *
1305 		 *                                                                                                  *
1306 		 *            D.log C.log B.log A.log                                                               *
1307 		 *           ------------------------                                                               *
1308 		 *    6.log |  1     1     1     1                                                                  *
1309 		 *    5.log |  1     1     1     1                                                                  *
1310 		 *    4.log | <1>    1     1     1                                                                  *
1311 		 *    3.log |  1    <1>    1     1                                                                  *
1312 		 *    2.log |  1     1    <1>    1                                                                  *
1313 		 *    1.log |  1     1     1    <1>                                                                 *
1314 		 *                                                                                                  *
1315 		 * There are 6 files in the old log file list and 4 files in the new log file list.                 *
1316 		 * The mapping is totally non-unique: the old log file '6.log' could have become the new 'D.log' or *
1317 		 * 'C.log', or 'B.log', or 'A.log' - we don't know for sure.                                        *
1318 		 * We make an assumption that a reasonable solution will be to proceed as if '1.log' was renamed to *
1319 		 * 'A.log', '2.log' - to 'B.log', '3.log' - to 'C.log', '4.log' - to 'D.log'.                       *
1320 		 * We modify the array according to this assumption:                                                *
1321 		 *                                                                                                  *
1322 		 *            D.log C.log B.log A.log                                                               *
1323 		 *           ------------------------                                                               *
1324 		 *    6.log |  0     0     0     0                                                                  *
1325 		 *    5.log |  0     0     0     0                                                                  *
1326 		 *    4.log | <1>    0     0     0                                                                  *
1327 		 *    3.log |  0    <1>    0     0                                                                  *
1328 		 *    2.log |  0     0    <1>    0                                                                  *
1329 		 *    1.log |  0     0     0    <1>                                                                 *
1330 		 *                                                                                                  *
1331 		 * Now the mapping is unique. Files '6.log' and '5.log' are counted as not present in the new file. *
1332 		 *                                                                                                  *
1333 		 ****************************************************************************************************/
1334 
1335 		for (i = num_old - 1; i >= 0; i--)	/* loop over rows from bottom-right corner */
1336 		{
1337 			char	*p;
1338 			int	j;
1339 
1340 			if ('1' == protected_rows[i])
1341 				continue;
1342 
1343 			p = old2new + i * num_new;	/* the first element of the current row */
1344 
1345 			for (j = num_new - 1; j >= 0; j--)
1346 			{
1347 				if (('1' == p[j] || '2' == p[j]) && '1' != protected_cols[j])
1348 				{
1349 					cross_out(old2new, num_old, num_new, i, j, protected_rows, protected_cols);
1350 					break;
1351 				}
1352 			}
1353 		}
1354 	}
1355 
1356 	zbx_free(protected_cols);
1357 	zbx_free(protected_rows);
1358 }
1359 
1360 /******************************************************************************
1361  *                                                                            *
1362  * Function: create_old2new_and_copy_of                                       *
1363  *                                                                            *
1364  * Purpose: allocate and fill an array of possible mappings from the old log  *
1365  *          files to the new log files                                        *
1366  *                                                                            *
1367  * Parameters:                                                                *
1368  *     rotation_type - [IN] file rotation type                                *
1369  *     old_files     - [IN] old file list                                     *
1370  *     num_old       - [IN] number of elements in the old file list           *
1371  *     new_files     - [IN] new file list                                     *
1372  *     num_new       - [IN] number of elements in the new file list           *
1373  *     use_ino       - [IN] how to use inodes in is_same_file()               *
1374  *     err_msg       - [IN/OUT] error message why an item became NOTSUPPORTED *
1375  *                                                                            *
1376  * Return value: pointer to allocated array or NULL                           *
1377  *                                                                            *
1378  * Comments:                                                                  *
1379  *    The array is filled with '0', '1' and '2'  which mean:                  *
1380  *       old2new[i][j] = '0' - the i-th old file IS NOT the j-th new file     *
1381  *       old2new[i][j] = '1' - the i-th old file COULD BE the j-th new file   *
1382  *       old2new[i][j] = '2' - the j-th new file is a copy of the i-th old    *
1383  *                             file                                           *
1384  *                                                                            *
1385  *    Thread-safe                                                             *
1386  *                                                                            *
1387  ******************************************************************************/
static char	*create_old2new_and_copy_of(zbx_log_rotation_options_t rotation_type, struct st_logfile *old_files,
		int num_old, struct st_logfile *new_files, int num_new, int use_ino, char **err_msg)
{
	int	row, col;
	char	*old2new;

	/* flat 'num_old' x 'num_new' matrix: one row per old file, one column per new file */
	old2new = (char *)zbx_malloc(NULL, (size_t)num_new * (size_t)num_old * sizeof(char));

	for (row = 0; row < num_old; row++)
	{
		struct st_logfile	*old_file = old_files + row;
		char			*cells = old2new + (size_t)row * (size_t)num_new;

		for (col = 0; col < num_new; col++)
		{
			int	same;

			same = is_same_file_logrt(old_file, new_files + col, use_ino, rotation_type, new_files,
					num_new, err_msg);

			if (ZBX_SAME_FILE_RETRY == same || ZBX_SAME_FILE_ERROR == same)
			{
				/* no verdict for this pair: the matrix cannot be completed, discard it */
				/* (a retry request is remembered in the old file entry) */
				if (ZBX_SAME_FILE_RETRY == same)
					old_file->retry = 1;

				zbx_free(old2new);
				return NULL;
			}

			if (ZBX_SAME_FILE_NO == same)
			{
				cells[col] = '0';
			}
			else if (ZBX_SAME_FILE_YES == same)
			{
				if (1 == old_file->retry)
				{
					zabbix_log(LOG_LEVEL_DEBUG, "%s(): the size of log file \"%s\" has been"
							" updated since modification time change, consider"
							" it to be the same file", __func__,
							old_file->filename);
					old_file->retry = 0;
				}

				cells[col] = '1';
			}
			else if (ZBX_SAME_FILE_COPY == same)
			{
				cells[col] = '2';
				new_files[col].copy_of = row;	/* remember which old file the copy came from */
			}

			zabbix_log(LOG_LEVEL_DEBUG, "%s(): is_same_file(%s, %s) = %c", __func__,
					old_file->filename, new_files[col].filename, cells[col]);
		}
	}

	/* ambiguous mappings are resolved only for non-copytruncate rotation and only if */
	/* at least one of the lists has more than one file */
	if (ZBX_LOG_ROTATION_LOGCPT != rotation_type && (1 < num_old || 1 < num_new))
		resolve_old2new(old2new, num_old, num_new);

	return old2new;
}
1444 
1445 /******************************************************************************
1446  *                                                                            *
1447  * Function: find_old2new                                                     *
1448  *                                                                            *
1449  * Purpose: find a mapping from old to new file                               *
1450  *                                                                            *
1451  * Parameters:                                                                *
1452  *          old2new - [IN] two dimensional array of possible mappings         *
1453  *          num_new - [IN] number of elements in the new file list            *
1454  *          i_old   - [IN] index of the old file                              *
1455  *                                                                            *
1456  * Return value: index of the new file or                                     *
1457  *               -1 if no mapping was found                                   *
1458  *                                                                            *
1459  ******************************************************************************/
static int	find_old2new(const char * const old2new, int num_new, int i_old)
{
	const char * const	row = old2new + i_old * num_new;	/* start of the i_old-th row */
	int			col = 0;

	/* scan the row from left to right, the first '1' or '2' cell is the mapping */
	while (col < num_new)
	{
		const char	cell = row[col];

		if ('1' == cell || '2' == cell)
			return col;

		col++;
	}

	/* the row contains no candidate column */
	return -1;
}
1475 
1476 /******************************************************************************
1477  *                                                                            *
1478  * Function: add_logfile                                                      *
1479  *                                                                            *
1480  * Purpose: adds information of a logfile to the list of logfiles             *
1481  *                                                                            *
1482  * Parameters: logfiles - pointer to the list of logfiles                     *
1483  *             logfiles_alloc - number of logfiles memory was allocated for   *
1484  *             logfiles_num - number of already inserted logfiles             *
1485  *             filename - name of a logfile (with full path)                  *
1486  *             st - structure returned by stat()                              *
1487  *                                                                            *
1488  * Author: Dmitry Borovikov                                                   *
1489  *                                                                            *
1490  ******************************************************************************/
static void	add_logfile(struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, const char *filename,
		zbx_stat_t *st)
{
	int			pos;
	struct st_logfile	*entry;

	zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s' mtime:%d size:" ZBX_FS_UI64, __func__, filename,
			(int)st->st_mtime, (zbx_uint64_t)st->st_size);

	/* the list is full, enlarge it by 64 elements */
	if (*logfiles_num == *logfiles_alloc)
	{
		*logfiles_alloc += 64;
		*logfiles = (struct st_logfile *)zbx_realloc(*logfiles,
				(size_t)*logfiles_alloc * sizeof(struct st_logfile));

		zabbix_log(LOG_LEVEL_DEBUG, "%s() logfiles:%p logfiles_alloc:%d",
				__func__, (void *)*logfiles, *logfiles_alloc);
	}

	/************************************************************************************************/
	/* The list is kept sorted:                                                                     */
	/*   (1) by ascending mtime - the oldest file comes first, the most current one is at the end;  */
	/*   (2) among files with equal mtime - by descending name.                                     */
	/*                                                                                              */
	/* Example:                                                                                     */
	/*      array[0]           array[1]           array[2]           array[3]                       */
	/*      filename.log.3     filename.log.2     filename.log.1     filename.log                   */
	/*      mtime3         <=  mtime2         <=  mtime1         <=  mtime                          */
	/*                                                                                              */
	/* The application is writing into filename.log, mtimes are more important than filenames.      */
	/************************************************************************************************/

	/* find the index where the new file has to be inserted */
	for (pos = 0; pos < *logfiles_num; pos++)
	{
		const struct st_logfile	*cur = *logfiles + pos;
		int			cmp;

		if (st->st_mtime > cur->mtime)
			continue;	/* (1) the new file is more recent, it goes further to the end */

		if (st->st_mtime < cur->mtime)
			break;		/* (1) the new file is older, it goes before the current element */

		/* equal mtimes, names decide */
		cmp = strcmp(filename, cur->filename);

		if (0 == cmp)
		{
			/* the file already exists, quite impossible branch */
			zabbix_log(LOG_LEVEL_WARNING, "%s() file '%s' already added", __func__, filename);
			goto out;
		}

		if (0 < cmp)
			break;		/* (2) the new name is greater, it goes before the current element */

		/* (2) the new name is smaller, keep looking */
	}

	if (pos < *logfiles_num)
	{
		/* shift the tail of the list by one element to open a gap at 'pos' */
		memmove((void *)&(*logfiles)[pos + 1], (const void *)&(*logfiles)[pos],
				(size_t)(*logfiles_num - pos) * sizeof(struct st_logfile));
	}

	entry = *logfiles + pos;

	entry->filename = zbx_strdup(NULL, filename);
	entry->mtime = (int)st->st_mtime;
	entry->seq = 0;
	entry->retry = 0;
	entry->incomplete = 0;
	entry->copy_of = -1;
#if !defined(_WINDOWS) && !defined(__MINGW32__)		/* on MS Windows these attributes are not initialized here */
	entry->dev = (zbx_uint64_t)st->st_dev;
	entry->ino_lo = (zbx_uint64_t)st->st_ino;
	entry->ino_hi = 0;
#endif
	entry->size = (zbx_uint64_t)st->st_size;
	entry->processed_size = 0;
	entry->md5_block_size = -1;
	entry->last_block_offset = 0;
	/* 'first_block_md5' and 'last_block_md5' are not initialized here */

	++(*logfiles_num);
out:
	zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __func__);
}
1579 
1580 /******************************************************************************
1581  *                                                                            *
1582  * Function: destroy_logfile_list                                             *
1583  *                                                                            *
1584  * Purpose: release resources allocated to a logfile list                     *
1585  *                                                                            *
1586  * Parameters:                                                                *
1587  *     logfiles       - [IN/OUT] pointer to the list of logfiles, can be NULL *
1588  *     logfiles_alloc - [IN/OUT] pointer to number of logfiles memory was     *
1589  *                               allocated for, can be NULL.                  *
1590  *     logfiles_num   - [IN/OUT] valid pointer to number of inserted logfiles *
1591  *                                                                            *
1592  ******************************************************************************/
destroy_logfile_list(struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num)1593 void	destroy_logfile_list(struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num)
1594 {
1595 	int	i;
1596 
1597 	for (i = 0; i < *logfiles_num; i++)
1598 		zbx_free((*logfiles)[i].filename);
1599 
1600 	*logfiles_num = 0;
1601 
1602 	if (NULL != logfiles_alloc)
1603 		*logfiles_alloc = 0;
1604 
1605 	zbx_free(*logfiles);
1606 }
1607 
1608 /******************************************************************************
1609  *                                                                            *
1610  * Function: pick_logfile                                                     *
1611  *                                                                            *
1612  * Purpose: checks if the specified file meets requirements and adds it to    *
1613  *          the logfile list                                                  *
1614  *                                                                            *
1615  * Parameters:                                                                *
1616  *     directory      - [IN] directory where the logfiles reside              *
1617  *     filename       - [IN] name of the logfile (without path)               *
1618  *     mtime          - [IN] selection criterion "logfile modification time"  *
1619  *                      The logfile will be selected if modified not before   *
1620  *                      'mtime'.                                              *
1621  *     re             - [IN] selection criterion "regexp describing filename  *
1622  *                      pattern"                                              *
1623  *     logfiles       - [IN/OUT] pointer to the list of logfiles              *
1624  *     logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for  *
1625  *     logfiles_num   - [IN/OUT] number of already inserted logfiles          *
1626  *                                                                            *
1627  * Comments: This is a helper function for pick_logfiles()                    *
1628  *                                                                            *
1629  ******************************************************************************/
pick_logfile(const char * directory,const char * filename,int mtime,const zbx_regexp_t * re,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num)1630 static void	pick_logfile(const char *directory, const char *filename, int mtime, const zbx_regexp_t *re,
1631 		struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num)
1632 {
1633 	char		*logfile_candidate;
1634 	zbx_stat_t	file_buf;
1635 
1636 	logfile_candidate = zbx_dsprintf(NULL, "%s%s", directory, filename);
1637 
1638 	if (0 == zbx_stat(logfile_candidate, &file_buf))
1639 	{
1640 		if (S_ISREG(file_buf.st_mode) &&
1641 				mtime <= file_buf.st_mtime &&
1642 				0 == zbx_regexp_match_precompiled(filename, re))
1643 		{
1644 			add_logfile(logfiles, logfiles_alloc, logfiles_num, logfile_candidate, &file_buf);
1645 		}
1646 	}
1647 	else
1648 		zabbix_log(LOG_LEVEL_DEBUG, "cannot process entry '%s': %s", logfile_candidate, zbx_strerror(errno));
1649 
1650 	zbx_free(logfile_candidate);
1651 }
1652 
1653 /******************************************************************************
1654  *                                                                            *
1655  * Function: pick_logfiles                                                    *
1656  *                                                                            *
1657  * Purpose: find logfiles in a directory and put them into a list             *
1658  *                                                                            *
1659  * Parameters:                                                                *
1660  *     directory      - [IN] directory where the logfiles reside              *
1661  *     mtime          - [IN] selection criterion "logfile modification time"  *
1662  *                      The logfile will be selected if modified not before   *
1663  *                      'mtime'.                                              *
1664  *     re             - [IN] selection criterion "regexp describing filename  *
1665  *                      pattern"                                              *
1666  *     use_ino        - [OUT] how to use inodes in is_same_file()             *
1667  *     logfiles       - [IN/OUT] pointer to the list of logfiles              *
1668  *     logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for  *
1669  *     logfiles_num   - [IN/OUT] number of already inserted logfiles          *
1670  *     err_msg        - [IN/OUT] error message why an item became             *
1671  *                      NOTSUPPORTED                                          *
1672  *                                                                            *
1673  * Return value: SUCCEED or FAIL                                              *
1674  *                                                                            *
1675  * Comments: This is a helper function for make_logfile_list()                *
1676  *                                                                            *
 * Comments: Thread-safety - readdir() is a gray area, supposed to work on    *
1678  *           modern implementations when the directory stream is not shared   *
1679  *           between threads.                                                 *
1680  *                                                                            *
1681  ******************************************************************************/
pick_logfiles(const char * directory,int mtime,const zbx_regexp_t * re,int * use_ino,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,char ** err_msg)1682 static int	pick_logfiles(const char *directory, int mtime, const zbx_regexp_t *re, int *use_ino,
1683 		struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, char **err_msg)
1684 {
1685 #if defined(_WINDOWS) || defined(__MINGW32__)
1686 	int			ret = FAIL;
1687 	intptr_t		find_handle;
1688 	struct _wfinddata_t	find_data;
1689 
1690 	/* "open" Windows directory */
1691 	char	*find_path = zbx_dsprintf(NULL, "%s*", directory);
1692 	wchar_t	*find_wpath = zbx_utf8_to_unicode(find_path);
1693 
1694 	if (-1 == (find_handle = _wfindfirst(find_wpath, &find_data)))
1695 	{
1696 		*err_msg = zbx_dsprintf(*err_msg, "Cannot open directory \"%s\" for reading: %s", directory,
1697 				zbx_strerror(errno));
1698 		zbx_free(find_wpath);
1699 		zbx_free(find_path);
1700 		return FAIL;
1701 	}
1702 
1703 	if (SUCCEED != set_use_ino_by_fs_type(find_path, use_ino, err_msg))
1704 		goto clean;
1705 
1706 	do
1707 	{
1708 		char	*file_name_utf8 = zbx_unicode_to_utf8(find_data.name);
1709 
1710 		pick_logfile(directory, file_name_utf8, mtime, re, logfiles, logfiles_alloc, logfiles_num);
1711 		zbx_free(file_name_utf8);
1712 	}
1713 	while (0 == _wfindnext(find_handle, &find_data));
1714 
1715 	ret = SUCCEED;
1716 clean:
1717 	if (-1 == _findclose(find_handle))
1718 	{
1719 		*err_msg = zbx_dsprintf(*err_msg, "Cannot close directory \"%s\": %s", directory, zbx_strerror(errno));
1720 		ret = FAIL;
1721 	}
1722 
1723 	zbx_free(find_wpath);
1724 	zbx_free(find_path);
1725 
1726 	return ret;
1727 #else
1728 	DIR		*dir = NULL;
1729 	struct dirent	*d_ent = NULL;
1730 
1731 	if (NULL == (dir = opendir(directory)))
1732 	{
1733 		*err_msg = zbx_dsprintf(*err_msg, "Cannot open directory \"%s\" for reading: %s", directory,
1734 				zbx_strerror(errno));
1735 		return FAIL;
1736 	}
1737 
1738 	/* on UNIX file systems we always assume that inodes can be used to identify files */
1739 	*use_ino = 1;
1740 
1741 	while (NULL != (d_ent = readdir(dir)))
1742 		pick_logfile(directory, d_ent->d_name, mtime, re, logfiles, logfiles_alloc, logfiles_num);
1743 
1744 	if (-1 == closedir(dir))
1745 	{
1746 		*err_msg = zbx_dsprintf(*err_msg, "Cannot close directory \"%s\": %s", directory, zbx_strerror(errno));
1747 		return FAIL;
1748 	}
1749 
1750 	return SUCCEED;
1751 #endif
1752 }
1753 
1754 /******************************************************************************
1755  *                                                                            *
1756  * Function: compile_filename_regexp                                          *
1757  *                                                                            *
1758  * Purpose: compile regular expression                                        *
1759  *                                                                            *
1760  * Parameters:                                                                *
1761  *     filename_regexp - [IN] regexp to be compiled                           *
1762  *     re              - [OUT] compiled regexp                                *
1763  *     err_msg         - [OUT] error message why regexp could not be          *
1764  *                       compiled                                             *
1765  *                                                                            *
1766  * Return value: SUCCEED or FAIL                                              *
1767  *                                                                            *
1768  ******************************************************************************/
static int	compile_filename_regexp(const char *filename_regexp, zbx_regexp_t **re, char **err_msg)
{
	const char	*reason;

	if (SUCCEED == zbx_regexp_compile(filename_regexp, re, &reason))
		return SUCCEED;

	/* compilation failed, pass the reason reported by the regexp library to the caller */
	*err_msg = zbx_dsprintf(*err_msg, "Cannot compile a regular expression describing filename pattern: %s",
			reason);

	return FAIL;
}
1782 
1783 /******************************************************************************
1784  *                                                                            *
1785  * Function: fill_file_details                                                *
1786  *                                                                            *
1787  * Purpose: fill-in MD5 sums, device and inode numbers for files in the list  *
1788  *                                                                            *
1789  * Parameters:                                                                *
1790  *     logfiles     - [IN/OUT] list of log files                              *
1791  *     logfiles_num - [IN] number of elements in 'logfiles'                   *
1792  *     use_ino      - [IN] how to get file IDs in file_id()                   *
1793  *     err_msg      - [IN/OUT] error message why operation failed             *
1794  *                                                                            *
1795  * Return value: SUCCEED or FAIL                                              *
1796  *                                                                            *
1797  * Comments: Thread-safe                                                      *
1798  *                                                                            *
1799  ******************************************************************************/
1800 #if defined(_WINDOWS) || defined(__MINGW32__)
fill_file_details(struct st_logfile * logfiles,int logfiles_num,int use_ino,char ** err_msg)1801 static int	fill_file_details(struct st_logfile *logfiles, int logfiles_num, int use_ino, char **err_msg)
1802 #else
1803 static int	fill_file_details(struct st_logfile *logfiles, int logfiles_num, char **err_msg)
1804 #endif
1805 {
1806 	int	i, ret = SUCCEED;
1807 
1808 	/* Fill in MD5 sums and file indexes in the logfile list. */
1809 	/* These operations require opening of file, therefore we group them together. */
1810 
1811 	for (i = 0; i < logfiles_num; i++)
1812 	{
1813 		int			f;
1814 		struct st_logfile	*p = logfiles + i;
1815 
1816 		if (-1 == (f = open_file_helper(p->filename, err_msg)))
1817 			return FAIL;
1818 
1819 		/* get MD5 sums of the first and the last blocks */
1820 
1821 		p->md5_block_size = (int)MIN(MAX_LEN_MD5, p->size);
1822 
1823 		if (SUCCEED != (ret = file_part_md5(f, 0, p->md5_block_size, p->first_block_md5, p->filename,
1824 				err_msg)))
1825 		{
1826 			goto clean;
1827 		}
1828 
1829 		if (0 < (p->last_block_offset = p->size - (size_t)p->md5_block_size))
1830 		{
1831 			if (SUCCEED != (ret = file_part_md5(f, p->last_block_offset, p->md5_block_size,
1832 					p->last_block_md5, p->filename, err_msg)))
1833 			{
1834 				goto clean;
1835 			}
1836 		}
1837 		else	/* file is small, set the last block MD5 equal to the first block's one */
1838 			memcpy(p->last_block_md5, p->first_block_md5, sizeof(p->last_block_md5));
1839 
1840 #if defined(_WINDOWS) || defined(__MINGW32__)
1841 		ret = file_id(f, use_ino, &p->dev, &p->ino_lo, &p->ino_hi, p->filename, err_msg);
1842 #endif	/*_WINDOWS*/
1843 clean:
1844 		if (SUCCEED != close_file_helper(f, p->filename, err_msg) || FAIL == ret)
1845 			return FAIL;
1846 	}
1847 
1848 	return ret;
1849 }
1850 
1851 /******************************************************************************
1852  *                                                                            *
1853  * Function: make_logfile_list                                                *
1854  *                                                                            *
1855  * Purpose: select log files to be analyzed and make a list, set 'use_ino'    *
1856  *          parameter                                                         *
1857  *                                                                            *
1858  * Parameters:                                                                *
1859  *     flags          - [IN] bit flags with item type: log, logrt, log.count  *
1860  *                      or logrt.count                                        *
1861  *     filename       - [IN] logfile name (regular expression with a path)    *
1862  *     mtime          - [IN] last modification time of the file               *
1863  *     logfiles       - [IN/OUT] pointer to the list of logfiles              *
1864  *     logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for  *
1865  *     logfiles_num   - [IN/OUT] number of already inserted logfiles          *
1866  *     use_ino        - [IN/OUT] how to use inode numbers                     *
1867  *     err_msg        - [IN/OUT] error message (if FAIL or ZBX_NO_FILE_ERROR  *
1868  *                      is returned)                                          *
1869  *                                                                            *
1870  * Return value: SUCCEED - file list successfully built,                      *
1871  *               ZBX_NO_FILE_ERROR - file(s) do not exist,                    *
1872  *               FAIL - other errors                                          *
1873  *                                                                            *
1874  * Comments: Supposed to be thread-safe, see pick_logfiles() comments.        *
1875  *                                                                            *
1876  ******************************************************************************/
make_logfile_list(unsigned char flags,const char * filename,int mtime,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,int * use_ino,char ** err_msg)1877 static int	make_logfile_list(unsigned char flags, const char *filename, int mtime,
1878 		struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, int *use_ino, char **err_msg)
1879 {
1880 	int	ret = SUCCEED;
1881 
1882 	if (0 != (ZBX_METRIC_FLAG_LOG_LOG & flags))	/* log[] or log.count[] item */
1883 	{
1884 		zbx_stat_t	file_buf;
1885 
1886 		if (0 != zbx_stat(filename, &file_buf))
1887 		{
1888 			*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain information for file \"%s\": %s", filename,
1889 					zbx_strerror(errno));
1890 			ret = ZBX_NO_FILE_ERROR;
1891 			goto clean;
1892 		}
1893 
1894 		if (!S_ISREG(file_buf.st_mode))
1895 		{
1896 			*err_msg = zbx_dsprintf(*err_msg, "\"%s\" is not a regular file.", filename);
1897 			ret = FAIL;
1898 			goto clean;
1899 		}
1900 
1901 		/* mtime is not used for log, log.count items, reset to ignore */
1902 		file_buf.st_mtime = 0;
1903 
1904 		add_logfile(logfiles, logfiles_alloc, logfiles_num, filename, &file_buf);
1905 #if defined(_WINDOWS) || defined(__MINGW32__)
1906 		if (SUCCEED != (ret = set_use_ino_by_fs_type(filename, use_ino, err_msg)))
1907 			goto clean;
1908 #else
1909 		/* on UNIX file systems we always assume that inodes can be used to identify files */
1910 		*use_ino = 1;
1911 #endif
1912 	}
1913 	else if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags))	/* logrt[] or logrt.count[] item */
1914 	{
1915 		char	*directory = NULL, *filename_regexp = NULL;
1916 		zbx_regexp_t	*re;
1917 
1918 		/* split a filename into directory and file name regular expression parts */
1919 		if (SUCCEED != (ret = split_filename(filename, &directory, &filename_regexp, err_msg)))
1920 			goto clean;
1921 
1922 		if (SUCCEED != (ret = compile_filename_regexp(filename_regexp, &re, err_msg)))
1923 			goto clean1;
1924 
1925 		if (SUCCEED != (ret = pick_logfiles(directory, mtime, re, use_ino, logfiles, logfiles_alloc,
1926 				logfiles_num, err_msg)))
1927 		{
1928 			goto clean2;
1929 		}
1930 
1931 		if (0 == *logfiles_num)
1932 		{
1933 			/* do not make logrt[] and logrt.count[] items NOTSUPPORTED if there are no matching log */
1934 			/* files or they are not accessible (can happen during a rotation), just log the problem */
1935 #if defined(_WINDOWS) || defined(__MINGW32__)
1936 			zabbix_log(LOG_LEVEL_WARNING, "there are no recently modified files matching \"%s\" in \"%s\"",
1937 					filename_regexp, directory);
1938 
1939 			ret = ZBX_NO_FILE_ERROR;
1940 #else
1941 			if (0 != access(directory, X_OK))
1942 			{
1943 				zabbix_log(LOG_LEVEL_WARNING, "insufficient access rights (no \"execute\" permission) "
1944 						"to directory \"%s\": %s", directory, zbx_strerror(errno));
1945 				/* No access could be a transient condition if file rotation is manipulating */
1946 				/* directories. Therefore 'ret' is not set to FAIL or ZBX_NO_FILE_ERROR here. */
1947 			}
1948 			else
1949 			{
1950 				zabbix_log(LOG_LEVEL_WARNING, "there are no recently modified files matching \"%s\" in"
1951 						" \"%s\"", filename_regexp, directory);
1952 				ret = ZBX_NO_FILE_ERROR;
1953 			}
1954 #endif
1955 		}
1956 clean2:
1957 		zbx_regexp_free(re);
1958 clean1:
1959 		zbx_free(directory);
1960 		zbx_free(filename_regexp);
1961 
1962 		if (FAIL == ret || ZBX_NO_FILE_ERROR == ret)
1963 			goto clean;
1964 	}
1965 	else
1966 	{
1967 		THIS_SHOULD_NEVER_HAPPEN;
1968 		*err_msg = zbx_dsprintf(*err_msg, "%s(): internal error: invalid flags:%hhu", __func__, flags);
1969 		ret = FAIL;
1970 		goto clean;
1971 	}
1972 
1973 #if defined(_WINDOWS) || defined(__MINGW32__)
1974 	ret = fill_file_details(*logfiles, *logfiles_num, *use_ino, err_msg);
1975 #else
1976 	ret = fill_file_details(*logfiles, *logfiles_num, err_msg);
1977 #endif
1978 clean:
1979 	if ((FAIL == ret || ZBX_NO_FILE_ERROR == ret) && NULL != *logfiles)
1980 		destroy_logfile_list(logfiles, logfiles_alloc, logfiles_num);
1981 
1982 	return	ret;
1983 }
1984 
/******************************************************************************
 *                                                                            *
 * Function: buf_find_newline                                                 *
 *                                                                            *
 * Purpose: find the first line terminator (LF, CR or CR+LF) in a buffer,     *
 *          replacing NUL characters met on the way with '?'                  *
 *                                                                            *
 * Parameters:                                                                *
 *     p      - [IN/OUT] start of the data to scan                            *
 *     p_next - [OUT] first byte after the terminator (set only on success)   *
 *     p_end  - [IN] end of data (first byte after the last data byte)        *
 *     cr     - [IN] 'carriage return' character in the buffer encoding       *
 *     lf     - [IN] 'line feed' character in the buffer encoding             *
 *     szbyte - [IN] size of one 'cr'/'lf' character in bytes                 *
 *                                                                            *
 * Return value: pointer to the first byte of the terminator or NULL if the   *
 *               buffer contains no terminator                                *
 *                                                                            *
 * Comments: In multi-byte mode NUL characters are replaced only if           *
 *           'szbyte' is 2. Trailing bytes which do not form a whole          *
 *           character are not examined.                                      *
 *                                                                            *
 ******************************************************************************/
static char	*buf_find_newline(char *p, char **p_next, const char *p_end, const char *cr, const char *lf,
		size_t szbyte)
{
	if (1 == szbyte)	/* single-byte character set */
	{
		for (; p < p_end; p++)
		{
			/* detect NULL byte and replace it with '?' character */
			if (0x0 == *p)
			{
				*p = '?';
				continue;
			}

			/* quickly skip everything outside of the 0xa ... 0xd range */
			if (0xd < *p || 0xa > *p)
				continue;

			if (0xa == *p)  /* LF (Unix) */
			{
				*p_next = p + 1;
				return p;
			}

			if (0xd == *p)	/* CR (Mac) */
			{
				/* CR+LF (Windows) is recognized only if LF is inside of the buffer */
				if (1 < p_end - p && 0xa == *(p + 1))
					*p_next = p + 2;
				else
					*p_next = p + 1;

				return p;
			}
		}

		return NULL;
	}

	/* Multi-byte character set. The number of remaining bytes is compared with 'szbyte' instead of */
	/* computing 'p_end - szbyte': that pointer would be located before the buffer (undefined behaviour) */
	/* if the buffer holds less than one character. */

	while (p < p_end && szbyte <= (size_t)(p_end - p))
	{
		/* detect NULL byte in UTF-16 encoding and replace it with '?' character */
		if (2 == szbyte && 0x0 == *p && 0x0 == *(p + 1))
		{
			if (0x0 == *cr)			/* Big-endian */
				p[1] = '?';
			else				/* Little-endian */
				*p = '?';
		}

		if (0 == memcmp(p, lf, szbyte))		/* LF (Unix) */
		{
			*p_next = p + szbyte;
			return p;
		}

		if (0 == memcmp(p, cr, szbyte))		/* CR (Mac) */
		{
			/* CR+LF (Windows) is recognized only if the whole LF character is inside of the buffer */
			if (2 * szbyte <= (size_t)(p_end - p) && 0 == memcmp(p + szbyte, lf, szbyte))
				*p_next = p + 2 * szbyte;
			else
				*p_next = p + szbyte;

			return p;
		}

		p += szbyte;
	}

	return NULL;
}
2059 
zbx_match_log_rec(const zbx_vector_ptr_t * regexps,const char * value,const char * pattern,const char * output_template,char ** output,char ** err_msg)2060 static int	zbx_match_log_rec(const zbx_vector_ptr_t *regexps, const char *value, const char *pattern,
2061 		const char *output_template, char **output, char **err_msg)
2062 {
2063 	int	ret;
2064 
2065 	if (FAIL == (ret = regexp_sub_ex(regexps, value, pattern, ZBX_CASE_SENSITIVE, output_template, output)))
2066 		*err_msg = zbx_dsprintf(*err_msg, "cannot compile regular expression");
2067 
2068 	return ret;	/* ZBX_REGEXP_MATCH, ZBX_REGEXP_NO_MATCH or FAIL */
2069 }
2070 
2071 /******************************************************************************
2072  *                                                                            *
2073  * Function: zbx_read2                                                        *
2074  *                                                                            *
2075  * Comments: Thread-safe                                                      *
2076  *                                                                            *
2077  ******************************************************************************/
zbx_read2(int fd,unsigned char flags,struct st_logfile * logfile,zbx_uint64_t * lastlogsize,const int * mtime,int * big_rec,const char * encoding,zbx_vector_ptr_t * regexps,const char * pattern,const char * output_template,int * p_count,int * s_count,zbx_process_value_func_t process_value,const char * server,unsigned short port,const char * hostname,const char * key,zbx_uint64_t * lastlogsize_sent,int * mtime_sent,const char * persistent_file_name,zbx_vector_pre_persistent_t * prep_vec,char ** err_msg)2078 static int	zbx_read2(int fd, unsigned char flags, struct st_logfile *logfile, zbx_uint64_t *lastlogsize,
2079 		const int *mtime, int *big_rec, const char *encoding, zbx_vector_ptr_t *regexps, const char *pattern,
2080 		const char *output_template, int *p_count, int *s_count, zbx_process_value_func_t process_value,
2081 		const char *server, unsigned short port, const char *hostname, const char *key,
2082 		zbx_uint64_t *lastlogsize_sent, int *mtime_sent, const char *persistent_file_name,
2083 		zbx_vector_pre_persistent_t *prep_vec, char **err_msg)
2084 {
2085 	static ZBX_THREAD_LOCAL char	*buf = NULL;
2086 
2087 	int				ret, nbytes, regexp_ret;
2088 	const char			*cr, *lf, *p_end;
2089 	char				*p_start, *p, *p_nl, *p_next, *item_value = NULL;
2090 	size_t				szbyte;
2091 	zbx_offset_t			offset;
2092 	const int			is_count_item = (0 != (ZBX_METRIC_FLAG_LOG_COUNT & flags)) ? 1 : 0;
2093 #if !defined(_WINDOWS) && !defined(__MINGW32__)
2094 	int				prep_vec_idx = -1;	/* index in 'prep_vec' vector */
2095 #endif
2096 	zbx_uint64_t			processed_size;
2097 
2098 #define BUF_SIZE	(256 * ZBX_KIBIBYTE)	/* The longest encodings use 4 bytes for every character. To send */
2099 						/* up to 64 k characters to Zabbix server a 256 kB buffer might be */
2100 						/* required. */
2101 	if (NULL == buf)
2102 		buf = (char *)zbx_malloc(buf, (size_t)(BUF_SIZE + 1));
2103 
2104 	find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte);
2105 
2106 	for (;;)
2107 	{
2108 		if (0 >= *p_count || 0 >= *s_count)
2109 		{
2110 			/* limit on number of processed or sent-to-server lines reached */
2111 			ret = SUCCEED;
2112 			goto out;
2113 		}
2114 
2115 		if ((zbx_offset_t)-1 == (offset = zbx_lseek(fd, 0, SEEK_CUR)))
2116 		{
2117 			*big_rec = 0;
2118 			*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to 0 in file: %s", zbx_strerror(errno));
2119 			ret = FAIL;
2120 			goto out;
2121 		}
2122 
2123 		if (-1 == (nbytes = (int)read(fd, buf, (size_t)BUF_SIZE)))
2124 		{
2125 			/* error on read */
2126 			*big_rec = 0;
2127 			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file: %s", zbx_strerror(errno));
2128 			ret = FAIL;
2129 			goto out;
2130 		}
2131 
2132 		if (0 == nbytes)
2133 		{
2134 			/* end of file reached */
2135 			ret = SUCCEED;
2136 			goto out;
2137 		}
2138 
2139 		p_start = buf;			/* beginning of current line */
2140 		p = buf;			/* current byte */
2141 		p_end = buf + (size_t)nbytes;	/* no data from this position */
2142 
2143 		if (NULL == (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
2144 		{
2145 			if (p_end > p)
2146 				logfile->incomplete = 1;
2147 
2148 			if (BUF_SIZE > nbytes)
2149 			{
2150 				/* Buffer is not full (no more data available) and there is no "newline" in it. */
2151 				/* Do not analyze it now, keep the same position in the file and wait the next check, */
2152 				/* maybe more data will come. */
2153 
2154 				*lastlogsize = (zbx_uint64_t)offset;
2155 				ret = SUCCEED;
2156 				goto out;
2157 			}
2158 			else
2159 			{
2160 				/* buffer is full and there is no "newline" in it */
2161 
2162 				if (0 == *big_rec)
2163 				{
2164 					/* It is the first, beginning part of a long record. Match it against the */
2165 					/* regexp now (our buffer length corresponds to what we can save in the */
2166 					/* database). */
2167 
2168 					char	*value;
2169 					int	send_err;
2170 
2171 					buf[BUF_SIZE] = '\0';
2172 
2173 					if ('\0' != *encoding)
2174 						value = convert_to_utf8(buf, (size_t)BUF_SIZE, encoding);
2175 					else
2176 						value = buf;
2177 
2178 					zabbix_log(LOG_LEVEL_WARNING, "Logfile contains a large record: \"%.64s\""
2179 							" (showing only the first 64 characters). Only the first 256 kB"
2180 							" will be analyzed, the rest will be ignored while Zabbix agent"
2181 							" is running.", value);
2182 
2183 					processed_size = (size_t)offset + (size_t)nbytes;
2184 					send_err = FAIL;
2185 
2186 					regexp_ret = zbx_match_log_rec(regexps, value, pattern,
2187 							(0 == is_count_item) ? output_template : NULL,
2188 							(0 == is_count_item) ? &item_value : NULL, err_msg);
2189 #if !defined(_WINDOWS) && !defined(__MINGW32__)
2190 					if (NULL != persistent_file_name && (ZBX_REGEXP_MATCH == regexp_ret ||
2191 							ZBX_REGEXP_NO_MATCH == regexp_ret))
2192 					{
2193 						/* Prepare 'prep_vec' element even if the current record won't match. */
2194 						/* Its mtime and lastlogsize could be sent to server later as */
2195 						/* metadata update, then a persistent file should be written. */
2196 						/* 'prep_vec' can be emptied at any call to process_value() which */
2197 						/* calls send_buffer(), so be ready to reinitialize. */
2198 						if (-1 == prep_vec_idx || 0 == prep_vec->values_num)
2199 						{
2200 							prep_vec_idx = zbx_find_or_create_prep_vec_element(prep_vec,
2201 									key, persistent_file_name);
2202 							zbx_init_prep_vec_data(logfile,
2203 									prep_vec->values + prep_vec_idx);
2204 						}
2205 
2206 						zbx_update_prep_vec_data(logfile, processed_size,
2207 								prep_vec->values + prep_vec_idx);
2208 					}
2209 #else
2210 					ZBX_UNUSED(persistent_file_name);
2211 					ZBX_UNUSED(prep_vec);
2212 #endif
2213 					if (ZBX_REGEXP_MATCH == regexp_ret)
2214 					{
2215 						if (0 == is_count_item)		/* log[] or logrt[] */
2216 						{
2217 							if (SUCCEED == (send_err = process_value(server, port,
2218 									hostname, key, item_value, ITEM_STATE_NORMAL,
2219 									&processed_size, mtime, NULL, NULL, NULL, NULL,
2220 									flags | ZBX_METRIC_FLAG_PERSISTENT)))
2221 							{
2222 								*lastlogsize_sent = processed_size;
2223 
2224 								if (NULL != mtime_sent)
2225 									*mtime_sent = *mtime;
2226 
2227 								(*s_count)--;
2228 								zbx_free(item_value);
2229 							}
2230 							else
2231 							{
2232 								zbx_free(item_value);
2233 
2234 								if ('\0' != *encoding)
2235 									zbx_free(value);
2236 
2237 								/* Sending of buffer failed. */
2238 								/* Try to resend it in the next check. */
2239 								ret = SUCCEED;
2240 								goto out;
2241 							}
2242 						}
2243 						else	/* log.count[] or logrt.count[] */
2244 							(*s_count)--;
2245 					}
2246 
2247 					if ('\0' != *encoding)
2248 						zbx_free(value);
2249 
2250 					if (FAIL == regexp_ret)
2251 					{
2252 						ret = FAIL;
2253 						goto out;
2254 					}
2255 
2256 					(*p_count)--;
2257 
2258 					if (0 != is_count_item ||
2259 							ZBX_REGEXP_NO_MATCH == regexp_ret || SUCCEED == send_err)
2260 					{
2261 						*lastlogsize = processed_size;
2262 						*big_rec = 1;	/* ignore the rest of this record */
2263 					}
2264 				}
2265 				else
2266 				{
2267 					/* It is a middle part of a long record. Ignore it. We have already */
2268 					/* checked the first part against the regexp. */
2269 					*lastlogsize = (size_t)offset + (size_t)nbytes;
2270 				}
2271 			}
2272 		}
2273 		else
2274 		{
2275 			/* the "newline" was found, so there is at least one complete record */
2276 			/* (or trailing part of a large record) in the buffer */
2277 			logfile->incomplete = 0;
2278 
2279 			for (;;)
2280 			{
2281 				if (0 >= *p_count || 0 >= *s_count)
2282 				{
2283 					/* limit on number of processed or sent-to-server lines reached */
2284 					ret = SUCCEED;
2285 					goto out;
2286 				}
2287 
2288 				if (0 == *big_rec)
2289 				{
2290 					char	*value;
2291 					int	send_err;
2292 
2293 					*p_nl = '\0';
2294 
2295 					if ('\0' != *encoding)
2296 						value = convert_to_utf8(p_start, (size_t)(p_nl - p_start), encoding);
2297 					else
2298 						value = p_start;
2299 
2300 					processed_size = (size_t)offset + (size_t)(p_next - buf);
2301 					send_err = FAIL;
2302 
2303 					regexp_ret = zbx_match_log_rec(regexps, value, pattern,
2304 							(0 == is_count_item) ? output_template : NULL,
2305 							(0 == is_count_item) ? &item_value : NULL, err_msg);
2306 #if !defined(_WINDOWS) && !defined(__MINGW32__)
2307 					if (NULL != persistent_file_name && (ZBX_REGEXP_MATCH == regexp_ret ||
2308 							ZBX_REGEXP_NO_MATCH == regexp_ret))
2309 					{
2310 						/* Prepare 'prep_vec' element even if the current record won't match. */
2311 						/* Its mtime and lastlogsize could be sent to server later as */
2312 						/* metadata update, then a persistent file should be written. */
2313 						/* 'prep_vec' can be emptied at any call to process_value() which */
2314 						/* calls send_buffer(), so be ready to reinitialize. */
2315 						if (-1 == prep_vec_idx || 0 == prep_vec->values_num)
2316 						{
2317 							prep_vec_idx = zbx_find_or_create_prep_vec_element(prep_vec,
2318 									key, persistent_file_name);
2319 							zbx_init_prep_vec_data(logfile,
2320 									prep_vec->values + prep_vec_idx);
2321 						}
2322 
2323 						zbx_update_prep_vec_data(logfile, processed_size,
2324 								prep_vec->values + prep_vec_idx);
2325 					}
2326 #endif
2327 					if (ZBX_REGEXP_MATCH == regexp_ret)
2328 					{
2329 						if (0 == is_count_item)		/* log[] or logrt[] */
2330 						{
2331 							if (SUCCEED == (send_err = process_value(server, port,
2332 									hostname, key, item_value, ITEM_STATE_NORMAL,
2333 									&processed_size, mtime, NULL, NULL, NULL, NULL,
2334 									flags | ZBX_METRIC_FLAG_PERSISTENT)))
2335 							{
2336 								*lastlogsize_sent = processed_size;
2337 
2338 								if (NULL != mtime_sent)
2339 									*mtime_sent = *mtime;
2340 
2341 								(*s_count)--;
2342 								zbx_free(item_value);
2343 							}
2344 							else
2345 							{
2346 								zbx_free(item_value);
2347 
2348 								if ('\0' != *encoding)
2349 									zbx_free(value);
2350 
2351 								/* Sending of buffer failed. */
2352 								/* Try to resend it in the next check. */
2353 								ret = SUCCEED;
2354 								goto out;
2355 							}
2356 						}
2357 						else	/* log.count[] or logrt.count[] */
2358 							(*s_count)--;
2359 					}
2360 
2361 					if ('\0' != *encoding)
2362 						zbx_free(value);
2363 
2364 					if (FAIL == regexp_ret)
2365 					{
2366 						ret = FAIL;
2367 						goto out;
2368 					}
2369 
2370 					(*p_count)--;
2371 
2372 					if (0 != is_count_item ||
2373 							ZBX_REGEXP_NO_MATCH == regexp_ret || SUCCEED == send_err)
2374 					{
2375 						*lastlogsize = processed_size;
2376 					}
2377 				}
2378 				else
2379 				{
2380 					/* skip the trailing part of a long record */
2381 					*lastlogsize = (size_t)offset + (size_t)(p_next - buf);
2382 					*big_rec = 0;
2383 				}
2384 
2385 				/* move to the next record in the buffer */
2386 				p_start = p_next;
2387 				p = p_next;
2388 
2389 				if (NULL == (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
2390 				{
2391 					/* There are no complete records in the buffer. */
2392 					/* Try to read more data from this position if available. */
2393 					if (p_end > p)
2394 						logfile->incomplete = 1;
2395 
2396 					if ((zbx_offset_t)-1 == zbx_lseek(fd, *lastlogsize, SEEK_SET))
2397 					{
2398 						*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64
2399 								" in file: %s", *lastlogsize, zbx_strerror(errno));
2400 						ret = FAIL;
2401 						goto out;
2402 					}
2403 					else
2404 						break;
2405 				}
2406 				else
2407 					logfile->incomplete = 0;
2408 			}
2409 		}
2410 	}
2411 out:
2412 	return ret;
2413 
2414 #undef BUF_SIZE
2415 }
2416 
2417 /******************************************************************************
2418  *                                                                            *
2419  * Function: process_log                                                      *
2420  *                                                                            *
2421  * Purpose: Match new records in logfile with regexp, transmit matching       *
2422  *          records to Zabbix server                                          *
2423  *                                                                            *
2424  * Parameters:                                                                *
2425  *     flags           - [IN] bit flags with item type: log, logrt, log.count *
2426  *                       or logrt.count                                       *
2427  *     logfile         - [IN/OUT] logfile attributes                          *
2428  *     lastlogsize     - [IN/OUT] offset from the beginning of the file       *
2429  *     mtime           - [IN/OUT] file modification time for reporting to     *
2430  *                       server                                               *
2431  *     lastlogsize_sent - [OUT] lastlogsize value that was last sent          *
2432  *     mtime_sent      - [OUT] mtime value that was last sent                 *
2433  *     skip_old_data   - [IN/OUT] start from the beginning of the file or     *
2434  *                       jump to the end                                      *
2435  *     big_rec         - [IN/OUT] state variable to remember whether a long   *
2436  *                       record is being processed                            *
2437  *     encoding        - [IN] text string describing encoding.                *
2438  *                       See function find_cr_lf_szbyte() for supported       *
2439  *                       encodings.                                           *
2440  *                       "" (empty string) means a single-byte character set  *
2441  *                       (e.g. ASCII).                                        *
2442  *     regexps         - [IN] array of regexps                                *
2443  *     pattern         - [IN] pattern to match                                *
2444  *     output_template - [IN] output formatting template                      *
2445  *     p_count         - [IN/OUT] limit of records to be processed            *
2446  *     s_count         - [IN/OUT] limit of records to be sent to server       *
2447  *     process_value   - [IN] pointer to function process_value()             *
2448  *     server          - [IN] server to send data to                          *
2449  *     port            - [IN] port to send data to                            *
2450  *     hostname        - [IN] hostname the data comes from                    *
2451  *     key             - [IN] item key the data belongs to                    *
2452  *     processed_bytes - [OUT] number of processed bytes in logfile           *
2453  *     seek_offset     - [IN] position to seek in file                        *
2454  *     persistent_file_name - [IN] name of file for saving persistent data    *
2455  *     prep_vec        - [IN/OUT] vector with data for writing into           *
2456  *                                persistent files                            *
2457  *     err_msg         - [IN/OUT] error message why an item became            *
2458  *                       NOTSUPPORTED                                         *
2459  *                                                                            *
2460  * Return value: returns SUCCEED on successful reading,                       *
2461  *               FAIL on other cases                                          *
2462  *                                                                            *
2463  * Author: Eugene Grigorjev                                                   *
2464  *                                                                            *
2465  * Comments:                                                                  *
2466  *           This function does not deal with log file rotation.              *
2467  *                                                                            *
2468  *           Thread-safe                                                      *
2469  *                                                                            *
2470  ******************************************************************************/
process_log(unsigned char flags,struct st_logfile * logfile,zbx_uint64_t * lastlogsize,int * mtime,zbx_uint64_t * lastlogsize_sent,int * mtime_sent,unsigned char * skip_old_data,int * big_rec,const char * encoding,zbx_vector_ptr_t * regexps,const char * pattern,const char * output_template,int * p_count,int * s_count,zbx_process_value_func_t process_value,const char * server,unsigned short port,const char * hostname,const char * key,zbx_uint64_t * processed_bytes,zbx_uint64_t seek_offset,const char * persistent_file_name,zbx_vector_pre_persistent_t * prep_vec,char ** err_msg)2471 static int	process_log(unsigned char flags, struct st_logfile *logfile, zbx_uint64_t *lastlogsize, int *mtime,
2472 		zbx_uint64_t *lastlogsize_sent, int *mtime_sent, unsigned char *skip_old_data, int *big_rec,
2473 		const char *encoding, zbx_vector_ptr_t *regexps, const char *pattern, const char *output_template,
2474 		int *p_count, int *s_count, zbx_process_value_func_t process_value, const char *server,
2475 		unsigned short port, const char *hostname, const char *key, zbx_uint64_t *processed_bytes,
2476 		zbx_uint64_t seek_offset, const char *persistent_file_name, zbx_vector_pre_persistent_t *prep_vec,
2477 		char **err_msg)
2478 {
2479 	int	f, ret = FAIL;
2480 
2481 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d seek_offset:"
2482 			ZBX_FS_UI64, __func__, logfile->filename, *lastlogsize, NULL != mtime ? *mtime : 0,
2483 			seek_offset);
2484 
2485 	if (-1 == (f = open_file_helper(logfile->filename, err_msg)))
2486 		goto out;
2487 
2488 	if ((zbx_offset_t)-1 != zbx_lseek(f, seek_offset, SEEK_SET))
2489 	{
2490 		*lastlogsize = seek_offset;
2491 		*skip_old_data = 0;
2492 
2493 		if (SUCCEED == (ret = zbx_read2(f, flags, logfile, lastlogsize, mtime, big_rec, encoding, regexps,
2494 				pattern, output_template, p_count, s_count, process_value, server, port, hostname, key,
2495 				lastlogsize_sent, mtime_sent, persistent_file_name, prep_vec, err_msg)))
2496 		{
2497 			*processed_bytes = *lastlogsize - seek_offset;
2498 		}
2499 	}
2500 	else
2501 	{
2502 		*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
2503 				seek_offset, logfile->filename, zbx_strerror(errno));
2504 	}
2505 
2506 	if (SUCCEED != close_file_helper(f, logfile->filename, err_msg))
2507 		ret = FAIL;
2508 out:
2509 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s() filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d ret:%s"
2510 			" processed_bytes:" ZBX_FS_UI64, __func__, logfile->filename, *lastlogsize,
2511 			NULL != mtime ? *mtime : 0, zbx_result_string(ret),
2512 			SUCCEED == ret ? *processed_bytes : (zbx_uint64_t)0);
2513 
2514 	return ret;
2515 }
2516 
adjust_mtime_to_clock(int * mtime)2517 static void	adjust_mtime_to_clock(int *mtime)
2518 {
2519 	time_t	now;
2520 
2521 	/* Adjust 'mtime' if the system clock has been set back in time. */
2522 	/* Setting the clock ahead of time is harmless in our case. */
2523 
2524 	if (*mtime > (now = time(NULL)))
2525 	{
2526 		int	old_mtime;
2527 
2528 		old_mtime = *mtime;
2529 		*mtime = (int)now;
2530 
2531 		zabbix_log(LOG_LEVEL_WARNING, "System clock has been set back in time. Setting agent mtime %d "
2532 				"seconds back.", (int)(old_mtime - now));
2533 	}
2534 }
2535 
is_swap_required(const struct st_logfile * old_files,struct st_logfile * new_files,int use_ino,int idx)2536 static int	is_swap_required(const struct st_logfile *old_files, struct st_logfile *new_files, int use_ino, int idx)
2537 {
2538 	int	is_same_place;
2539 
2540 	/* if the 1st file is not processed at all while the 2nd file was processed (at least partially) */
2541 	/* then swap them */
2542 	if (0 == new_files[idx].seq && 0 < new_files[idx + 1].seq)
2543 		return SUCCEED;
2544 
2545 	/* if the 2nd file is not a copy of some other file then no need to swap */
2546 	if (-1 == new_files[idx + 1].copy_of)
2547 		return FAIL;
2548 
2549 	/* The 2nd file is a copy. But is it a copy of the 1st file ? */
2550 
2551 	/* On file systems with inodes or file indices if a file is copied and truncated, we assume that */
2552 	/* there is a high possibility that the truncated file has the same inode (index) as before. */
2553 
2554 	if (NULL == old_files)	/* cannot consult the old file list */
2555 		return FAIL;
2556 
2557 	is_same_place = compare_file_places(old_files + new_files[idx + 1].copy_of, new_files + idx, use_ino);
2558 
2559 	if (ZBX_FILE_PLACE_SAME == is_same_place && new_files[idx].seq >= new_files[idx + 1].seq)
2560 		return SUCCEED;
2561 
2562 	/* The last attempt - compare file names. It is less reliable as file rotation can change file names. */
2563 	if (ZBX_FILE_PLACE_OTHER == is_same_place || ZBX_FILE_PLACE_UNKNOWN == is_same_place)
2564 	{
2565 		if (0 == strcmp((old_files + new_files[idx + 1].copy_of)->filename, (new_files + idx)->filename))
2566 			return SUCCEED;
2567 	}
2568 
2569 	return FAIL;
2570 }
2571 
swap_logfile_array_elements(struct st_logfile * array,int idx1,int idx2)2572 static void	swap_logfile_array_elements(struct st_logfile *array, int idx1, int idx2)
2573 {
2574 	struct st_logfile	*p1 = array + idx1;
2575 	struct st_logfile	*p2 = array + idx2;
2576 	struct st_logfile	tmp;
2577 
2578 	memcpy(&tmp, p1, sizeof(struct st_logfile));
2579 	memcpy(p1, p2, sizeof(struct st_logfile));
2580 	memcpy(p2, &tmp, sizeof(struct st_logfile));
2581 }
2582 
ensure_order_if_mtimes_equal(const struct st_logfile * logfiles_old,struct st_logfile * logfiles,int logfiles_num,int use_ino,int * start_idx)2583 static void	ensure_order_if_mtimes_equal(const struct st_logfile *logfiles_old, struct st_logfile *logfiles,
2584 		int logfiles_num, int use_ino, int *start_idx)
2585 {
2586 	int	i;
2587 
2588 	/* There is a special case when within 1 second of time:       */
2589 	/*   1. a log file ORG.log is copied to other file COPY.log,   */
2590 	/*   2. the original file ORG.log is truncated,                */
2591 	/*   3. new records are appended to the original file ORG.log, */
2592 	/*   4. both files ORG.log and COPY.log have the same 'mtime'. */
2593 	/* Now in the list 'logfiles' the file ORG.log precedes the COPY.log because if 'mtime' is the same   */
2594 	/* then add_logfile() function sorts files by name in descending order. This would lead to an error - */
2595 	/* processing ORG.log before COPY.log. We need to correct the order by swapping ORG.log and COPY.log  */
2596 	/* elements in the 'logfiles' list. */
2597 
2598 	for (i = 0; i < logfiles_num - 1; i++)
2599 	{
2600 		if (logfiles[i].mtime == logfiles[i + 1].mtime &&
2601 				SUCCEED == is_swap_required(logfiles_old, logfiles, use_ino, i))
2602 		{
2603 			zabbix_log(LOG_LEVEL_DEBUG, "ensure_order_if_mtimes_equal() swapping files '%s' and '%s'",
2604 					logfiles[i].filename, logfiles[i + 1].filename);
2605 
2606 			swap_logfile_array_elements(logfiles, i, i + 1);
2607 
2608 			if (*start_idx == i + 1)
2609 				*start_idx = i;
2610 		}
2611 	}
2612 }
2613 
2614 /******************************************************************************
2615  *                                                                            *
2616  * Function: files_have_same_blocks_md5                                       *
2617  *                                                                            *
2618  * Purpose: compare MD5 sums of first and last blocks between 2 files. If MD5 *
2619  *          sums have been calculated for blocks of different sizes or        *
2620  *          offsets then open the log file with larger size of blocks and get *
2621  *          MD5 for the smaller size                                          *
2622  *                                                                            *
2623  * Parameters:                                                                *
2624  *     log1 - [IN] log file 1 attributes                                      *
2625  *     log2 - [IN] log file 2 attributes                                      *
2626  *                                                                            *
2627  * Return value: SUCCEED or FAIL                                              *
2628  *                                                                            *
2629  * Comments: Use this function to compare log files from the 'new' file list. *
2630  *           DO NOT use it with a file which belongs to the 'old' list (the   *
2631  *           old file name now could point to different file)                 *
2632  *                                                                            *
2633  ******************************************************************************/
files_have_same_blocks_md5(const struct st_logfile * log1,const struct st_logfile * log2)2634 static int	files_have_same_blocks_md5(const struct st_logfile *log1, const struct st_logfile *log2)
2635 {
2636 	if (-1 == log1->md5_block_size || -1 == log2->md5_block_size)
2637 		return FAIL;
2638 
2639 	if (log1->md5_block_size == log2->md5_block_size)	/* this works for empty files, too */
2640 	{
2641 		if (0 != memcmp(log1->first_block_md5, log2->first_block_md5, sizeof(log1->first_block_md5)))
2642 			return FAIL;
2643 
2644 		if (log1->last_block_offset == log2->last_block_offset)
2645 		{
2646 			if (0 != memcmp(log1->last_block_md5, log2->last_block_md5, sizeof(log1->last_block_md5)))
2647 				return FAIL;
2648 
2649 			return SUCCEED;
2650 		}
2651 	}
2652 
2653 	/* we have MD5 sums but they were calculated from blocks of different sizes or offsets */
2654 
2655 	if (0 < log1->md5_block_size && 0 < log2->md5_block_size)
2656 	{
2657 		const struct st_logfile	*file_smaller, *file_larger;
2658 		int			fd, ret = FAIL;
2659 		char			*err_msg = NULL;		/* required, but not used */
2660 		md5_byte_t		md5tmp[MD5_DIGEST_SIZE];
2661 
2662 		if (log1->md5_block_size < log2->md5_block_size)
2663 		{
2664 			file_smaller = log1;
2665 			file_larger = log2;
2666 		}
2667 		else
2668 		{
2669 			file_smaller = log2;
2670 			file_larger = log1;
2671 		}
2672 
2673 		if (-1 == (fd = zbx_open(file_larger->filename, O_RDONLY)))
2674 			return FAIL;
2675 
2676 		if (SUCCEED != file_part_md5(fd, 0, file_smaller->md5_block_size, md5tmp, "", &err_msg))
2677 			goto clean;
2678 
2679 		if (0 != memcmp(file_smaller->first_block_md5, md5tmp, sizeof(md5tmp)))
2680 			goto clean;
2681 
2682 		if (0 == file_smaller->last_block_offset)
2683 		{
2684 			ret = SUCCEED;
2685 			goto clean;
2686 		}
2687 
2688 		if (SUCCEED != file_part_md5(fd, file_smaller->last_block_offset, file_smaller->md5_block_size, md5tmp,
2689 				"", &err_msg))
2690 		{
2691 			goto clean;
2692 		}
2693 
2694 		if (0 == memcmp(file_smaller->last_block_md5, md5tmp, sizeof(md5tmp)))
2695 			ret = SUCCEED;
2696 clean:
2697 		zbx_free(err_msg);
2698 		close(fd);
2699 
2700 		return ret;
2701 	}
2702 
2703 	return FAIL;
2704 }
2705 
handle_multiple_copies(struct st_logfile * logfiles,int logfiles_num,int i)2706 static void	handle_multiple_copies(struct st_logfile *logfiles, int logfiles_num, int i)
2707 {
2708 	/* There is a special case when the latest log file is copied to other file but not yet truncated. */
2709 	/* So there are two files and we don't know which one will stay as the copy and which one will be  */
2710 	/* truncated. Similar cases: the latest log file is copied but never truncated or is copied multiple */
2711 	/* times. */
2712 
2713 	int	j;
2714 
2715 	for (j = i + 1; j < logfiles_num; j++)
2716 	{
2717 		if (SUCCEED == files_have_same_blocks_md5(logfiles + i, logfiles + j))
2718 		{
2719 			/* logfiles[i] and logfiles[j] are original and copy (or vice versa). */
2720 			/* If logfiles[i] has been at least partially processed then transfer its */
2721 			/* processed size to logfiles[j], too. */
2722 
2723 			if (logfiles[j].processed_size < logfiles[i].processed_size)
2724 			{
2725 				logfiles[j].processed_size = MIN(logfiles[i].processed_size, logfiles[j].size);
2726 
2727 				zabbix_log(LOG_LEVEL_DEBUG, "handle_multiple_copies() file '%s' processed_size:"
2728 						ZBX_FS_UI64 " transferred to" " file '%s' processed_size:" ZBX_FS_UI64,
2729 						logfiles[i].filename, logfiles[i].processed_size,
2730 						logfiles[j].filename, logfiles[j].processed_size);
2731 			}
2732 			else if (logfiles[i].processed_size < logfiles[j].processed_size)
2733 			{
2734 				logfiles[i].processed_size = MIN(logfiles[j].processed_size, logfiles[i].size);
2735 
2736 				zabbix_log(LOG_LEVEL_DEBUG, "handle_multiple_copies() file '%s' processed_size:"
2737 						ZBX_FS_UI64 " transferred to" " file '%s' processed_size:" ZBX_FS_UI64,
2738 						logfiles[j].filename, logfiles[j].processed_size,
2739 						logfiles[i].filename, logfiles[i].processed_size);
2740 			}
2741 		}
2742 	}
2743 }
2744 
/******************************************************************************
 *                                                                            *
 * Function: delay_update_if_copies                                           *
 *                                                                            *
 * Purpose: if the list contains copies then move 'mtime' and 'lastlogsize'   *
 *          back so that the copies stay in the file list for the next check  *
 *                                                                            *
 * Parameters:                                                                *
 *     logfiles     - [IN/OUT] list of log files                              *
 *     logfiles_num - [IN] number of elements in 'logfiles'                   *
 *     mtime        - [IN/OUT] last modification time of the file             *
 *     lastlogsize  - [IN/OUT] offset from the beginning of the file          *
 *                                                                            *
 ******************************************************************************/
static void	delay_update_if_copies(struct st_logfile *logfiles, int logfiles_num, int *mtime,
		zbx_uint64_t *lastlogsize)
{
	int	a, b, keep = logfiles_num - 1;

	/* Look for copies among non-empty files and find the smallest index of an element which has to */
	/* stay in the list so that information about copies is not lost. */

	for (a = 0; a < logfiles_num - 1; a++)
	{
		int	candidate = -1;		/* largest index of the more processed file in pairs with 'a' */

		if (0 == logfiles[a].size)
			continue;

		for (b = a + 1; b < logfiles_num; b++)
		{
			int	winner;

			if (0 == logfiles[b].size)
				continue;

			if (SUCCEED != files_have_same_blocks_md5(logfiles + a, logfiles + b))
				continue;

			/* 'a' and 'b' are original and copy (or vice versa), pick the more processed one */
			winner = (logfiles[a].processed_size > logfiles[b].processed_size) ? a : b;

			if (candidate < winner)
				candidate = winner;
		}

		if (-1 != candidate && candidate < keep)
			keep = candidate;
	}

	if (logfiles[keep].mtime >= *mtime)
		return;

	zabbix_log(LOG_LEVEL_DEBUG, "delay_update_if_copies(): setting mtime back from %d to %d,"
			" lastlogsize from " ZBX_FS_UI64 " to " ZBX_FS_UI64, *mtime,
			logfiles[keep].mtime, *lastlogsize, logfiles[keep].processed_size);

	/* make sure the element 'keep' gets into the file list next time with the right 'lastlogsize' */
	*mtime = logfiles[keep].mtime;
	*lastlogsize = logfiles[keep].processed_size;

	/* make sure the next check starts processing from the element 'keep' */
	for (a = keep + 1; a < logfiles_num; a++)
		logfiles[a].seq = 0;
}
2800 
max_processed_size_in_copies(const struct st_logfile * logfiles,int logfiles_num,int i)2801 static zbx_uint64_t	max_processed_size_in_copies(const struct st_logfile *logfiles, int logfiles_num, int i)
2802 {
2803 	zbx_uint64_t	max_processed = 0;
2804 	int		j;
2805 
2806 	for (j = 0; j < logfiles_num; j++)
2807 	{
2808 		if (i != j && SUCCEED == files_have_same_blocks_md5(logfiles + i, logfiles + j))
2809 		{
2810 			/* logfiles[i] and logfiles[j] are original and copy (or vice versa). */
2811 			if (max_processed < logfiles[j].processed_size)
2812 				max_processed = logfiles[j].processed_size;
2813 		}
2814 	}
2815 
2816 	return max_processed;
2817 }
2818 
2819 /******************************************************************************
2820  *                                                                            *
2821  * Function: calculate_delay                                                  *
2822  *                                                                            *
2823  * Purpose: calculate delay based on number of processed and remaining bytes, *
2824  *          and processing time                                               *
2825  *                                                                            *
2826  * Parameters:                                                                *
2827  *     processed_bytes - [IN] number of processed bytes in logfile            *
2828  *     remaining_bytes - [IN] number of remaining bytes in all logfiles       *
2829  *     t_proc          - [IN] processing time, s                              *
2830  *                                                                            *
2831  * Return value:                                                              *
2832  *     delay in seconds or 0 (if cannot be calculated)                        *
2833  *                                                                            *
2834  ******************************************************************************/
calculate_delay(zbx_uint64_t processed_bytes,zbx_uint64_t remaining_bytes,double t_proc)2835 static double	calculate_delay(zbx_uint64_t processed_bytes, zbx_uint64_t remaining_bytes, double t_proc)
2836 {
2837 	double	delay = 0.0;
2838 
2839 	/* Processing time could be negative or 0 if the system clock has been set back in time. */
2840 	/* In this case return 0, then a jump over log lines will not take place. */
2841 
2842 	if (0 != processed_bytes && 0.0 < t_proc)
2843 	{
2844 		delay = (double)remaining_bytes * t_proc / (double)processed_bytes;
2845 
2846 		zabbix_log(LOG_LEVEL_DEBUG, "calculate_delay(): processed bytes:" ZBX_FS_UI64
2847 				" remaining bytes:" ZBX_FS_UI64 " t_proc:%e s speed:%e B/s remaining full checks:"
2848 				ZBX_FS_UI64 " delay:%e s", processed_bytes, remaining_bytes, t_proc,
2849 				(double)processed_bytes / t_proc, remaining_bytes / processed_bytes, delay);
2850 	}
2851 
2852 	return delay;
2853 }
2854 
/******************************************************************************
 *                                                                            *
 * Function: jump_remaining_bytes_logrt                                       *
 *                                                                            *
 * Purpose: skip up to 'bytes_to_jump' unprocessed bytes, first in the file   *
 *          'start_from' and then in files from the beginning of the list     *
 *                                                                            *
 * Parameters:                                                                *
 *     logfiles      - [IN/OUT] list of log files                             *
 *     logfiles_num  - [IN] number of elements in 'logfiles'                  *
 *     key           - [IN] item key for logging                              *
 *     start_from    - [IN] index of the file to start the jump in            *
 *     bytes_to_jump - [IN] number of bytes to jump over                      *
 *     seq           - [IN/OUT] sequence number, assigned to every file       *
 *                     jumped in and then incremented                         *
 *     lastlogsize   - [OUT] new processed size of the last file jumped in    *
 *     mtime         - [OUT] modification time of the last file jumped in     *
 *     jumped_to     - [OUT] index of the last file jumped in, not set if     *
 *                     no file with unprocessed bytes was visited             *
 *                                                                            *
 ******************************************************************************/
static void	jump_remaining_bytes_logrt(struct st_logfile *logfiles, int logfiles_num, const char *key,
		int start_from, zbx_uint64_t bytes_to_jump, int *seq, zbx_uint64_t *lastlogsize, int *mtime,
		int *jumped_to)
{
	int	idx = start_from;	/* the last processed file is visited first */
	int	wrapped = 0;		/* set after the walk has been restarted from the list start */

	while (idx < logfiles_num)
	{
		struct st_logfile	*file = logfiles + idx;

		if (file->size != file->processed_size)
		{
			zbx_uint64_t	step, target;

			step = MIN(bytes_to_jump, file->size - file->processed_size);
			target = file->processed_size + step;

			zabbix_log(LOG_LEVEL_WARNING, "item:\"%s\" logfile:\"%s\" skipping " ZBX_FS_UI64 " bytes (from"
					" byte " ZBX_FS_UI64 " to byte " ZBX_FS_UI64 ") to meet maxdelay", key,
					file->filename, step, file->processed_size, target);

			file->processed_size = target;
			*lastlogsize = target;
			*mtime = file->mtime;

			file->seq = (*seq)++;

			bytes_to_jump -= step;

			*jumped_to = idx;
		}

		if (0 == bytes_to_jump)
			break;

		if (0 == wrapped)
		{
			/* the 'start_from' element is done, continue from the beginning of the file list */
			wrapped = 1;
			idx = 0;
		}
		else
			idx++;
	}
}
2902 
2903 /******************************************************************************
2904  *                                                                            *
2905  * Function: adjust_position_after_jump                                       *
2906  *                                                                            *
2907  * Purpose:                                                                   *
2908  *    After jumping over a number of bytes we "land" most likely somewhere in *
2909  *    the middle of log file line. This function tries to adjust position to  *
2910  *    the beginning of the log line.                                          *
2911  *                                                                            *
2912  * Parameters:                                                                *
2913  *     logfile     - [IN/OUT] log file data                                   *
2914  *     lastlogsize - [IN/OUT] offset from the beginning of the file           *
2915  *     min_size    - [IN] minimum offset to search from                       *
2916  *     encoding    - [IN] text string describing encoding                     *
2917  *     err_msg     - [IN/OUT] error message                                   *
2918  *                                                                            *
2919  * Return value: SUCCEED or FAIL (with error message allocated in 'err_msg')  *
2920  *                                                                            *
2921  ******************************************************************************/
adjust_position_after_jump(struct st_logfile * logfile,zbx_uint64_t * lastlogsize,zbx_uint64_t min_size,const char * encoding,char ** err_msg)2922 static int	adjust_position_after_jump(struct st_logfile *logfile, zbx_uint64_t *lastlogsize, zbx_uint64_t min_size,
2923 		const char *encoding, char **err_msg)
2924 {
2925 	int		fd, ret = FAIL;
2926 	size_t		szbyte;
2927 	ssize_t		nbytes;
2928 	const char	*cr, *lf, *p_end;
2929 	char		*p, *p_next;
2930 	zbx_uint64_t	lastlogsize_tmp, lastlogsize_aligned, lastlogsize_org, seek_pos, remainder;
2931 	char   		buf[32 * ZBX_KIBIBYTE];		/* buffer must be of size multiple of 4 as some character */
2932 							/* encodings use 4 bytes for every character */
2933 
2934 	if (-1 == (fd = open_file_helper(logfile->filename, err_msg)))
2935 		return FAIL;
2936 
2937 	find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte);
2938 
2939 	/* For multibyte character encodings 'lastlogsize' needs to be aligned to character border. */
2940 	/* Align it towards smaller offset. We assume that log file contains no corrupted data stream. */
2941 
2942 	lastlogsize_org = *lastlogsize;
2943 	lastlogsize_aligned = *lastlogsize;
2944 
2945 	if (1 < szbyte && 0 != (remainder = lastlogsize_aligned % szbyte))	/* remainder can be 0, 1, 2 or 3 */
2946 	{
2947 		if (min_size <= lastlogsize_aligned - remainder)
2948 			lastlogsize_aligned -= remainder;
2949 		else
2950 			lastlogsize_aligned = min_size;
2951 	}
2952 
2953 	if ((zbx_offset_t)-1 == zbx_lseek(fd, lastlogsize_aligned, SEEK_SET))
2954 	{
2955 		*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
2956 				lastlogsize_aligned, logfile->filename, zbx_strerror(errno));
2957 		goto out;
2958 	}
2959 
2960 	/* search forward for the first newline until EOF */
2961 
2962 	lastlogsize_tmp = lastlogsize_aligned;
2963 
2964 	for (;;)
2965 	{
2966 		if (-1 == (nbytes = read(fd, buf, sizeof(buf))))
2967 		{
2968 			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file \"%s\": %s", logfile->filename,
2969 					zbx_strerror(errno));
2970 			goto out;
2971 		}
2972 
2973 		if (0 == nbytes)	/* end of file reached */
2974 			break;
2975 
2976 		p = buf;
2977 		p_end = buf + nbytes;	/* no data from this position */
2978 
2979 		if (NULL != buf_find_newline(p, &p_next, p_end, cr, lf, szbyte))
2980 		{
2981 			/* found the beginning of line */
2982 
2983 			*lastlogsize = lastlogsize_tmp + (zbx_uint64_t)(p_next - buf);
2984 			logfile->processed_size = *lastlogsize;
2985 			ret = SUCCEED;
2986 			goto out;
2987 		}
2988 
2989 		lastlogsize_tmp += (zbx_uint64_t)nbytes;
2990 	}
2991 
2992 	/* Searching forward did not find a newline. Now search backwards until 'min_size'. */
2993 
2994 	seek_pos = lastlogsize_aligned;
2995 
2996 	for (;;)
2997 	{
2998 		if (sizeof(buf) <= seek_pos)
2999 			seek_pos -= MIN(sizeof(buf), seek_pos - min_size);
3000 		else
3001 			seek_pos = min_size;
3002 
3003 		if ((zbx_offset_t)-1 == zbx_lseek(fd, seek_pos, SEEK_SET))
3004 		{
3005 			*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
3006 					lastlogsize_aligned, logfile->filename, zbx_strerror(errno));
3007 			goto out;
3008 		}
3009 
3010 		if (-1 == (nbytes = read(fd, buf, sizeof(buf))))
3011 		{
3012 			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file \"%s\": %s", logfile->filename,
3013 					zbx_strerror(errno));
3014 			goto out;
3015 		}
3016 
3017 		if (0 == nbytes)	/* end of file reached */
3018 		{
3019 			*err_msg = zbx_dsprintf(*err_msg, "Unexpected end of file while reading file \"%s\"",
3020 					logfile->filename);
3021 			goto out;
3022 		}
3023 
3024 		p = buf;
3025 		p_end = buf + nbytes;	/* no data from this position */
3026 
3027 		if (NULL != buf_find_newline(p, &p_next, p_end, cr, lf, szbyte))
3028 		{
3029 			/* Found the beginning of line. It may not be the one closest to place we jumped to */
3030 			/* (it could be about sizeof(buf) bytes away) but it is ok for our purposes. */
3031 
3032 			*lastlogsize = seek_pos + (zbx_uint64_t)(p_next - buf);
3033 			logfile->processed_size = *lastlogsize;
3034 			ret = SUCCEED;
3035 			goto out;
3036 		}
3037 
3038 		if (min_size == seek_pos)
3039 		{
3040 			/* We have searched backwards until 'min_size' and did not find a 'newline'. */
3041 			/* Effectively it turned out to be a jump with zero-length. */
3042 
3043 			*lastlogsize = min_size;
3044 			logfile->processed_size = *lastlogsize;
3045 			ret = SUCCEED;
3046 			goto out;
3047 		}
3048 	}
3049 out:
3050 	if (SUCCEED != close_file_helper(fd, logfile->filename, err_msg))
3051 		ret = FAIL;
3052 
3053 	if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
3054 	{
3055 		const char	*dbg_msg;
3056 
3057 		if (SUCCEED == ret)
3058 			dbg_msg = "NEWLINE FOUND";
3059 		else
3060 			dbg_msg = "NEWLINE NOT FOUND";
3061 
3062 		zabbix_log(LOG_LEVEL_DEBUG, "adjust_position_after_jump(): szbyte:" ZBX_FS_SIZE_T " lastlogsize_org:"
3063 				ZBX_FS_UI64 " lastlogsize_aligned:" ZBX_FS_UI64 " (change " ZBX_FS_I64 " bytes)"
3064 				" lastlogsize_after:" ZBX_FS_UI64 " (change " ZBX_FS_I64 " bytes) %s %s",
3065 				(zbx_fs_size_t)szbyte, lastlogsize_org, lastlogsize_aligned,
3066 				(zbx_int64_t)lastlogsize_aligned - (zbx_int64_t)lastlogsize_org, *lastlogsize,
3067 				(zbx_int64_t)*lastlogsize - (zbx_int64_t)lastlogsize_aligned,
3068 				dbg_msg, ZBX_NULL2EMPTY_STR(*err_msg));
3069 	}
3070 
3071 	return ret;
3072 }
3073 
3074 /******************************************************************************
3075  *                                                                            *
3076  * Function: jump_ahead                                                       *
3077  *                                                                            *
3078  * Purpose: move forward to a new position in the log file list               *
3079  *                                                                            *
3080  * Parameters:                                                                *
3081  *     key           - [IN] item key for logging                              *
3082  *     logfiles      - [IN/OUT] list of log files                             *
3083  *     logfiles_num  - [IN] number of elements in 'logfiles'                  *
3084  *     jump_from_to  - [IN/OUT] on input - number of element where to start   *
3085  *                     jump, on output - number of element we jumped into     *
3086  *     seq           - [IN/OUT] sequence number of last processed file        *
3087  *     lastlogsize   - [IN/OUT] offset from the beginning of the file         *
3088  *     mtime         - [IN/OUT] last modification time of the file            *
3089  *     encoding      - [IN] text string describing encoding                   *
3090  *     bytes_to_jump - [IN] number of bytes to jump ahead                     *
3091  *     err_msg       - [IN/OUT] error message                                 *
3092  *                                                                            *
3093  * Return value: SUCCEED or FAIL (with error message allocated in 'err_msg')  *
3094  *                                                                            *
3095  * Comments: Thread-safe                                                      *
3096  *                                                                            *
3097  ******************************************************************************/
jump_ahead(const char * key,struct st_logfile * logfiles,int logfiles_num,int * jump_from_to,int * seq,zbx_uint64_t * lastlogsize,int * mtime,const char * encoding,zbx_uint64_t bytes_to_jump,char ** err_msg)3098 static int	jump_ahead(const char *key, struct st_logfile *logfiles, int logfiles_num,
3099 		int *jump_from_to, int *seq, zbx_uint64_t *lastlogsize, int *mtime, const char *encoding,
3100 		zbx_uint64_t bytes_to_jump, char **err_msg)
3101 {
3102 	zbx_uint64_t	lastlogsize_org, min_size;
3103 	int		jumped_to = -1;		/* number of file in 'logfiles' list we jumped to */
3104 
3105 	lastlogsize_org = *lastlogsize;
3106 
3107 	jump_remaining_bytes_logrt(logfiles, logfiles_num, key, *jump_from_to, bytes_to_jump, seq, lastlogsize,
3108 			mtime, &jumped_to);
3109 
3110 	if (-1 == jumped_to)		/* no actual jump took place, no need to modify 'jump_from_to' */
3111 		return SUCCEED;
3112 
3113 	/* We have jumped into file, most likely somewhere in the middle of log line. Now find the beginning */
3114 	/* of a line to avoid pattern-matching a line from a random position. */
3115 
3116 	if (*jump_from_to == jumped_to)
3117 	{
3118 		/* jumped within the same file - do not search the beginning of a line before "pre-jump" position */
3119 		min_size = lastlogsize_org;
3120 	}
3121 	else
3122 	{
3123 		*jump_from_to = jumped_to;
3124 
3125 		/* jumped into different file - may search the beginning of a line from beginning of file */
3126 		min_size = 0;
3127 	}
3128 
3129 	return adjust_position_after_jump(&logfiles[jumped_to], lastlogsize, min_size, encoding, err_msg);
3130 }
3131 
calculate_remaining_bytes(struct st_logfile * logfiles,int logfiles_num)3132 static zbx_uint64_t	calculate_remaining_bytes(struct st_logfile *logfiles, int logfiles_num)
3133 {
3134 	zbx_uint64_t	remaining_bytes = 0;
3135 	int		i;
3136 
3137 	for (i = 0; i < logfiles_num; i++)
3138 		remaining_bytes += logfiles[i].size - logfiles[i].processed_size;
3139 
3140 	return remaining_bytes;
3141 }
3142 
transfer_for_rotate(const struct st_logfile * logfiles_old,int idx,struct st_logfile * logfiles,int logfiles_num,const char * old2new,int * seq)3143 static void	transfer_for_rotate(const struct st_logfile *logfiles_old, int idx, struct st_logfile *logfiles,
3144 		int logfiles_num, const char *old2new, int *seq)
3145 {
3146 	int	j;
3147 
3148 	if (0 < logfiles_old[idx].processed_size && 0 == logfiles_old[idx].incomplete &&
3149 			-1 != (j = find_old2new(old2new, logfiles_num, idx)))
3150 	{
3151 		if (logfiles_old[idx].size == logfiles_old[idx].processed_size &&
3152 				logfiles_old[idx].size == logfiles[j].size)
3153 		{
3154 			/* the file was fully processed during the previous check and must be ignored during this */
3155 			/* check */
3156 			logfiles[j].processed_size = logfiles[j].size;
3157 			logfiles[j].seq = (*seq)++;
3158 		}
3159 		else
3160 		{
3161 			/* the file was not fully processed during the previous check or has grown */
3162 			if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
3163 				logfiles[j].processed_size = MIN(logfiles[j].size, logfiles_old[idx].processed_size);
3164 		}
3165 	}
3166 	else if (1 == logfiles_old[idx].incomplete && -1 != (j = find_old2new(old2new, logfiles_num, idx)))
3167 	{
3168 		if (logfiles_old[idx].size < logfiles[j].size)
3169 		{
3170 			/* The file was not fully processed because of incomplete last record but it has grown. */
3171 			/* Try to process it further. */
3172 			logfiles[j].incomplete = 0;
3173 		}
3174 		else
3175 			logfiles[j].incomplete = 1;
3176 
3177 		if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
3178 			logfiles[j].processed_size = MIN(logfiles[j].size, logfiles_old[idx].processed_size);
3179 	}
3180 }
3181 
transfer_for_copytruncate(const struct st_logfile * logfiles_old,int idx,struct st_logfile * logfiles,int logfiles_num,const char * old2new,int * seq)3182 static void	transfer_for_copytruncate(const struct st_logfile *logfiles_old, int idx, struct st_logfile *logfiles,
3183 		int logfiles_num, const char *old2new, int *seq)
3184 {
3185 	const char	*p = old2new + idx * logfiles_num;	/* start of idx-th row in 'old2new' array */
3186 	int		j;
3187 
3188 	if (0 < logfiles_old[idx].processed_size && 0 == logfiles_old[idx].incomplete)
3189 	{
3190 		for (j = 0; j < logfiles_num; j++, p++)		/* loop over columns (new files) on idx-th row */
3191 		{
3192 			if ('1' == *p || '2' == *p)
3193 			{
3194 				if (logfiles_old[idx].size == logfiles_old[idx].processed_size &&
3195 						logfiles_old[idx].size == logfiles[j].size)
3196 				{
3197 					/* the file was fully processed during the previous check and must be ignored */
3198 					/* during this check */
3199 					logfiles[j].processed_size = logfiles[j].size;
3200 					logfiles[j].seq = (*seq)++;
3201 				}
3202 				else
3203 				{
3204 					/* the file was not fully processed during the previous check or has grown */
3205 					if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
3206 					{
3207 						logfiles[j].processed_size = MIN(logfiles[j].size,
3208 								logfiles_old[idx].processed_size);
3209 					}
3210 				}
3211 			}
3212 		}
3213 	}
3214 	else if (1 == logfiles_old[idx].incomplete)
3215 	{
3216 		for (j = 0; j < logfiles_num; j++, p++)		/* loop over columns (new files) on idx-th row */
3217 		{
3218 			if ('1' == *p || '2' == *p)
3219 			{
3220 				if (logfiles_old[idx].size < logfiles[j].size)
3221 				{
3222 					/* The file was not fully processed because of incomplete last record but it */
3223 					/* has grown. Try to process it further. */
3224 					logfiles[j].incomplete = 0;
3225 				}
3226 				else
3227 					logfiles[j].incomplete = 1;
3228 
3229 				if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
3230 				{
3231 					logfiles[j].processed_size = MIN(logfiles[j].size,
3232 							logfiles_old[idx].processed_size);
3233 				}
3234 			}
3235 		}
3236 	}
3237 }
3238 
3239 /******************************************************************************
3240  *                                                                            *
3241  * Function: update_new_list_from_old                                         *
3242  *                                                                            *
3243  * Comments: Thread-safe                                                      *
3244  *                                                                            *
3245  ******************************************************************************/
update_new_list_from_old(zbx_log_rotation_options_t rotation_type,struct st_logfile * logfiles_old,int logfiles_num_old,struct st_logfile * logfiles,int logfiles_num,int use_ino,int * seq,int * start_idx,zbx_uint64_t * lastlogsize,char ** err_msg)3246 static int	update_new_list_from_old(zbx_log_rotation_options_t rotation_type, struct st_logfile *logfiles_old,
3247 		int logfiles_num_old, struct st_logfile *logfiles, int logfiles_num, int use_ino, int *seq,
3248 		int *start_idx, zbx_uint64_t *lastlogsize, char **err_msg)
3249 {
3250 	char	*old2new;
3251 	int	i, max_old_seq = 0, old_last;
3252 
3253 	if (NULL == (old2new = create_old2new_and_copy_of(rotation_type, logfiles_old, logfiles_num_old,
3254 			logfiles, logfiles_num, use_ino, err_msg)))
3255 	{
3256 		return FAIL;
3257 	}
3258 
3259 	/* transfer data about fully and partially processed files from the old file list to the new list */
3260 	for (i = 0; i < logfiles_num_old; i++)
3261 	{
3262 		if (ZBX_LOG_ROTATION_LOGCPT == rotation_type)
3263 			transfer_for_copytruncate(logfiles_old, i, logfiles, logfiles_num, old2new, seq);
3264 		else
3265 			transfer_for_rotate(logfiles_old, i, logfiles, logfiles_num, old2new, seq);
3266 
3267 		/* find the last file processed (fully or partially) in the previous check */
3268 		if (max_old_seq < logfiles_old[i].seq)
3269 		{
3270 			max_old_seq = logfiles_old[i].seq;
3271 			old_last = i;
3272 		}
3273 	}
3274 
3275 	/* find the first file to continue from in the new file list */
3276 	if (0 < max_old_seq && -1 == (*start_idx = find_old2new(old2new, logfiles_num, old_last)))
3277 	{
3278 		/* Cannot find the successor of the last processed file from the previous check. */
3279 		/* Adjust 'lastlogsize' for this case. */
3280 		*start_idx = 0;
3281 		*lastlogsize = logfiles[*start_idx].processed_size;
3282 	}
3283 
3284 	zbx_free(old2new);
3285 
3286 	return SUCCEED;
3287 }
3288 
3289 /******************************************************************************
3290  *                                                                            *
3291  * Function: process_logrt                                                    *
3292  *                                                                            *
3293  * Purpose: Find new records in logfiles                                      *
3294  *                                                                            *
3295  * Parameters:                                                                *
3296  *     flags            - [IN] bit flags with item type: log, logrt,          *
3297  *                        log.count or logrt.count                            *
3298  *     filename         - [IN] logfile name (regular expression with a path)  *
3299  *     lastlogsize      - [IN/OUT] offset from the beginning of the file      *
3300  *     mtime            - [IN/OUT] last modification time of the file         *
3301  *     lastlogsize_sent - [OUT] lastlogsize value that was last sent          *
3302  *     mtime_sent       - [OUT] mtime value that was last sent                *
3303  *     skip_old_data    - [IN/OUT] start from the beginning of the file or    *
3304  *                        jump to the end                                     *
3305  *     big_rec          - [IN/OUT] state variable to remember whether a long  *
3306  *                        record is being processed                           *
3307  *     use_ino          - [IN/OUT] how to use inode numbers                   *
3308  *     err_msg          - [IN/OUT] error message why an item became           *
3309  *                        NOTSUPPORTED                                        *
3310  *     logfiles_old     - [IN/OUT] array of logfiles from the last check      *
3311  *     logfiles_num_old - [IN] number of elements in "logfiles_old"           *
3312  *     logfiles_new     - [OUT] new array of logfiles                         *
3313  *     logfiles_num_new - [OUT] number of elements in "logfiles_new"          *
3314  *     encoding         - [IN] text string describing encoding.               *
3315  *                        See function find_cr_lf_szbyte() for supported      *
3316  *                        encodings.                                          *
3317  *                        "" (empty string) means a single-byte character set *
3318  *                        (e.g. ASCII).                                       *
3319  *     regexps          - [IN] array of regexps                               *
3320  *     pattern          - [IN] pattern to match                               *
3321  *     output_template  - [IN] output formatting template                     *
3322  *     p_count          - [IN/OUT] limit of records to be processed           *
3323  *     s_count          - [IN/OUT] limit of records to be sent to server      *
3324  *     process_value    - [IN] pointer to function process_value()            *
3325  *     server           - [IN] server to send data to                         *
3326  *     port             - [IN] port to send data to                           *
3327  *     hostname         - [IN] hostname the data comes from                   *
3328  *     key              - [IN] item key the data belongs to                   *
3329  *     jumped           - [OUT] flag to indicate that a jump took place       *
3330  *     max_delay        - [IN] maximum allowed delay, s                       *
3331  *     start_time       - [IN/OUT] start time of check                        *
3332  *     processed_bytes  - [IN/OUT] number of bytes processed                  *
3333  *     rotation_type    - [IN] simple rotation or copy/truncate rotation      *
3334  *     persistent_file_name - [IN] name of file for saving persistent data    *
3335  *     prep_vec         - [IN/OUT] vector with data for writing into          *
3336  *                                 persistent files                           *
3337  *                                                                            *
3338  * Return value: returns SUCCEED on successful reading,                       *
3339  *               FAIL on other cases                                          *
3340  *                                                                            *
3341  * Comments: Supposed to be thread-safe, see pick_logfiles() comments.        *
3342  *                                                                            *
3343  ******************************************************************************/
static int	process_logrt(unsigned char flags, const char *filename, zbx_uint64_t *lastlogsize, int *mtime,
		zbx_uint64_t *lastlogsize_sent, int *mtime_sent, unsigned char *skip_old_data, int *big_rec,
		int *use_ino, char **err_msg, struct st_logfile **logfiles_old, int logfiles_num_old,
		struct st_logfile **logfiles_new, int *logfiles_num_new, const char *encoding,
		zbx_vector_ptr_t *regexps, const char *pattern, const char *output_template, int *p_count, int *s_count,
		zbx_process_value_func_t process_value, const char *server, unsigned short port, const char *hostname,
		const char *key, int *jumped, float max_delay, double *start_time, zbx_uint64_t *processed_bytes,
		zbx_log_rotation_options_t rotation_type, const char *persistent_file_name,
		zbx_vector_pre_persistent_t *prep_vec)
{
	int			i, start_idx, ret = FAIL, logfiles_num = 0, logfiles_alloc = 0, seq = 1,
				from_first_file = 1, last_processed, limit_reached = 0, res;
	struct st_logfile	*logfiles = NULL;
	zbx_uint64_t		processed_bytes_sum = 0;

	zabbix_log(LOG_LEVEL_DEBUG, "In %s() flags:0x%02x filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d",
			__func__, (unsigned int)flags, filename, *lastlogsize, *mtime);

	adjust_mtime_to_clock(mtime);

	/* Build the new list of log files for this check. 'res' is kept to tell ZBX_NO_FILE_ERROR apart */
	/* from other failures. */
	if (SUCCEED != (res = make_logfile_list(flags, filename, *mtime, &logfiles, &logfiles_alloc, &logfiles_num,
			use_ino, err_msg)))
	{
		if (ZBX_NO_FILE_ERROR == res)
		{
			if (1 == *skip_old_data)
			{
				*skip_old_data = 0;

				zabbix_log(LOG_LEVEL_DEBUG, "%s(): no files, setting skip_old_data to 0", __func__);
			}

			if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags) && 0 == logfiles_num_old)
			{
				/* Both the old and the new log file lists are empty. That means the agent has not */
				/* seen any log files for this logrt[] item since started. If log files appear later */
				/* then analyze them from start, do not apply the 'lastlogsize' received from server */
				/* anymore. */

				*lastlogsize = 0;
			}
		}

		/* file was not accessible for a log[] or log.count[] item or an error occurred */
		if (0 != (ZBX_METRIC_FLAG_LOG_LOG & flags) || (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags) && FAIL == res))
			goto out;
	}

	if (0 == logfiles_num)
	{
		/* there were no files for a logrt[] or logrt.count[] item to analyze */
		ret = SUCCEED;
		goto out;
	}

	/* With 'skip_old_data' set processing starts at the last file of the list, otherwise at the first one. */
	if (1 == *skip_old_data)
	{
		start_idx = logfiles_num - 1;

		/* mark files to be skipped as processed (except the last one) */
		for (i = 0; i < start_idx; i++)
		{
			logfiles[i].processed_size = logfiles[i].size;
			logfiles[i].seq = seq++;
		}
	}
	else
		start_idx = 0;

	/* Let update_new_list_from_old() compare the new list with the list from the previous check (it receives */
	/* pointers to 'seq', 'start_idx' and 'lastlogsize' and may adjust them). On failure the new list is */
	/* destroyed and FAIL is returned. */
	if (0 < logfiles_num_old && 0 < logfiles_num && SUCCEED != update_new_list_from_old(rotation_type,
			*logfiles_old, logfiles_num_old, logfiles, logfiles_num, *use_ino, &seq, &start_idx,
			lastlogsize, err_msg))
	{
		destroy_logfile_list(&logfiles, &logfiles_alloc, &logfiles_num);
		goto out;
	}

	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
		ensure_order_if_mtimes_equal(*logfiles_old, logfiles, logfiles_num, *use_ino, &start_idx);

	/* the file lists are printed only if debug logging is enabled */
	if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
	{
		zabbix_log(LOG_LEVEL_DEBUG, "%s() old file list:", __func__);
		if (NULL != *logfiles_old)
			print_logfile_list(*logfiles_old, logfiles_num_old);
		else
			zabbix_log(LOG_LEVEL_DEBUG, "   file list empty");

		zabbix_log(LOG_LEVEL_DEBUG, "%s() new file list: (mtime:%d lastlogsize:" ZBX_FS_UI64 " start_idx:%d)",
				__func__, *mtime, *lastlogsize, start_idx);
		if (NULL != logfiles)
			print_logfile_list(logfiles, logfiles_num);
		else
			zabbix_log(LOG_LEVEL_DEBUG, "   file list empty");
	}

	/* number of file last processed - start from this */
	last_processed = start_idx;

	/* from now assume success - it could be that there is nothing to do */
	ret = SUCCEED;

	/* max_delay equal to 0 disables jumping. A jump is considered only if 'start_time' is non-zero, i.e. it */
	/* was not invalidated at the end of the previous check (see the code after label 'out'). */
	if (0.0f != max_delay)
	{
		if (0.0 != *start_time)
		{
			zbx_uint64_t	remaining_bytes;

			if (0 != (remaining_bytes = calculate_remaining_bytes(logfiles, logfiles_num)))
			{
				/* calculate delay and jump if necessary */

				double	delay;

				if ((double)max_delay < (delay = calculate_delay(*processed_bytes, remaining_bytes,
						zbx_time() - *start_time)))
				{
					zbx_uint64_t	bytes_to_jump;

					/* share of the remaining bytes proportional to the excess of 'delay' */
					/* over 'max_delay' */
					bytes_to_jump = (zbx_uint64_t)((double)remaining_bytes *
							(delay - (double)max_delay) / delay);

					/* Note: 'ret' takes the result of jump_ahead() but processing continues */
					/* below and 'ret' is overwritten if a file gets processed. */
					if (SUCCEED == (ret = jump_ahead(key, logfiles, logfiles_num,
							&last_processed, &seq, lastlogsize, mtime, encoding,
							bytes_to_jump, err_msg)))
					{
						*jumped = 1;
					}
				}
			}
		}

		*start_time = zbx_time();	/* mark new start time for using in the next check */
	}

	/* enter the loop with index of the first file to be processed, later continue the loop from the start */
	i = last_processed;

	/* Traversal order: the file at 'last_processed' is visited first, then 'from_first_file' is cleared and */
	/* the list is scanned from index 0 to the end. */
	while (NULL != logfiles && i < logfiles_num)
	{
		/* A file is handled only if it is not marked incomplete and it either has a size different from */
		/* its processed size or has not been assigned a sequence number yet. */
		if (0 == logfiles[i].incomplete &&
				(logfiles[i].size != logfiles[i].processed_size || 0 == logfiles[i].seq))
		{
			zbx_uint64_t	processed_bytes_tmp = 0, seek_offset;
			int		process_this_file = 1;

			*mtime = logfiles[i].mtime;

			if (start_idx != i)
			{
				/* not the starting file: continue from what is recorded for this file */
				*lastlogsize = logfiles[i].processed_size;
			}
			else
			{
				/* When agent starts it can receive from server an out-of-date lastlogsize value, */
				/* larger than current log file size. */

				if (*lastlogsize > logfiles[i].size)
				{
					int	j, found = 0;

					/* check if there are other log files with the same mtime and size */
					/* greater or equal to lastlogsize */
					for (j = 0; j < logfiles_num; j++)
					{
						if (i == j || logfiles[i].mtime != logfiles[j].mtime)
							continue;

						if (*lastlogsize <= logfiles[j].size)
						{
							found = 1;
							break;
						}
					}

					/* no such file: fall back to the processed size recorded for this file */
					if (0 == found)
						*lastlogsize = logfiles[i].processed_size;
				}
			}

			if (0 == *skip_old_data)
			{
				seek_offset = *lastlogsize;
			}
			else
			{
				seek_offset = logfiles[i].size;

				zabbix_log(LOG_LEVEL_DEBUG, "skipping old data in filename:'%s' to seek_offset:"
						ZBX_FS_UI64, logfiles[i].filename, seek_offset);
			}

			if (ZBX_LOG_ROTATION_LOGCPT == rotation_type)
			{
				zbx_uint64_t	max_processed;

				/* If max_processed_size_in_copies() reports more than 'seek_offset' then this file */
				/* is regarded as processed up to that value (limited by the file size). The file is */
				/* skipped completely if that covers its whole size. */
				if (seek_offset < (max_processed = max_processed_size_in_copies(logfiles, logfiles_num,
						i)))
				{
					logfiles[i].processed_size = MIN(logfiles[i].size, max_processed);

					if (logfiles[i].size == logfiles[i].processed_size)
						process_this_file = 0;

					*lastlogsize = max_processed;
				}
			}

			if (0 != process_this_file)
			{
				ret = process_log(flags, logfiles + i, lastlogsize, mtime, lastlogsize_sent,
						mtime_sent, skip_old_data, big_rec, encoding, regexps, pattern,
						output_template, p_count, s_count, process_value, server, port,
						hostname, key, &processed_bytes_tmp, seek_offset, persistent_file_name,
						prep_vec, err_msg);

				/* process_log() advances 'lastlogsize' only on success therefore */
				/* we do not check for errors here */
				logfiles[i].processed_size = *lastlogsize;

				/* log file could grow during processing, update size in our list */
				if (*lastlogsize > logfiles[i].size)
					logfiles[i].size = *lastlogsize;
			}

			/* Mark file as processed (at least partially). In case if process_log() failed we will stop */
			/* the current checking. In the next check the file will be marked in the list of old files */
			/* and we will know where we left off. */
			logfiles[i].seq = seq++;

			if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
			{
				int	k;

				for (k = 0; k < logfiles_num - 1; k++)
					handle_multiple_copies(logfiles, logfiles_num, k);
			}

			if (SUCCEED != ret)
				break;

			/* processed bytes are accumulated only when 'max_delay' is in use */
			if (0.0f != max_delay)
				processed_bytes_sum += processed_bytes_tmp;

			/* stop when the limit of records to be processed or to be sent has been used up */
			if (0 >= *p_count || 0 >= *s_count)
			{
				limit_reached = 1;
				break;
			}
		}

		if (0 != from_first_file)
		{
			/* We have processed the file where we left off in the previous check. */
			from_first_file = 0;

			/* Now proceed from the beginning of the new file list to process the remaining files. */
			i = 0;
			continue;
		}

		i++;
	}

	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
	{
		/* If logrt[] or logrt.count[] item is checked often but rotation by copying is slow it could happen */
		/* that the original file is completely processed but the copy with a newer timestamp is still in */
		/* progress. The original file goes out of the list of files and the copy is analyzed as new file, */
		/* so the matching lines are reported twice. To prevent this we manipulate our stored 'mtime' */
		/* and 'lastlogsize' to keep information about copies in the list as long as necessary to prevent */
		/* reporting twice. */

		delay_update_if_copies(logfiles, logfiles_num, mtime, lastlogsize);
	}

	/* store the new log file list for using in the next check */
	*logfiles_num_new = logfiles_num;

	if (0 < logfiles_num)
	{
		/* Try to update MD5 sums of initial blocks if they were calculated for small blocks. */
		/* Log file processing has been done. Errors here only prevent updating MD5 sums for */
		/* a single file but do not affect function return value. */
		char	*err_tmp = NULL;
		int	k;

		for (k = 0; k < logfiles_num; k++)
		{
			/* only for files whose MD5 block is shorter than MAX_LEN_MD5 and whose size exceeds */
			/* that block */
			if (MAX_LEN_MD5 > logfiles[k].md5_block_size &&
					logfiles[k].size > (zbx_uint64_t)logfiles[k].md5_block_size)
			{
				int		f, new_md5_block_size = (int)MIN(MAX_LEN_MD5, logfiles[k].size);
				size_t		new_last_block_offset;
				md5_byte_t	new_first_block_md5[MD5_DIGEST_SIZE],
						new_last_block_md5[MD5_DIGEST_SIZE];

				/* file cannot be opened - keep the MD5 data it already has */
				if (-1 == (f = zbx_open(logfiles[k].filename, O_RDONLY)))
					continue;

				if (SUCCEED != file_part_md5(f, 0, new_md5_block_size, new_first_block_md5,
						logfiles[k].filename, &err_tmp))
				{
					zbx_free(err_tmp);
					goto clean;
				}

				if (0 < (new_last_block_offset = logfiles[k].size - (size_t)new_md5_block_size))
				{
					if (SUCCEED != file_part_md5(f, new_last_block_offset,
							new_md5_block_size, new_last_block_md5, logfiles[k].filename,
							&err_tmp))
					{
						zbx_free(err_tmp);
						goto clean;
					}
				}
				else	/* file is small, set the last block MD5 equal to the first block's one */
					memcpy(new_last_block_md5, new_first_block_md5, sizeof(new_last_block_md5));

				/* both sums were calculated - the list element is updated only now */
				logfiles[k].md5_block_size = new_md5_block_size;
				logfiles[k].last_block_offset = new_last_block_offset;

				memcpy(logfiles[k].first_block_md5, new_first_block_md5,
						sizeof(logfiles[k].first_block_md5));
				memcpy(logfiles[k].last_block_md5, new_last_block_md5,
						sizeof(logfiles[k].last_block_md5));
clean:
				/* close() errors are ignored: either way the loop proceeds to the next file */
				if (0 != close(f))
					continue;
			}
		}

		/* hand the new list over to the caller */
		*logfiles_new = logfiles;
	}
out:
	if (0.0f != max_delay)
	{
		if (SUCCEED == ret)
			*processed_bytes = processed_bytes_sum;

		if (SUCCEED != ret || 0 == limit_reached)
		{
			/* FAIL or number of lines limits were not reached. */
			/* Invalidate start_time to prevent jump in the next check. */
			*start_time = 0.0;
		}
	}

	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s", __func__, zbx_result_string(ret));

	return ret;
}
3697 
check_number_of_parameters(unsigned char flags,const AGENT_REQUEST * request,char ** error)3698 static int	check_number_of_parameters(unsigned char flags, const AGENT_REQUEST *request, char **error)
3699 {
3700 	int	parameter_num, max_parameter_num;
3701 
3702 	if (0 == (parameter_num = get_rparams_num(request)))
3703 	{
3704 		*error = zbx_strdup(*error, "Invalid number of parameters.");
3705 		return FAIL;
3706 	}
3707 
3708 	if (0 != (ZBX_METRIC_FLAG_LOG_COUNT & flags))
3709 		max_parameter_num = 8;	/* log.count or logrt.count */
3710 	else
3711 		max_parameter_num = 9;	/* log or logrt */
3712 
3713 	if (max_parameter_num < parameter_num)
3714 	{
3715 		*error = zbx_strdup(*error, "Too many parameters.");
3716 		return FAIL;
3717 	}
3718 
3719 	return SUCCEED;
3720 }
3721 
3722 /******************************************************************************
3723  *                                                                            *
3724  * Function: init_max_lines_per_sec                                           *
3725  *                                                                            *
3726  * Comments: thread-safe if CONFIG_MAX_LINES_PER_SECOND is updated when log   *
3727  *           checks are not running                                           *
3728  *                                                                            *
3729  ******************************************************************************/
init_max_lines_per_sec(int is_count_item,const AGENT_REQUEST * request,int * max_lines_per_sec,char ** error)3730 static int	init_max_lines_per_sec(int is_count_item, const AGENT_REQUEST *request, int *max_lines_per_sec,
3731 		char **error)
3732 {
3733 	const char	*p;
3734 	int		rate;
3735 
3736 	if (NULL == (p = get_rparam(request, 3)) || '\0' == *p)
3737 	{
3738 		if (0 == is_count_item)				/* log[], logrt[] */
3739 			*max_lines_per_sec = CONFIG_MAX_LINES_PER_SECOND;
3740 		else						/* log.count[], logrt.count[] */
3741 			*max_lines_per_sec = MAX_VALUE_LINES_MULTIPLIER * CONFIG_MAX_LINES_PER_SECOND;
3742 
3743 		return SUCCEED;
3744 	}
3745 
3746 	if (MIN_VALUE_LINES > (rate = atoi(p)) ||
3747 			(0 == is_count_item && MAX_VALUE_LINES < rate) ||
3748 			(0 != is_count_item && MAX_VALUE_LINES_MULTIPLIER * MAX_VALUE_LINES < rate))
3749 	{
3750 		*error = zbx_strdup(*error, "Invalid fourth parameter.");
3751 		return FAIL;
3752 	}
3753 
3754 	*max_lines_per_sec = rate;
3755 	return SUCCEED;
3756 }
3757 
init_max_delay(int is_count_item,const AGENT_REQUEST * request,float * max_delay,char ** error)3758 static int	init_max_delay(int is_count_item, const AGENT_REQUEST *request, float *max_delay, char **error)
3759 {
3760 	const char	*max_delay_str;
3761 	double		max_delay_tmp;
3762 	int		max_delay_par_nr;
3763 
3764 	/* <maxdelay> is parameter 6 for log[], logrt[], but parameter 5 for log.count[], logrt.count[] */
3765 
3766 	if (0 == is_count_item)
3767 		max_delay_par_nr = 6;
3768 	else
3769 		max_delay_par_nr = 5;
3770 
3771 	if (NULL == (max_delay_str = get_rparam(request, max_delay_par_nr)) || '\0' == *max_delay_str)
3772 	{
3773 		*max_delay = 0.0f;
3774 		return SUCCEED;
3775 	}
3776 
3777 	if (SUCCEED != is_double(max_delay_str, &max_delay_tmp) || 0.0 > max_delay_tmp)
3778 	{
3779 		*error = zbx_dsprintf(*error, "Invalid %s parameter.", (5 == max_delay_par_nr) ? "sixth" : "seventh");
3780 		return FAIL;
3781 	}
3782 
3783 	*max_delay = (float)max_delay_tmp;
3784 	return SUCCEED;
3785 }
3786 
/******************************************************************************
 *                                                                            *
 * Function: init_rotation_type                                               *
 *                                                                            *
 * Purpose: get the log rotation type from the 'options' item key parameter   *
 *                                                                            *
 * Parameters:                                                                *
 *     flags         - [IN] bit flags with item type                          *
 *     request       - [IN] parsed item key                                   *
 *     rotation_type - [OUT] rotation type, not modified on failure           *
 *     error         - [OUT] error message, set on failure                    *
 *                                                                            *
 * Return value: SUCCEED - rotation type was set                              *
 *               FAIL    - unsupported 'options' value for this item type     *
 *                                                                            *
 ******************************************************************************/
static int	init_rotation_type(unsigned char flags, const AGENT_REQUEST *request,
		zbx_log_rotation_options_t *rotation_type, char **error)
{
	/* non-zero for logrt, logrt.count; zero for log, log.count */
	const int	is_logrt = (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags));
	/* <options> is parameter 7 for log, logrt, but parameter 6 for log.count, logrt.count */
	const int	par_nr = (0 == (ZBX_METRIC_FLAG_LOG_COUNT & flags)) ? 7 : 6;
	const char	*opt = get_rparam(request, par_nr);

	if (NULL == opt || '\0' == *opt)
	{
		/* default options */
		*rotation_type = (0 != is_logrt) ? ZBX_LOG_ROTATION_LOGRT : ZBX_LOG_ROTATION_REREAD;
		return SUCCEED;
	}

	if (0 == strcmp(opt, "mtime-noreread"))		/* same meaning for all item types */
	{
		*rotation_type = ZBX_LOG_ROTATION_NO_REREAD;
	}
	else if (0 == strcmp(opt, "mtime-reread"))	/* result depends on item type */
	{
		*rotation_type = (0 != is_logrt) ? ZBX_LOG_ROTATION_LOGRT : ZBX_LOG_ROTATION_REREAD;
	}
	else if (0 != is_logrt && 0 == strcmp(opt, "copytruncate"))
	{
		*rotation_type = ZBX_LOG_ROTATION_LOGCPT;
	}
	else if (0 != is_logrt && 0 == strcmp(opt, "rotate"))
	{
		*rotation_type = ZBX_LOG_ROTATION_LOGRT;
	}
	else
	{
		*error = zbx_strdup(*error, "Invalid parameter \"options\".");
		return FAIL;
	}

	return SUCCEED;
}
3837 
init_persistent_dir_parameter(const char * server,unsigned short port,const char * item_key,int is_count_item,const AGENT_REQUEST * request,char ** persistent_file_name,char ** error)3838 static int	init_persistent_dir_parameter(const char *server, unsigned short port, const char *item_key,
3839 		int is_count_item, const AGENT_REQUEST *request, char **persistent_file_name, char **error)
3840 {
3841 	/* <persistent_dir> is parameter 8 for log[], logrt[], but parameter 7 for log.count[], logrt.count[] */
3842 	/* (here counting starts from 0) */
3843 
3844 	const int	persistent_dir_param_nr = (0 == is_count_item) ? 8 : 7;
3845 	char		*persistent_dir;
3846 #if !defined(_WINDOWS) && !defined(__MINGW32__)
3847 	char		*persistent_serv_dir;
3848 #endif
3849 
3850 	if (NULL == (persistent_dir = get_rparam(request, persistent_dir_param_nr)) || '\0' == *persistent_dir)
3851 		return SUCCEED;
3852 
3853 #if defined(_WINDOWS) || defined(__MINGW32__)
3854 	ZBX_UNUSED(server);
3855 	ZBX_UNUSED(port);
3856 	ZBX_UNUSED(item_key);
3857 	ZBX_UNUSED(persistent_file_name);
3858 
3859 	*error = zbx_dsprintf(*error, "The %s parameter (persistent directory) is not supported on Microsoft Windows.",
3860 			(8 == persistent_dir_param_nr) ? "ninth" : "eighth");
3861 	return FAIL;
3862 #else
3863 	if (NULL != *persistent_file_name)	/* name is set, so all preparation has been done earlier */
3864 		return SUCCEED;
3865 
3866 	/* set up directory for persistent file */
3867 
3868 	if (SUCCEED != is_ascii_string(persistent_dir))		/* reject non-ASCII directory name */
3869 	{
3870 		*error = zbx_dsprintf(*error, "Invalid %s parameter. It contains non-ASCII characters.",
3871 				(8 == persistent_dir_param_nr) ? "ninth" : "eighth");
3872 		return FAIL;
3873 	}
3874 
3875 	if (NULL == (persistent_serv_dir = zbx_create_persistent_server_directory(persistent_dir, server, port, error)))
3876 		return FAIL;
3877 
3878 	*persistent_file_name = zbx_make_persistent_file_name(persistent_serv_dir, item_key);
3879 
3880 	zbx_free(persistent_serv_dir);
3881 
3882 	zabbix_log(LOG_LEVEL_DEBUG, "%s(): set persistent_file_name:[%s]", __func__, *persistent_file_name);
3883 
3884 	return SUCCEED;
3885 #endif
3886 }
3887 
3888 /******************************************************************************
3889  *                                                                            *
3890  * Function: process_log_check                                                *
3891  *                                                                            *
3892  * Comments: Function body is thread-safe if CONFIG_HOSTNAME is not updated   *
3893  *           while log checks are running. Uses callback function             *
3894  *           process_value_cb, so overall thread-safety depends on caller.    *
3895  *           Otherwise supposed to be thread-safe, see pick_logfiles()        *
3896  *           comments.                                                        *
3897  *                                                                            *
3898  ******************************************************************************/
process_log_check(char * server,unsigned short port,zbx_vector_ptr_t * regexps,ZBX_ACTIVE_METRIC * metric,zbx_process_value_func_t process_value_cb,zbx_uint64_t * lastlogsize_sent,int * mtime_sent,char ** error,zbx_vector_pre_persistent_t * prep_vec)3899 int	process_log_check(char *server, unsigned short port, zbx_vector_ptr_t *regexps, ZBX_ACTIVE_METRIC *metric,
3900 		zbx_process_value_func_t process_value_cb, zbx_uint64_t *lastlogsize_sent, int *mtime_sent,
3901 		char **error, zbx_vector_pre_persistent_t *prep_vec)
3902 {
3903 	AGENT_REQUEST			request;
3904 	const char			*filename, *regexp, *encoding, *skip, *output_template;
3905 	char				*encoding_uc = NULL;
3906 	int				max_lines_per_sec, ret = FAIL, s_count, p_count, s_count_orig, is_count_item,
3907 					mtime_orig, big_rec_orig, logfiles_num_new = 0, jumped = 0;
3908 	zbx_log_rotation_options_t	rotation_type;
3909 	zbx_uint64_t			lastlogsize_orig;
3910 	float				max_delay;
3911 	struct st_logfile		*logfiles_new = NULL;
3912 
3913 	if (0 != (ZBX_METRIC_FLAG_LOG_COUNT & metric->flags))
3914 		is_count_item = 1;
3915 	else
3916 		is_count_item = 0;
3917 
3918 	init_request(&request);
3919 
3920 	/* Expected parameters by item: */
3921 	/* log        [file,       <regexp>,<encoding>,<maxlines>,    <mode>,<output>,<maxdelay>, <options>,<persistent_dir>] 9 params */
3922 	/* log.count  [file,       <regexp>,<encoding>,<maxproclines>,<mode>,         <maxdelay>, <options>,<persistent_dir>] 8 params */
3923 	/* logrt      [file_regexp,<regexp>,<encoding>,<maxlines>,    <mode>,<output>,<maxdelay>, <options>,<persistent_dir>] 9 params */
3924 	/* logrt.count[file_regexp,<regexp>,<encoding>,<maxproclines>,<mode>,         <maxdelay>, <options>,<persistent_dir>] 8 params */
3925 
3926 	if (SUCCEED != parse_item_key(metric->key, &request))
3927 	{
3928 		*error = zbx_strdup(*error, "Invalid item key format.");
3929 		goto out;
3930 	}
3931 
3932 	if (SUCCEED != check_number_of_parameters(metric->flags, &request, error))
3933 		goto out;
3934 
3935 	/* parameter 'file' or 'file_regexp' */
3936 
3937 	if (NULL == (filename = get_rparam(&request, 0)) || '\0' == *filename)
3938 	{
3939 		*error = zbx_strdup(*error, "Invalid first parameter.");
3940 		goto out;
3941 	}
3942 
3943 	/* parameter 'regexp' */
3944 
3945 	if (NULL == (regexp = get_rparam(&request, 1)))
3946 	{
3947 		regexp = "";
3948 	}
3949 	else if ('@' == *regexp && SUCCEED != zbx_global_regexp_exists(regexp + 1, regexps))
3950 	{
3951 		*error = zbx_dsprintf(*error, "Global regular expression \"%s\" does not exist.", regexp + 1);
3952 		goto out;
3953 	}
3954 
3955 	/* parameter 'encoding' */
3956 
3957 	if (NULL == (encoding = get_rparam(&request, 2)))
3958 	{
3959 		encoding = "";
3960 	}
3961 	else
3962 	{
3963 		encoding_uc = zbx_strdup(encoding_uc, encoding);
3964 		zbx_strupper(encoding_uc);
3965 		encoding = encoding_uc;
3966 	}
3967 
3968 	/* parameter 'maxlines' or 'maxproclines' */
3969 	if (SUCCEED !=  init_max_lines_per_sec(is_count_item, &request, &max_lines_per_sec, error))
3970 		goto out;
3971 
3972 	/* parameter 'mode' */
3973 
3974 	if (NULL == (skip = get_rparam(&request, 4)) || '\0' == *skip || 0 == strcmp(skip, "all"))
3975 	{
3976 		metric->skip_old_data = 0;
3977 	}
3978 	else if (0 != strcmp(skip, "skip"))
3979 	{
3980 		*error = zbx_strdup(*error, "Invalid fifth parameter.");
3981 		goto out;
3982 	}
3983 
3984 	/* parameter 'output' (not used for log.count[], logrt.count[]) */
3985 	if (0 != is_count_item || (NULL == (output_template = get_rparam(&request, 5))))
3986 		output_template = "";
3987 
3988 	/* parameter 'maxdelay' */
3989 	if (SUCCEED != init_max_delay(is_count_item, &request, &max_delay, error))
3990 		goto out;
3991 
3992 	/* parameter 'options' */
3993 	if (SUCCEED != init_rotation_type(metric->flags, &request, &rotation_type, error))
3994 		goto out;
3995 
3996 	/* parameter 'persistent_dir' */
3997 	if (SUCCEED != init_persistent_dir_parameter(server, port, metric->key, is_count_item, &request,
3998 			&metric->persistent_file_name, error))
3999 	{
4000 		goto out;
4001 	}
4002 
4003 	/* jumping over fast growing log files is not supported with 'copytruncate' */
4004 	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 0.0f != max_delay)
4005 	{
4006 		*error = zbx_strdup(*error, "maxdelay > 0 is not supported with copytruncate option.");
4007 		goto out;
4008 	}
4009 
4010 	/* do not flood Zabbix server if file grows too fast */
4011 	s_count = max_lines_per_sec * metric->refresh;
4012 
4013 	/* do not flood local system if file grows too fast */
4014 	if (0 == is_count_item)
4015 	{
4016 		p_count = MAX_VALUE_LINES_MULTIPLIER * s_count;	/* log[], logrt[] */
4017 	}
4018 	else
4019 	{
4020 		/* In log.count[] and logrt.count[] items the variable 's_count' (max number of lines allowed to be */
4021 		/* sent to server) is used for counting matching lines in logfile(s). 's_count' is counted from max */
4022 		/* value down towards 0. */
4023 
4024 		p_count = s_count_orig = s_count;
4025 
4026 		/* remember current state, we may need to restore it if log.count[] or logrt.count[] result cannot */
4027 		/* be sent to server */
4028 
4029 		lastlogsize_orig = metric->lastlogsize;
4030 		mtime_orig = metric->mtime;
4031 		big_rec_orig = metric->big_rec;
4032 
4033 		/* process_logrt() may modify old log file list 'metric->logfiles' but currently modifications are */
4034 		/* limited to 'retry' flag in existing list elements. We do not preserve original 'retry' flag values */
4035 		/* as there is no need to "rollback" their modifications if log.count[] or logrt.count[] result can */
4036 		/* not be sent to server. */
4037 	}
4038 
4039 #if !defined(_WINDOWS) && !defined(__MINGW32__)
4040 	/* recover state from persistent file only if agent has no already established state */
4041 	if (0 != (ZBX_METRIC_FLAG_NEW & metric->flags) && NULL != metric->persistent_file_name &&
4042 			0 == metric->logfiles_num)
4043 	{
4044 		/* try to restore state from persistent file */
4045 		char	*err_msg = NULL;
4046 		char	buf[MAX_STRING_LEN];
4047 
4048 		if (SUCCEED == zbx_read_persistent_file(metric->persistent_file_name, buf, sizeof(buf), &err_msg))
4049 		{
4050 			zbx_uint64_t	processed_size_tmp = 0;
4051 			int		mtime_tmp = 0;
4052 
4053 			zabbix_log(LOG_LEVEL_DEBUG, "%s(): item \"%s\": persistent file \"%s\" found, data:[%s]",
4054 					__func__, metric->key, metric->persistent_file_name, buf);
4055 
4056 			if (SUCCEED == zbx_restore_file_details(buf, &metric->logfiles, &metric->logfiles_num,
4057 					&processed_size_tmp, &mtime_tmp, &err_msg))
4058 			{
4059 				/* If 'lastlogsize' value from server is not equal to 'processed_size' from   */
4060 				/* persistent file then give priority to persistent file to prevent unwanted  */
4061 				/* re-reading of log file records. */
4062 				/* For 'mtime' give priority to persistent file only if server sent a smaller */
4063 				/* 'mtime' value' (unusual case, it should not happen). The value of 'mtime'  */
4064 				/* from server larger than 'mtime' from persistent file most likely means     */
4065 				/* that some other agent instance has analyzed log files up to server-supplied */
4066 				/* 'mtime' and the current instance needs not to start analysis from 'mtime'  */
4067 				/* restored from persistent file. */
4068 
4069 				if (metric->lastlogsize != processed_size_tmp || metric->mtime < mtime_tmp)
4070 				{
4071 					char	*msg = NULL;
4072 					size_t	msg_alloc = 0, msg_offset = 0;
4073 
4074 					zbx_snprintf_alloc(&msg, &msg_alloc, &msg_offset, "%s(): item \"%s\":"
4075 							" overriding", __func__, metric->key);
4076 
4077 					if (metric->lastlogsize != processed_size_tmp)
4078 					{
4079 						zbx_snprintf_alloc(&msg, &msg_alloc, &msg_offset,
4080 								" lastlogsize: " ZBX_FS_UI64 " -> " ZBX_FS_UI64,
4081 								metric->lastlogsize, processed_size_tmp);
4082 
4083 						metric->lastlogsize = processed_size_tmp;
4084 					}
4085 
4086 					if (metric->mtime < mtime_tmp)
4087 					{
4088 						zbx_snprintf_alloc(&msg, &msg_alloc, &msg_offset,
4089 								" mtime: %d -> %d",
4090 								metric->mtime, mtime_tmp);
4091 
4092 						metric->mtime = mtime_tmp;
4093 					}
4094 
4095 					zabbix_log(LOG_LEVEL_WARNING, "%s from persistent file", msg);
4096 
4097 					zbx_free(msg);
4098 				}
4099 			}
4100 			else
4101 			{
4102 				zabbix_log(LOG_LEVEL_WARNING, "%s(): item \"%s\": persistent file \"%s\" restore error:"
4103 						" %s", __func__, metric->key, metric->persistent_file_name, err_msg);
4104 				zbx_free(err_msg);
4105 			}
4106 		}
4107 		else
4108 		{
4109 			/* persistent file errors are not fatal */
4110 			zabbix_log(LOG_LEVEL_DEBUG, "%s(): item \"%s\": persistent file [%s] does not exist or error:"
4111 					" %s", __func__, metric->key, metric->persistent_file_name, err_msg);
4112 			zbx_free(err_msg);
4113 		}
4114 	}
4115 #endif
4116 	ret = process_logrt(metric->flags, filename, &metric->lastlogsize, &metric->mtime, lastlogsize_sent, mtime_sent,
4117 			&metric->skip_old_data, &metric->big_rec, &metric->use_ino, error, &metric->logfiles,
4118 			metric->logfiles_num, &logfiles_new, &logfiles_num_new, encoding, regexps, regexp,
4119 			output_template, &p_count, &s_count, process_value_cb, server, port, CONFIG_HOSTNAME,
4120 			metric->key_orig, &jumped, max_delay, &metric->start_time, &metric->processed_bytes,
4121 			rotation_type, metric->persistent_file_name, prep_vec);
4122 
4123 	if (0 == is_count_item && NULL != logfiles_new)
4124 	{
4125 		/* for log[] and logrt[] items - switch to the new log file list */
4126 
4127 		destroy_logfile_list(&metric->logfiles, NULL, &metric->logfiles_num);
4128 		metric->logfiles = logfiles_new;
4129 		metric->logfiles_num = logfiles_num_new;
4130 	}
4131 
4132 	if (SUCCEED == ret)
4133 	{
4134 		metric->error_count = 0;
4135 
4136 		if (0 != is_count_item)
4137 		{
4138 			/* send log.count[] or logrt.count[] item value to server */
4139 
4140 			int	match_count;			/* number of matching lines */
4141 			char	buf[ZBX_MAX_UINT64_LEN];
4142 
4143 			match_count = s_count_orig - s_count;
4144 
4145 			zbx_snprintf(buf, sizeof(buf), "%d", match_count);
4146 
4147 			if (SUCCEED == process_value_cb(server, port, CONFIG_HOSTNAME, metric->key_orig, buf,
4148 					ITEM_STATE_NORMAL, &metric->lastlogsize, &metric->mtime, NULL, NULL, NULL, NULL,
4149 					metric->flags | ZBX_METRIC_FLAG_PERSISTENT) || 0 != jumped)
4150 			{
4151 				/* if process_value() fails (i.e. log(rt).count result cannot be sent to server) but */
4152 				/* a jump took place to meet <maxdelay> then we discard the result and keep the state */
4153 				/* after jump */
4154 
4155 				*lastlogsize_sent = metric->lastlogsize;
4156 				*mtime_sent = metric->mtime;
4157 
4158 				/* switch to the new log file list */
4159 				destroy_logfile_list(&metric->logfiles, NULL, &metric->logfiles_num);
4160 				metric->logfiles = logfiles_new;
4161 				metric->logfiles_num = logfiles_num_new;
4162 			}
4163 			else
4164 			{
4165 				/* unable to send data and no jump took place, restore original state to try again */
4166 				/* during the next check */
4167 
4168 				metric->lastlogsize = lastlogsize_orig;
4169 				metric->mtime =  mtime_orig;
4170 				metric->big_rec = big_rec_orig;
4171 
4172 				/* the old log file list 'metric->logfiles' stays in its place, drop the new list */
4173 				destroy_logfile_list(&logfiles_new, NULL, &logfiles_num_new);
4174 			}
4175 		}
4176 	}
4177 	else
4178 	{
4179 		metric->error_count++;
4180 
4181 		if (0 != is_count_item)
4182 		{
4183 			/* restore original state to try again during the next check */
4184 
4185 			metric->lastlogsize = lastlogsize_orig;
4186 			metric->mtime =  mtime_orig;
4187 			metric->big_rec = big_rec_orig;
4188 
4189 			/* the old log file list 'metric->logfiles' stays in its place, drop the new list */
4190 			destroy_logfile_list(&logfiles_new, NULL, &logfiles_num_new);
4191 		}
4192 
4193 		/* suppress first two errors */
4194 		if (3 > metric->error_count)
4195 		{
4196 			zabbix_log(LOG_LEVEL_DEBUG, "suppressing log(rt)(.count) processing error #%d: %s",
4197 					metric->error_count, NULL != *error ? *error : "unknown error");
4198 
4199 			zbx_free(*error);
4200 			ret = SUCCEED;
4201 		}
4202 	}
4203 out:
4204 	zbx_free(encoding_uc);
4205 	free_request(&request);
4206 
4207 	return ret;
4208 }
4209 
find_last_processed_file_in_logfiles_list(struct st_logfile * logfiles,int logfiles_num)4210 struct st_logfile	*find_last_processed_file_in_logfiles_list(struct st_logfile *logfiles, int logfiles_num)
4211 {
4212 	int	i, max_seq = -1, last_file_idx = 0;
4213 
4214 	if (1 == logfiles_num)
4215 		return logfiles;
4216 
4217 	/* The last (at least partially) processed file is the one with the maximum 'seq' value. */
4218 	/* If no one file is processed then return pointer to the list first element. */
4219 
4220 	for (i = 0; i < logfiles_num; i++)
4221 	{
4222 		if (max_seq < logfiles[i].seq)
4223 		{
4224 			max_seq = logfiles[i].seq;
4225 			last_file_idx = i;
4226 		}
4227 	}
4228 
4229 	return logfiles + last_file_idx;
4230 }
4231