1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18 **/
19 
20 #include "common.h"
21 #include "logfiles.h"
22 #include "log.h"
23 #include "active.h"
24 
25 #if defined(_WINDOWS)
26 #	include "symbols.h"
27 #	include "zbxtypes.h"	/* ssize_t */
28 #endif /* _WINDOWS */
29 
#define MAX_LEN_MD5	512	/* maximum size of the initial part of the file to calculate MD5 sum for */

/* status codes used when two log file records are compared; the negative value is parenthesized */
/* so that the macro always expands as a single operand, whatever expression it is used in */
#define ZBX_SAME_FILE_ERROR	(-1)
#define ZBX_SAME_FILE_NO	0
#define ZBX_SAME_FILE_YES	1
#define ZBX_SAME_FILE_RETRY	2
#define ZBX_NO_FILE_ERROR	3
#define ZBX_SAME_FILE_COPY	4

#define ZBX_FILE_PLACE_UNKNOWN	(-1)	/* cannot compare file device and inode numbers */
#define ZBX_FILE_PLACE_OTHER	0	/* both files have different device or inode numbers */
#define ZBX_FILE_PLACE_SAME	1	/* both files have the same device and inode numbers */
42 
43 /******************************************************************************
44  *                                                                            *
45  * Function: split_string                                                     *
46  *                                                                            *
47  * Purpose: separates given string to two parts by given delimiter in string  *
48  *                                                                            *
49  * Parameters:                                                                *
50  *     str -   [IN] a not-empty string to split                               *
51  *     del -   [IN] pointer to a character in the string                      *
52  *     part1 - [OUT] pointer to buffer for the first part with delimiter      *
53  *     part2 - [OUT] pointer to buffer for the second part                    *
54  *                                                                            *
55  * Return value: SUCCEED - on splitting without errors                        *
56  *               FAIL - on splitting with errors                              *
57  *                                                                            *
58  * Author: Dmitry Borovikov, Aleksandrs Saveljevs                             *
59  *                                                                            *
60  * Comments: Memory for "part1" and "part2" is allocated only on SUCCEED.     *
61  *                                                                            *
62  ******************************************************************************/
split_string(const char * str,const char * del,char ** part1,char ** part2)63 static int	split_string(const char *str, const char *del, char **part1, char **part2)
64 {
65 	const char	*__function_name = "split_string";
66 	size_t		str_length, part1_length, part2_length;
67 	int		ret = FAIL;
68 
69 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() str:'%s' del:'%s'", __function_name, str, del);
70 
71 	str_length = strlen(str);
72 
73 	/* since the purpose of this function is to be used in split_filename(), we allow part1 to be */
74 	/* just *del (e.g., "/" - file system root), but we do not allow part2 (filename) to be empty */
75 	if (del < str || del >= (str + str_length - 1))
76 	{
77 		zabbix_log(LOG_LEVEL_DEBUG, "%s() cannot proceed: delimiter is out of range", __function_name);
78 		goto out;
79 	}
80 
81 	part1_length = (size_t)(del - str + 1);
82 	part2_length = str_length - part1_length;
83 
84 	*part1 = (char *)zbx_malloc(*part1, part1_length + 1);
85 	zbx_strlcpy(*part1, str, part1_length + 1);
86 
87 	*part2 = (char *)zbx_malloc(*part2, part2_length + 1);
88 	zbx_strlcpy(*part2, str + part1_length, part2_length + 1);
89 
90 	ret = SUCCEED;
91 out:
92 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s part1:'%s' part2:'%s'", __function_name, zbx_result_string(ret),
93 			*part1, *part2);
94 
95 	return ret;
96 }
97 
98 /******************************************************************************
99  *                                                                            *
100  * Function: split_filename                                                   *
101  *                                                                            *
102  * Purpose: separates full-path file name into directory and file name regexp *
103  *          parts                                                             *
104  *                                                                            *
105  * Parameters:                                                                *
106  *     filename        - [IN] first parameter of logrt[] or logrt.count[]     *
107  *                       item                                                 *
108  *     directory       - [IN/OUT] directory part of the 'filename'            *
109  *     filename_regexp - [IN/OUT] file name regular expression part           *
110  *     err_msg         - [IN/OUT] error message why an item became            *
111  *                       NOTSUPPORTED                                         *
112  *                                                                            *
113  * Return value: SUCCEED - on successful splitting                            *
114  *               FAIL - on unable to split sensibly                           *
115  *                                                                            *
116  * Author: Dmitry Borovikov                                                   *
117  *                                                                            *
118  * Comments: Allocates memory for "directory" and "filename_regexp" only on   *
119  *           SUCCEED. On FAIL memory, allocated for "directory" and           *
120  *           "filename_regexp" is freed.                                      *
121  *                                                                            *
122  ******************************************************************************/
split_filename(const char * filename,char ** directory,char ** filename_regexp,char ** err_msg)123 static int	split_filename(const char *filename, char **directory, char **filename_regexp, char **err_msg)
124 {
125 	const char	*__function_name = "split_filename";
126 	const char	*separator = NULL;
127 	zbx_stat_t	buf;
128 	int		ret = FAIL;
129 #ifdef _WINDOWS
130 	size_t		sz;
131 #endif
132 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s'", __function_name, ZBX_NULL2STR(filename));
133 
134 	if (NULL == filename || '\0' == *filename)
135 	{
136 		*err_msg = zbx_strdup(*err_msg, "Cannot split empty path.");
137 		goto out;
138 	}
139 
140 #ifdef _WINDOWS
141 	/* special processing for Windows, since directory name cannot be simply separated from file name regexp */
142 	for (sz = strlen(filename) - 1, separator = &filename[sz]; separator >= filename; separator--)
143 	{
144 		if (PATH_SEPARATOR != *separator)
145 			continue;
146 
147 		zabbix_log(LOG_LEVEL_DEBUG, "%s() %s", __function_name, filename);
148 		zabbix_log(LOG_LEVEL_DEBUG, "%s() %*s", __function_name, separator - filename + 1, "^");
149 
150 		/* separator must be relative delimiter of the original filename */
151 		if (FAIL == split_string(filename, separator, directory, filename_regexp))
152 		{
153 			*err_msg = zbx_dsprintf(*err_msg, "Cannot split path by \"%c\".", PATH_SEPARATOR);
154 			goto out;
155 		}
156 
157 		sz = strlen(*directory);
158 
159 		/* Windows world verification */
160 		if (sz + 1 > MAX_PATH)
161 		{
162 			*err_msg = zbx_strdup(*err_msg, "Directory path is too long.");
163 			zbx_free(*directory);
164 			zbx_free(*filename_regexp);
165 			goto out;
166 		}
167 
168 		/* Windows "stat" functions cannot get info about directories with '\' at the end of the path, */
169 		/* except for root directories 'x:\' */
170 		if (0 == zbx_stat(*directory, &buf) && S_ISDIR(buf.st_mode))
171 			break;
172 
173 		if (sz > 0 && PATH_SEPARATOR == (*directory)[sz - 1])
174 		{
175 			(*directory)[sz - 1] = '\0';
176 
177 			if (0 == zbx_stat(*directory, &buf) && S_ISDIR(buf.st_mode))
178 			{
179 				(*directory)[sz - 1] = PATH_SEPARATOR;
180 				break;
181 			}
182 		}
183 
184 		zabbix_log(LOG_LEVEL_DEBUG, "cannot find directory '%s'", *directory);
185 		zbx_free(*directory);
186 		zbx_free(*filename_regexp);
187 	}
188 
189 	if (separator < filename)
190 	{
191 		*err_msg = zbx_strdup(*err_msg, "Non-existing disk or directory.");
192 		goto out;
193 	}
194 #else	/* not _WINDOWS */
195 	if (NULL == (separator = strrchr(filename, PATH_SEPARATOR)))
196 	{
197 		*err_msg = zbx_dsprintf(*err_msg, "Cannot find separator \"%c\" in path.", PATH_SEPARATOR);
198 		goto out;
199 	}
200 
201 	if (SUCCEED != split_string(filename, separator, directory, filename_regexp))
202 	{
203 		*err_msg = zbx_dsprintf(*err_msg, "Cannot split path by \"%c\".", PATH_SEPARATOR);
204 		goto out;
205 	}
206 
207 	if (-1 == zbx_stat(*directory, &buf))
208 	{
209 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain directory information: %s", zbx_strerror(errno));
210 		zbx_free(*directory);
211 		zbx_free(*filename_regexp);
212 		goto out;
213 	}
214 
215 	if (0 == S_ISDIR(buf.st_mode))
216 	{
217 		*err_msg = zbx_dsprintf(*err_msg, "Base path \"%s\" is not a directory.", *directory);
218 		zbx_free(*directory);
219 		zbx_free(*filename_regexp);
220 		goto out;
221 	}
222 #endif	/* _WINDOWS */
223 
224 	ret = SUCCEED;
225 out:
226 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s directory:'%s' filename_regexp:'%s'", __function_name,
227 			zbx_result_string(ret), *directory, *filename_regexp);
228 
229 	return ret;
230 }
231 
232 /******************************************************************************
233  *                                                                            *
234  * Function: file_start_md5                                                   *
235  *                                                                            *
236  * Purpose: calculate the MD5 sum of the first block of the file              *
237  *                                                                            *
238  * Parameters:                                                                *
239  *     f        - [IN] file descriptor                                        *
240  *     length   - [IN] length of the block in bytes. Maximum is 512 bytes.    *
241  *     md5buf   - [OUT] output buffer, MD5_DIGEST_SIZE-bytes long, where the  *
242  *                calculated MD5 sum is placed                                *
243  *     filename - [IN] file name, used in error logging                       *
244  *     err_msg  - [IN/OUT] error message why FAIL-ed                          *
245  *                                                                            *
246  * Return value: SUCCEED or FAIL                                              *
247  *                                                                            *
248  ******************************************************************************/
file_start_md5(int f,int length,md5_byte_t * md5buf,const char * filename,char ** err_msg)249 static int	file_start_md5(int f, int length, md5_byte_t *md5buf, const char *filename, char **err_msg)
250 {
251 	md5_state_t	state;
252 	char		buf[MAX_LEN_MD5];
253 	int		rc;
254 
255 	if (MAX_LEN_MD5 < length)
256 	{
257 		*err_msg = zbx_dsprintf(*err_msg, "Length %d exceeds maximum MD5 fragment length of %d.", length,
258 				MAX_LEN_MD5);
259 		return FAIL;
260 	}
261 
262 	if ((zbx_offset_t)-1 == zbx_lseek(f, 0, SEEK_SET))
263 	{
264 		*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to 0 for file \"%s\": %s", filename,
265 				zbx_strerror(errno));
266 		return FAIL;
267 	}
268 
269 	if (length != (rc = (int)read(f, buf, (size_t)length)))
270 	{
271 		if (-1 == rc)
272 		{
273 			*err_msg = zbx_dsprintf(*err_msg, "Cannot read %d bytes from file \"%s\": %s", length, filename,
274 					zbx_strerror(errno));
275 		}
276 		else
277 		{
278 			*err_msg = zbx_dsprintf(*err_msg, "Cannot read %d bytes from file \"%s\". Read %d bytes only.",
279 					length, filename, rc);
280 		}
281 
282 		return FAIL;
283 	}
284 
285 	zbx_md5_init(&state);
286 	zbx_md5_append(&state, (const md5_byte_t *)buf, length);
287 	zbx_md5_finish(&state, md5buf);
288 
289 	return SUCCEED;
290 }
291 
292 #ifdef _WINDOWS
293 /******************************************************************************
294  *                                                                            *
295  * Function: file_id                                                          *
296  *                                                                            *
297  * Purpose: get Microsoft Windows file device ID, 64-bit FileIndex or         *
298  *          128-bit FileId                                                    *
299  *                                                                            *
300  * Parameters:                                                                *
301  *     f        - [IN] file descriptor                                        *
302  *     use_ino  - [IN] how to use file IDs                                    *
303  *     dev      - [OUT] device ID                                             *
304  *     ino_lo   - [OUT] 64-bit nFileIndex or lower 64-bits of FileId          *
305  *     ino_hi   - [OUT] higher 64-bits of FileId                              *
306  *     filename - [IN] file name, used in error logging                       *
307  *     err_msg  - [IN/OUT] error message why an item became NOTSUPPORTED      *
308  *                                                                            *
309  * Return value: SUCCEED or FAIL                                              *
310  *                                                                            *
311  ******************************************************************************/
file_id(int f,int use_ino,zbx_uint64_t * dev,zbx_uint64_t * ino_lo,zbx_uint64_t * ino_hi,const char * filename,char ** err_msg)312 static int	file_id(int f, int use_ino, zbx_uint64_t *dev, zbx_uint64_t *ino_lo, zbx_uint64_t *ino_hi,
313 		const char *filename, char **err_msg)
314 {
315 	int				ret = FAIL;
316 	intptr_t			h;	/* file HANDLE */
317 	BY_HANDLE_FILE_INFORMATION	hfi;
318 	ZBX_FILE_ID_INFO		fid;
319 
320 	if (-1 == (h = _get_osfhandle(f)))
321 	{
322 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain handle from descriptor of file \"%s\": %s",
323 				filename, zbx_strerror(errno));
324 		return ret;
325 	}
326 
327 	if (1 == use_ino || 0 == use_ino)
328 	{
329 		/* Although nFileIndexHigh and nFileIndexLow cannot be reliably used to identify files when */
330 		/* use_ino = 0 (e.g. on FAT32, exFAT), we copy indexes to have at least correct debug logs. */
331 		if (0 != GetFileInformationByHandle((HANDLE)h, &hfi))
332 		{
333 			*dev = hfi.dwVolumeSerialNumber;
334 			*ino_lo = (zbx_uint64_t)hfi.nFileIndexHigh << 32 | (zbx_uint64_t)hfi.nFileIndexLow;
335 			*ino_hi = 0;
336 		}
337 		else
338 		{
339 			*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain information for file \"%s\": %s",
340 					filename, strerror_from_system(GetLastError()));
341 			return ret;
342 		}
343 	}
344 	else if (2 == use_ino)
345 	{
346 		if (NULL != zbx_GetFileInformationByHandleEx)
347 		{
348 			if (0 != zbx_GetFileInformationByHandleEx((HANDLE)h, zbx_FileIdInfo, &fid, sizeof(fid)))
349 			{
350 				*dev = fid.VolumeSerialNumber;
351 				*ino_lo = fid.FileId.LowPart;
352 				*ino_hi = fid.FileId.HighPart;
353 			}
354 			else
355 			{
356 				*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain extended information for file"
357 						" \"%s\": %s", filename, strerror_from_system(GetLastError()));
358 				return ret;
359 			}
360 		}
361 	}
362 	else
363 	{
364 		THIS_SHOULD_NEVER_HAPPEN;
365 		return ret;
366 	}
367 
368 	ret = SUCCEED;
369 
370 	return ret;
371 }
372 
373 /******************************************************************************
374  *                                                                            *
375  * Function: set_use_ino_by_fs_type                                           *
376  *                                                                            *
377  * Purpose: find file system type and set 'use_ino' parameter                 *
378  *                                                                            *
379  * Parameters:                                                                *
380  *     path     - [IN] directory or file name                                 *
381  *     use_ino  - [IN] how to use file IDs                                    *
382  *     err_msg  - [IN/OUT] error message why an item became NOTSUPPORTED      *
383  *                                                                            *
384  * Return value: SUCCEED or FAIL                                              *
385  *                                                                            *
386  ******************************************************************************/
set_use_ino_by_fs_type(const char * path,int * use_ino,char ** err_msg)387 static int	set_use_ino_by_fs_type(const char *path, int *use_ino, char **err_msg)
388 {
389 	char	*utf8;
390 	wchar_t	*path_uni, mount_point[MAX_PATH + 1], fs_type[MAX_PATH + 1];
391 
392 	path_uni = zbx_utf8_to_unicode(path);
393 
394 	/* get volume mount point */
395 	if (0 == GetVolumePathName(path_uni, mount_point,
396 			sizeof(mount_point) / sizeof(wchar_t)))
397 	{
398 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain volume mount point for file \"%s\": %s", path,
399 				strerror_from_system(GetLastError()));
400 		zbx_free(path_uni);
401 		return FAIL;
402 	}
403 
404 	zbx_free(path_uni);
405 
406 	/* Which file system type this directory resides on ? */
407 	if (0 == GetVolumeInformation(mount_point, NULL, 0, NULL, NULL, NULL, fs_type,
408 			sizeof(fs_type) / sizeof(wchar_t)))
409 	{
410 		utf8 = zbx_unicode_to_utf8(mount_point);
411 		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain volume information for directory \"%s\": %s", utf8,
412 				strerror_from_system(GetLastError()));
413 		zbx_free(utf8);
414 		return FAIL;
415 	}
416 
417 	utf8 = zbx_unicode_to_utf8(fs_type);
418 
419 	if (0 == strcmp(utf8, "NTFS"))
420 		*use_ino = 1;			/* 64-bit FileIndex */
421 	else if (0 == strcmp(utf8, "ReFS"))
422 		*use_ino = 2;			/* 128-bit FileId */
423 	else
424 		*use_ino = 0;			/* cannot use inodes to identify files (e.g. FAT32) */
425 
426 	zabbix_log(LOG_LEVEL_DEBUG, "log files reside on '%s' file system", utf8);
427 	zbx_free(utf8);
428 
429 	return SUCCEED;
430 }
431 #endif
432 
433 /******************************************************************************
434  *                                                                            *
435  * Function: print_logfile_list                                               *
436  *                                                                            *
437  * Purpose: write logfile list into log for debugging                         *
438  *                                                                            *
439  * Parameters:                                                                *
440  *     logfiles     - [IN] array of logfiles                                  *
441  *     logfiles_num - [IN] number of elements in the array                    *
442  *                                                                            *
443  ******************************************************************************/
print_logfile_list(const struct st_logfile * logfiles,int logfiles_num)444 static void	print_logfile_list(const struct st_logfile *logfiles, int logfiles_num)
445 {
446 	int	i;
447 
448 	for (i = 0; i < logfiles_num; i++)
449 	{
450 		zabbix_log(LOG_LEVEL_DEBUG, "   nr:%d filename:'%s' mtime:%d size:" ZBX_FS_UI64 " processed_size:"
451 				ZBX_FS_UI64 " seq:%d copy_of:%d incomplete:%d dev:" ZBX_FS_UI64 " ino_hi:" ZBX_FS_UI64
452 				" ino_lo:" ZBX_FS_UI64
453 				" md5size:%d md5buf:%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
454 				i, logfiles[i].filename, logfiles[i].mtime, logfiles[i].size,
455 				logfiles[i].processed_size, logfiles[i].seq, logfiles[i].copy_of,
456 				logfiles[i].incomplete, logfiles[i].dev, logfiles[i].ino_hi, logfiles[i].ino_lo,
457 				logfiles[i].md5size, logfiles[i].md5buf[0], logfiles[i].md5buf[1],
458 				logfiles[i].md5buf[2], logfiles[i].md5buf[3], logfiles[i].md5buf[4],
459 				logfiles[i].md5buf[5], logfiles[i].md5buf[6], logfiles[i].md5buf[7],
460 				logfiles[i].md5buf[8], logfiles[i].md5buf[9], logfiles[i].md5buf[10],
461 				logfiles[i].md5buf[11], logfiles[i].md5buf[12], logfiles[i].md5buf[13],
462 				logfiles[i].md5buf[14], logfiles[i].md5buf[15]);
463 	}
464 }
465 
466 /******************************************************************************
467  *                                                                            *
468  * Function: compare_file_places                                              *
469  *                                                                            *
470  * Purpose: compare device numbers and inode numbers of 2 files               *
471  *                                                                            *
472  * Parameters: old_file - [IN] details of the 1st log file                    *
473  *             new_file - [IN] details of the 2nd log file                    *
474  *             use_ino  - [IN] 0 - do not use inodes in comparison,           *
475  *                             1 - use up to 64-bit inodes in comparison,     *
476  *                             2 - use 128-bit inodes in comparison.          *
477  *                                                                            *
478  * Return value: ZBX_FILE_PLACE_SAME - both files have the same place         *
479  *               ZBX_FILE_PLACE_OTHER - files reside in different places      *
480  *               ZBX_FILE_PLACE_UNKNOWN - cannot compare places (no inodes)   *
481  *                                                                            *
482  ******************************************************************************/
compare_file_places(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino)483 static int	compare_file_places(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino)
484 {
485 	if (1 == use_ino || 2 == use_ino)
486 	{
487 		if (old_file->ino_lo != new_file->ino_lo || old_file->dev != new_file->dev ||
488 				(2 == use_ino && old_file->ino_hi != new_file->ino_hi))
489 		{
490 			return ZBX_FILE_PLACE_OTHER;
491 		}
492 		else
493 			return ZBX_FILE_PLACE_SAME;
494 	}
495 
496 	return ZBX_FILE_PLACE_UNKNOWN;
497 }
498 
/******************************************************************************
 *                                                                            *
 * Function: open_file_helper                                                 *
 *                                                                            *
 * Purpose: open specified file for reading                                   *
 *                                                                            *
 * Parameters: pathname - [IN] full pathname of file                          *
 *             err_msg  - [IN/OUT] error message why file could not be opened *
 *                                                                            *
 * Return value: file descriptor on success or -1 on error                    *
 *                                                                            *
 ******************************************************************************/
static int	open_file_helper(const char *pathname, char **err_msg)
{
	int	fd = zbx_open(pathname, O_RDONLY);

	if (-1 != fd)
		return fd;

	/* the error text is built right after the failed call, while 'errno' still describes it */
	*err_msg = zbx_dsprintf(*err_msg, "Cannot open file \"%s\": %s", pathname, zbx_strerror(errno));

	return -1;
}
520 
521 /******************************************************************************
522  *                                                                            *
523  * Function: close_file_helper                                                *
524  *                                                                            *
525  * Purpose: close specified file                                              *
526  *                                                                            *
527  * Parameters: fd       - [IN] file descriptor to close                       *
528  *             pathname - [IN] pathname of file, used for error reporting     *
529  *             err_msg  - [IN/OUT] error message why file could not be closed *
530  *                                                                            *
531  * Return value: SUCCEED or FAIL                                              *
532  *                                                                            *
533  ******************************************************************************/
close_file_helper(int fd,const char * pathname,char ** err_msg)534 static int	close_file_helper(int fd, const char *pathname, char **err_msg)
535 {
536 	if (0 == close(fd))
537 		return SUCCEED;
538 
539 	*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", pathname, zbx_strerror(errno));
540 
541 	return FAIL;
542 }
543 
544 /******************************************************************************
545  *                                                                            *
546  * Function: examine_md5_and_place                                            *
547  *                                                                            *
548  * Purpose: from MD5 sums of initial blocks and places of 2 files make        *
549  *          a conclusion is it the same file, a pair 'original/copy' or       *
550  *          2 different files                                                 *
551  *                                                                            *
552  * Parameters:  buf1          - [IN] MD5 sum of initial block of he 1st file  *
553  *              buf2          - [IN] MD5 sum of initial block of he 2nd file  *
554  *              is_same_place - [IN] equality of file places                  *
555  *                                                                            *
556  * Return value: ZBX_SAME_FILE_NO - they are 2 different files                *
557  *               ZBX_SAME_FILE_YES - 2 files are (assumed) to be the same     *
558  *               ZBX_SAME_FILE_COPY - one file is copy of the other           *
559  *                                                                            *
560  * Comments: in case files places are unknown but MD5 sums of initial blocks  *
561  *           match it is assumed to be the same file                          *
562  *                                                                            *
563  ******************************************************************************/
examine_md5_and_place(const md5_byte_t * buf1,const md5_byte_t * buf2,size_t size,int is_same_place)564 static int	examine_md5_and_place(const md5_byte_t *buf1, const md5_byte_t *buf2, size_t size, int is_same_place)
565 {
566 	if (0 == memcmp(buf1, buf2, size))
567 	{
568 		switch (is_same_place)
569 		{
570 			case ZBX_FILE_PLACE_UNKNOWN:
571 			case ZBX_FILE_PLACE_SAME:
572 				return ZBX_SAME_FILE_YES;
573 			case ZBX_FILE_PLACE_OTHER:
574 				return ZBX_SAME_FILE_COPY;
575 		}
576 	}
577 
578 	return ZBX_SAME_FILE_NO;
579 }
580 
581 /******************************************************************************
582  *                                                                            *
583  * Function: is_same_file_logcpt                                              *
584  *                                                                            *
585  * Purpose: find out if a file from the old list and a file from the new list *
586  *          could be the same file or copy in case of copy/truncate rotation  *
587  *                                                                            *
588  * Parameters:                                                                *
589  *          old_file - [IN] file from the old list                            *
590  *          new_file - [IN] file from the new list                            *
591  *          use_ino  - [IN] 0 - do not use inodes in comparison,              *
592  *                          1 - use up to 64-bit inodes in comparison,        *
593  *                          2 - use 128-bit inodes in comparison.             *
594  *          err_msg  - [IN/OUT] error message why an item became              *
595  *                     NOTSUPPORTED                                           *
596  *                                                                            *
597  * Return value: ZBX_SAME_FILE_NO - it is not the same file                   *
598  *               ZBX_SAME_FILE_YES - it could be the same file                *
599  *               ZBX_SAME_FILE_COPY - it is a copy                            *
600  *               ZBX_SAME_FILE_ERROR - error                                  *
601  *                                                                            *
602  * Comments: In some cases we can say that it IS NOT the same file.           *
603  *           In other cases it COULD BE the same file or copy.                *
604  *                                                                            *
605  ******************************************************************************/
is_same_file_logcpt(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino,char ** err_msg)606 static int	is_same_file_logcpt(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino,
607 		char **err_msg)
608 {
609 	int	is_same_place;
610 
611 	if (old_file->mtime > new_file->mtime)
612 		return ZBX_SAME_FILE_NO;
613 
614 	if (-1 == old_file->md5size || -1 == new_file->md5size)
615 	{
616 		/* Cannot compare MD5 sums. Assume two different files - reporting twice is better than skipping. */
617 		return ZBX_SAME_FILE_NO;
618 	}
619 
620 	is_same_place = compare_file_places(old_file, new_file, use_ino);
621 
622 	if (old_file->md5size == new_file->md5size)
623 	{
624 		return examine_md5_and_place(old_file->md5buf, new_file->md5buf, sizeof(new_file->md5buf),
625 				is_same_place);
626 	}
627 
628 	if (0 < old_file->md5size && 0 < new_file->md5size)
629 	{
630 		/* MD5 sums have been calculated from initial blocks of diferent sizes */
631 
632 		const struct st_logfile	*p_smaller, *p_larger;
633 		int			f, ret;
634 		md5_byte_t		md5tmp[MD5_DIGEST_SIZE];
635 
636 		if (old_file->md5size < new_file->md5size)
637 		{
638 			p_smaller = old_file;
639 			p_larger = new_file;
640 		}
641 		else
642 		{
643 			p_smaller = new_file;
644 			p_larger = old_file;
645 		}
646 
647 		if (-1 == (f = open_file_helper(p_larger->filename, err_msg)))
648 			return ZBX_SAME_FILE_ERROR;
649 
650 		if (SUCCEED == file_start_md5(f, p_smaller->md5size, md5tmp, p_larger->filename, err_msg))
651 			ret = examine_md5_and_place(p_smaller->md5buf, md5tmp, sizeof(md5tmp), is_same_place);
652 		else
653 			ret = ZBX_SAME_FILE_ERROR;
654 
655 		if (0 != close(f))
656 		{
657 			if (ZBX_SAME_FILE_ERROR != ret)
658 			{
659 				*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", p_larger->filename,
660 						zbx_strerror(errno));
661 				ret = ZBX_SAME_FILE_ERROR;
662 			}
663 		}
664 
665 		return ret;
666 	}
667 
668 	return ZBX_SAME_FILE_NO;
669 }
670 
671 /******************************************************************************
672  *                                                                            *
673  * Function: is_same_file_logrt                                               *
674  *                                                                            *
675  * Purpose: find out if a file from the old list and a file from the new list *
676  *          could be the same file in case of simple rotation                 *
677  *                                                                            *
678  * Parameters:                                                                *
679  *          old_file - [IN] file from the old list                            *
680  *          new_file - [IN] file from the new list                            *
681  *          use_ino  - [IN] 0 - do not use inodes in comparison,              *
682  *                          1 - use up to 64-bit inodes in comparison,        *
683  *                          2 - use 128-bit inodes in comparison.             *
684  *          options  - [IN] log rotation options                              *
685  *          err_msg  - [IN/OUT] error message why an item became              *
686  *                     NOTSUPPORTED                                           *
687  *                                                                            *
 * Return value: ZBX_SAME_FILE_NO - it is not the same file,                  *
 *               ZBX_SAME_FILE_YES - it could be the same file,               *
 *               ZBX_SAME_FILE_COPY - it is a copy (possible only with        *
 *                                    ZBX_LOG_ROTATION_LOGCPT option),        *
 *               ZBX_SAME_FILE_ERROR - error,                                 *
 *               ZBX_SAME_FILE_RETRY - retry on the next check.               *
692  *                                                                            *
693  * Comments: In some cases we can say that it IS NOT the same file.           *
694  *           We can never say that it IS the same file and it has not been    *
695  *           truncated and replaced with a similar one.                       *
696  *                                                                            *
697  ******************************************************************************/
static int	is_same_file_logrt(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino,
		zbx_log_rotation_options_t options, char **err_msg)
{
	int		fd, res;
	md5_byte_t	md5_prefix[MD5_DIGEST_SIZE];

	/* this rotation option has its own comparison rules */
	if (ZBX_LOG_ROTATION_LOGCPT == options)
		return is_same_file_logcpt(old_file, new_file, use_ino, err_msg);

	/* files cannot reside on different devices or occupy different inodes */
	if (ZBX_FILE_PLACE_OTHER == compare_file_places(old_file, new_file, use_ino))
		return ZBX_SAME_FILE_NO;

	/* File size cannot decrease. Truncating or replacing a file with a smaller one */
	/* counts as 2 different files. */
	if (new_file->size < old_file->size)
		return ZBX_SAME_FILE_NO;

	if (new_file->size == old_file->size && new_file->mtime > old_file->mtime)
	{
		/* Depending on file system it's possible that stat() was called */
		/* between mtime and file size update. In this situation we will */
		/* get a file with the old size and a new mtime.                 */
		/* On the first try we assume it's the same file, just its size  */
		/* has not been changed yet.                                     */
		/* If the size has not changed on the next check, then we assume */
		/* that some tampering was done and to be safe we will treat it  */
		/* as a different file (or as the same file if rereading is      */
		/* disabled by the rotation options).                            */

		if (0 == old_file->retry)
		{
			/* first occurrence - ask the caller to check again later */

			if (ZBX_LOG_ROTATION_NO_REREAD != options)
			{
				zabbix_log(LOG_LEVEL_WARNING, "the modification time of log file \"%s\" has been"
						" updated without changing its size, try checking again later",
						old_file->filename);
			}

			return ZBX_SAME_FILE_RETRY;
		}

		/* the size is still unchanged on the repeated check */

		if (ZBX_LOG_ROTATION_NO_REREAD != options)
		{
			zabbix_log(LOG_LEVEL_WARNING, "after changing modification time the size of log file \"%s\""
					" still has not been updated, consider it to be a new file",
					old_file->filename);
			return ZBX_SAME_FILE_NO;
		}

		zabbix_log(LOG_LEVEL_WARNING, "after changing modification time the size of log file \"%s\""
				" still has not been updated, consider it to be same file",
				old_file->filename);
		return ZBX_SAME_FILE_YES;
	}

	/* Cannot compare MD5 sums. Assume two different files - reporting twice is better than skipping. */
	if (-1 == old_file->md5size || -1 == new_file->md5size)
		return ZBX_SAME_FILE_NO;

	/* file initial block size from which MD5 sum is calculated cannot decrease */
	if (new_file->md5size < old_file->md5size)
		return ZBX_SAME_FILE_NO;

	/* MD5 sums of equally sized initial blocks are compared directly */
	if (old_file->md5size == new_file->md5size)
	{
		return (0 == memcmp(old_file->md5buf, new_file->md5buf, sizeof(new_file->md5buf))) ?
				ZBX_SAME_FILE_YES : ZBX_SAME_FILE_NO;
	}

	/* the old file has no initial block to compare with, nothing contradicts it being the same file */
	if (0 >= old_file->md5size)
		return ZBX_SAME_FILE_YES;

	/* MD5 for the old file has been calculated from a smaller block than for the new file. */
	/* Calculate MD5 of the new file for the block size of the old file and compare the sums. */

	if (-1 == (fd = open_file_helper(new_file->filename, err_msg)))
		return ZBX_SAME_FILE_ERROR;

	if (SUCCEED != file_start_md5(fd, old_file->md5size, md5_prefix, new_file->filename, err_msg))
		res = ZBX_SAME_FILE_ERROR;
	else if (0 == memcmp(old_file->md5buf, md5_prefix, sizeof(md5_prefix)))
		res = ZBX_SAME_FILE_YES;
	else
		res = ZBX_SAME_FILE_NO;

	/* a failure to close the file is reported only if no earlier error has been recorded */
	if (0 != close(fd) && ZBX_SAME_FILE_ERROR != res)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", new_file->filename,
				zbx_strerror(errno));
		res = ZBX_SAME_FILE_ERROR;
	}

	return res;
}
805 
806 /******************************************************************************
807  *                                                                            *
808  * Function: cross_out                                                        *
809  *                                                                            *
810  * Purpose: fill the given row and column with '0' except the element at the  *
811  *          cross point and protected columns and protected rows              *
812  *                                                                            *
813  * Parameters:                                                                *
814  *          arr    - [IN/OUT] two dimensional array                           *
815  *          n_rows - [IN] number of rows in the array                         *
816  *          n_cols - [IN] number of columns in the array                      *
817  *          row    - [IN] number of cross point row                           *
818  *          col    - [IN] number of cross point column                        *
819  *          p_rows - [IN] vector with 'n_rows' elements.                      *
820  *                        Value '1' means protected row.                      *
821  *          p_cols - [IN] vector with 'n_cols' elements.                      *
822  *                        Value '1' means protected column.                   *
823  *                                                                            *
824  * Example:                                                                   *
825  *     Given array                                                            *
826  *                                                                            *
827  *         1 1 1 1                                                            *
828  *         1 1 1 1                                                            *
829  *         1 1 1 1                                                            *
830  *                                                                            *
831  *     and row = 1, col = 2 and no protected rows and columns                 *
832  *     the array is modified as                                               *
833  *                                                                            *
834  *         1 1 0 1                                                            *
835  *         0 0 1 0                                                            *
836  *         1 1 0 1                                                            *
837  *                                                                            *
838  ******************************************************************************/
static void	cross_out(char *arr, int n_rows, int n_cols, int row, int col, const char *p_rows, const char *p_cols)
{
	char	*row_cells = arr + row * n_cols;	/* the first element of the cross point row */
	int	r, c;

	/* clear the cross point row, keeping the cross point itself and elements in protected columns */
	for (c = 0; c < n_cols; c++)
	{
		if (c == col || '1' == p_cols[c])
			continue;

		row_cells[c] = '0';
	}

	/* clear the cross point column, keeping the cross point itself and elements in protected rows */
	for (r = 0; r < n_rows; r++)
	{
		if (r == row || '1' == p_rows[r])
			continue;

		arr[r * n_cols + col] = '0';
	}
}
860 
861 /******************************************************************************
862  *                                                                            *
863  * Function: is_uniq_row                                                      *
864  *                                                                            *
865  * Purpose: check if there is only one element '1' or '2' in the given row    *
866  *                                                                            *
867  * Parameters:                                                                *
868  *          arr    - [IN] two dimensional array                               *
869  *          n_cols - [IN] number of columns in the array                      *
870  *          row    - [IN] number of row to search                             *
871  *                                                                            *
872  * Return value: number of column where the element '1' or '2' was found or   *
873  *               -1 if there are zero or multiple elements '1' or '2' in the  *
874  *               row                                                          *
875  *                                                                            *
876  ******************************************************************************/
static int	is_uniq_row(const char * const arr, int n_cols, int row)
{
	const char	*cells = arr + row * n_cols;	/* the first element of the 'row' */
	int		c, found = -1;

	for (c = 0; c < n_cols; c++)
	{
		if ('1' != cells[c] && '2' != cells[c])
			continue;

		if (-1 != found)	/* the second mapping in the row - the mapping is not unique */
			return -1;

		found = c;
	}

	/* still -1 if the row contains no mappings at all */
	return found;
}
902 
903 /******************************************************************************
904  *                                                                            *
905  * Function: is_uniq_col                                                      *
906  *                                                                            *
907  * Purpose: check if there is only one element '1' or '2' in the given column *
908  *                                                                            *
909  * Parameters:                                                                *
910  *          arr    - [IN] two dimensional array                               *
911  *          n_rows - [IN] number of rows in the array                         *
912  *          n_cols - [IN] number of columns in the array                      *
913  *          col    - [IN] number of column to search                          *
914  *                                                                            *
 * Return value: number of row where the element '1' or '2' was found or      *
916  *               -1 if there are zero or multiple elements '1' or '2' in the  *
917  *               column                                                       *
918  *                                                                            *
919  ******************************************************************************/
static int	is_uniq_col(const char * const arr, int n_rows, int n_cols, int col)
{
	int	r, found = -1;

	for (r = 0; r < n_rows; r++)
	{
		const char	cell = arr[r * n_cols + col];	/* element of the 'col' in the r-th row */

		if ('1' != cell && '2' != cell)
			continue;

		if (-1 != found)	/* the second mapping in the column - the mapping is not unique */
			return -1;

		found = r;
	}

	/* still -1 if the column contains no mappings at all */
	return found;
}
945 
946 /******************************************************************************
947  *                                                                            *
948  * Function: is_old2new_unique_mapping                                        *
949  *                                                                            *
950  * Purpose: check if 'old2new' array has only unique mappings                 *
951  *                                                                            *
952  * Parameters:                                                                *
953  *          old2new - [IN] two dimensional array of possible mappings         *
954  *          num_old - [IN] number of elements in the old file list            *
955  *          num_new - [IN] number of elements in the new file list            *
956  *                                                                            *
957  * Return value: SUCCEED - all mappings are unique,                           *
958  *               FAIL - there are non-unique mappings                         *
959  *                                                                            *
960  ******************************************************************************/
is_old2new_unique_mapping(const char * const old2new,int num_old,int num_new)961 static int	is_old2new_unique_mapping(const char * const old2new, int num_old, int num_new)
962 {
963 	int	i;
964 
965 	/* Is there 1:1 mapping in both directions between files in the old and the new list ? */
966 	/* In this case every row and column has not more than one element '1' or '2', others are '0'. */
967 	/* This is expected on UNIX (using inode numbers) and MS Windows (using FileID on NTFS, ReFS) */
968 	/* unless 'copytruncate' rotation type is combined with multiple log file copies. */
969 
970 	for (i = 0; i < num_old; i++)		/* loop over rows (old files) */
971 	{
972 		if (-1 == is_uniq_row(old2new, num_new, i))
973 			return FAIL;
974 	}
975 
976 	for (i = 0; i < num_new; i++)		/* loop over columns (new files) */
977 	{
978 		if (-1 == is_uniq_col(old2new, num_old, num_new, i))
979 			return FAIL;
980 	}
981 
982 	return SUCCEED;
983 }
984 
985 /******************************************************************************
986  *                                                                            *
987  * Function: resolve_old2new                                                  *
988  *                                                                            *
989  * Purpose: resolve non-unique mappings                                       *
990  *                                                                            *
991  * Parameters:                                                                *
 *     old2new - [IN/OUT] two dimensional array of possible mappings          *
993  *     num_old - [IN] number of elements in the old file list                 *
994  *     num_new - [IN] number of elements in the new file list                 *
995  *                                                                            *
996  ******************************************************************************/
resolve_old2new(char * old2new,int num_old,int num_new)997 static void	resolve_old2new(char *old2new, int num_old, int num_new)
998 {
999 	int	i;
1000 	char	*protected_rows = NULL, *protected_cols = NULL;
1001 
1002 	if (SUCCEED == is_old2new_unique_mapping(old2new, num_old, num_new))
1003 		return;
1004 
1005 	/* Non-unique mapping is expected: */
1006 	/*   - on MS Windows using FAT32 and other file systems where inodes or file indexes are either not */
1007 	/*     preserved if a file is renamed or are not applicable, */
1008 	/*   - in 'copytruncate' rotation mode if multiple copies of log files are present. */
1009 
1010 	zabbix_log(LOG_LEVEL_DEBUG, "resolve_old2new(): non-unique mapping");
1011 
1012 	/* protect unique mappings from further modifications */
1013 
1014 	protected_rows = (char *)zbx_calloc(protected_rows, (size_t)num_old, sizeof(char));
1015 	protected_cols = (char *)zbx_calloc(protected_cols, (size_t)num_new, sizeof(char));
1016 
1017 	for (i = 0; i < num_old; i++)
1018 	{
1019 		int	c;
1020 
1021 		if (-1 != (c = is_uniq_row(old2new, num_new, i)) && -1 != is_uniq_col(old2new, num_old, num_new, c))
1022 		{
1023 			protected_rows[i] = '1';
1024 			protected_cols[c] = '1';
1025 		}
1026 	}
1027 
1028 	/* resolve the remaining non-unique mappings - turn them into unique ones */
1029 
1030 	if (num_old <= num_new)				/* square or wide array */
1031 	{
1032 		/****************************************************************************************************
1033 		 *                                                                                                  *
1034 		 * Example for a wide array:                                                                        *
1035 		 *                                                                                                  *
1036 		 *            D.log C.log B.log A.log                                                               *
1037 		 *           ------------------------                                                               *
1038 		 *    3.log | <1>    1     1     1                                                                  *
1039 		 *    2.log |  1    <1>    1     1                                                                  *
1040 		 *    1.log |  1     1    <1>    1                                                                  *
1041 		 *                                                                                                  *
1042 		 * There are 3 files in the old log file list and 4 files in the new log file list.                 *
1043 		 * The mapping is totally non-unique: the old log file '3.log' could have become the new 'D.log' or *
1044 		 * 'C.log', or 'B.log', or 'A.log' - we don't know for sure.                                        *
1045 		 * We make an assumption that a reasonable solution will be to proceed as if '3.log' was renamed to *
1046 		 * 'D.log', '2.log' - to 'C.log' and '1.log' - to 'B.log'.                                          *
1047 		 * We modify the array according to this assumption:                                                *
1048 		 *                                                                                                  *
1049 		 *            D.log C.log B.log A.log                                                               *
1050 		 *           ------------------------                                                               *
1051 		 *    3.log | <1>    0     0     0                                                                  *
1052 		 *    2.log |  0    <1>    0     0                                                                  *
1053 		 *    1.log |  0     0    <1>    0                                                                  *
1054 		 *                                                                                                  *
1055 		 * Now the mapping is unique. The file 'A.log' is counted as a new file to be analyzed from the     *
1056 		 * start.                                                                                           *
1057 		 *                                                                                                  *
1058 		 ****************************************************************************************************/
1059 
1060 		for (i = 0; i < num_old; i++)		/* loop over rows from top-left corner */
1061 		{
1062 			char	*p;
1063 			int	j;
1064 
1065 			if ('1' == protected_rows[i])
1066 				continue;
1067 
1068 			p = old2new + i * num_new;	/* the first element of the current row */
1069 
1070 			for (j = 0; j < num_new; j++)
1071 			{
1072 				if (('1' == p[j] || '2' == p[j]) && '1' != protected_cols[j])
1073 				{
1074 					cross_out(old2new, num_old, num_new, i, j, protected_rows, protected_cols);
1075 					break;
1076 				}
1077 			}
1078 		}
1079 	}
1080 	else	/* tall array */
1081 	{
1082 		/****************************************************************************************************
1083 		 *                                                                                                  *
1084 		 * Example for a tall array:                                                                        *
1085 		 *                                                                                                  *
1086 		 *            D.log C.log B.log A.log                                                               *
1087 		 *           ------------------------                                                               *
1088 		 *    6.log |  1     1     1     1                                                                  *
1089 		 *    5.log |  1     1     1     1                                                                  *
1090 		 *    4.log | <1>    1     1     1                                                                  *
1091 		 *    3.log |  1    <1>    1     1                                                                  *
1092 		 *    2.log |  1     1    <1>    1                                                                  *
1093 		 *    1.log |  1     1     1    <1>                                                                 *
1094 		 *                                                                                                  *
1095 		 * There are 6 files in the old log file list and 4 files in the new log file list.                 *
1096 		 * The mapping is totally non-unique: the old log file '6.log' could have become the new 'D.log' or *
1097 		 * 'C.log', or 'B.log', or 'A.log' - we don't know for sure.                                        *
1098 		 * We make an assumption that a reasonable solution will be to proceed as if '1.log' was renamed to *
1099 		 * 'A.log', '2.log' - to 'B.log', '3.log' - to 'C.log', '4.log' - to 'D.log'.                       *
1100 		 * We modify the array according to this assumption:                                                *
1101 		 *                                                                                                  *
1102 		 *            D.log C.log B.log A.log                                                               *
1103 		 *           ------------------------                                                               *
1104 		 *    6.log |  0     0     0     0                                                                  *
1105 		 *    5.log |  0     0     0     0                                                                  *
1106 		 *    4.log | <1>    0     0     0                                                                  *
1107 		 *    3.log |  0    <1>    0     0                                                                  *
1108 		 *    2.log |  0     0    <1>    0                                                                  *
1109 		 *    1.log |  0     0     0    <1>                                                                 *
1110 		 *                                                                                                  *
1111 		 * Now the mapping is unique. Files '6.log' and '5.log' are counted as not present in the new file. *
1112 		 *                                                                                                  *
1113 		 ****************************************************************************************************/
1114 
1115 		for (i = num_old - 1; i >= 0; i--)	/* loop over rows from bottom-right corner */
1116 		{
1117 			char	*p;
1118 			int	j;
1119 
1120 			if ('1' == protected_rows[i])
1121 				continue;
1122 
1123 			p = old2new + i * num_new;	/* the first element of the current row */
1124 
1125 			for (j = num_new - 1; j >= 0; j--)
1126 			{
1127 				if (('1' == p[j] || '2' == p[j]) && '1' != protected_cols[j])
1128 				{
1129 					cross_out(old2new, num_old, num_new, i, j, protected_rows, protected_cols);
1130 					break;
1131 				}
1132 			}
1133 		}
1134 	}
1135 
1136 	zbx_free(protected_cols);
1137 	zbx_free(protected_rows);
1138 }
1139 
1140 /******************************************************************************
1141  *                                                                            *
1142  * Function: create_old2new_and_copy_of                                       *
1143  *                                                                            *
1144  * Purpose: allocate and fill an array of possible mappings from the old log  *
1145  *          files to the new log files                                        *
1146  *                                                                            *
1147  * Parameters:                                                                *
1148  *     rotation_type - [IN] file rotation type                                *
1149  *     old_files     - [IN] old file list                                     *
1150  *     num_old       - [IN] number of elements in the old file list           *
1151  *     new_files     - [IN] new file list                                     *
1152  *     num_new       - [IN] number of elements in the new file list           *
1153  *     use_ino       - [IN] how to use inodes in is_same_file()               *
1154  *     err_msg       - [IN/OUT] error message why an item became NOTSUPPORTED *
1155  *                                                                            *
1156  * Return value: pointer to allocated array or NULL                           *
1157  *                                                                            *
1158  * Comments:                                                                  *
1159  *    The array is filled with '0', '1' and '2'  which mean:                  *
1160  *       old2new[i][j] = '0' - the i-th old file IS NOT the j-th new file     *
1161  *       old2new[i][j] = '1' - the i-th old file COULD BE the j-th new file   *
1162  *       old2new[i][j] = '2' - the j-th new file is a copy of the i-th old    *
1163  *                             file                                           *
1164  *                                                                            *
1165  ******************************************************************************/
static char	*create_old2new_and_copy_of(zbx_log_rotation_options_t rotation_type, struct st_logfile *old_files,
		int num_old, struct st_logfile *new_files, int num_new, int use_ino, char **err_msg)
{
	const char	*__function_name = "create_old2new_and_copy_of";
	char		*old2new, *cell;
	int		i_old, i_new, verdict;

	/* set up a two dimensional array of possible mappings from old files to new files */
	/* (one row per old file, one column per new file) */
	old2new = (char *)zbx_malloc(NULL, (size_t)num_new * (size_t)num_old * sizeof(char));
	cell = old2new;

	for (i_old = 0; i_old < num_old; i_old++)
	{
		/* 'cell' walks through the array row by row together with the loop indexes */
		for (i_new = 0; i_new < num_new; i_new++, cell++)
		{
			verdict = is_same_file_logrt(old_files + i_old, new_files + i_new, use_ino, rotation_type,
					err_msg);

			if (ZBX_SAME_FILE_RETRY == verdict || ZBX_SAME_FILE_ERROR == verdict)
			{
				/* no mapping can be produced now; for a retry the old file is flagged first */
				if (ZBX_SAME_FILE_RETRY == verdict)
					old_files[i_old].retry = 1;

				zbx_free(old2new);
				return NULL;
			}

			if (ZBX_SAME_FILE_NO == verdict)
			{
				*cell = '0';
			}
			else if (ZBX_SAME_FILE_YES == verdict)
			{
				if (1 == old_files[i_old].retry)
				{
					zabbix_log(LOG_LEVEL_DEBUG, "%s(): the size of log file \"%s\" has been"
							" updated since modification time change, consider"
							" it to be the same file", __function_name,
							old_files[i_old].filename);
					old_files[i_old].retry = 0;
				}

				*cell = '1';
			}
			else if (ZBX_SAME_FILE_COPY == verdict)
			{
				*cell = '2';
				new_files[i_new].copy_of = i_old;	/* remember which old file was copied */
			}

			zabbix_log(LOG_LEVEL_DEBUG, "%s(): is_same_file(%s, %s) = %c", __function_name,
					old_files[i_old].filename, new_files[i_new].filename, *cell);
		}
	}

	/* non-unique mappings are resolved for all rotation types except ZBX_LOG_ROTATION_LOGCPT, */
	/* and only if at least one of the lists has more than one file */
	if (ZBX_LOG_ROTATION_LOGCPT != rotation_type && (1 < num_old || 1 < num_new))
		resolve_old2new(old2new, num_old, num_new);

	return old2new;
}
1222 
/******************************************************************************
 *                                                                            *
 * Function: find_old2new                                                     *
 *                                                                            *
 * Purpose: find the new file an old file is mapped to                        *
 *                                                                            *
 * Parameters:                                                                *
 *     old2new - [IN] mapping matrix, one row per old file and one column     *
 *                    per new file, stored row after row                      *
 *     num_new - [IN] number of new files (columns in a row)                  *
 *     i_old   - [IN] index of the old file (row to search)                   *
 *                                                                            *
 * Return value: index of the first new file marked '1' or '2' in the row,    *
 *               -1 if there is none                                          *
 *                                                                            *
 ******************************************************************************/
static int	find_old2new(const char * const old2new, int num_new, int i_old)
{
	const char	*row = old2new + i_old * num_new;	/* first cell of the i_old-th row */
	int		col = 0;

	while (col < num_new)
	{
		if ('1' == row[col] || '2' == row[col])
			return col;

		col++;
	}

	return -1;
}
1253 
1254 /******************************************************************************
1255  *                                                                            *
1256  * Function: add_logfile                                                      *
1257  *                                                                            *
1258  * Purpose: adds information of a logfile to the list of logfiles             *
1259  *                                                                            *
1260  * Parameters: logfiles - pointer to the list of logfiles                     *
1261  *             logfiles_alloc - number of logfiles memory was allocated for   *
1262  *             logfiles_num - number of already inserted logfiles             *
1263  *             filename - name of a logfile (with full path)                  *
1264  *             st - structure returned by stat()                              *
1265  *                                                                            *
1266  * Author: Dmitry Borovikov                                                   *
1267  *                                                                            *
1268  ******************************************************************************/
add_logfile(struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,const char * filename,zbx_stat_t * st)1269 static void	add_logfile(struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, const char *filename,
1270 		zbx_stat_t *st)
1271 {
1272 	const char	*__function_name = "add_logfile";
1273 	int		i = 0, cmp = 0;
1274 
1275 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s' mtime:%d size:" ZBX_FS_UI64, __function_name, filename,
1276 			(int)st->st_mtime, (zbx_uint64_t)st->st_size);
1277 
1278 	if (*logfiles_alloc == *logfiles_num)
1279 	{
1280 		*logfiles_alloc += 64;
1281 		*logfiles = (struct st_logfile *)zbx_realloc(*logfiles,
1282 				(size_t)*logfiles_alloc * sizeof(struct st_logfile));
1283 
1284 		zabbix_log(LOG_LEVEL_DEBUG, "%s() logfiles:%p logfiles_alloc:%d",
1285 				__function_name, (void *)*logfiles, *logfiles_alloc);
1286 	}
1287 
1288 	/************************************************************************************************/
1289 	/* (1) sort by ascending mtimes                                                                 */
1290 	/* (2) if mtimes are equal, sort alphabetically by descending names                             */
1291 	/* the oldest is put first, the most current is at the end                                      */
1292 	/*                                                                                              */
1293 	/*      filename.log.3 mtime3, filename.log.2 mtime2, filename.log1 mtime1, filename.log mtime  */
1294 	/*      --------------------------------------------------------------------------------------  */
1295 	/*      mtime3          <=      mtime2          <=      mtime1          <=      mtime           */
1296 	/*      --------------------------------------------------------------------------------------  */
1297 	/*      filename.log.3  >      filename.log.2   >       filename.log.1  >       filename.log    */
1298 	/*      --------------------------------------------------------------------------------------  */
1299 	/*      array[i=0]             array[i=1]               array[i=2]              array[i=3]      */
1300 	/*                                                                                              */
1301 	/* note: the application is writing into filename.log, mtimes are more important than filenames */
1302 	/************************************************************************************************/
1303 
1304 	for (; i < *logfiles_num; i++)
1305 	{
1306 		if (st->st_mtime > (*logfiles)[i].mtime)
1307 			continue;	/* (1) sort by ascending mtime */
1308 
1309 		if (st->st_mtime == (*logfiles)[i].mtime)
1310 		{
1311 			if (0 > (cmp = strcmp(filename, (*logfiles)[i].filename)))
1312 				continue;	/* (2) sort by descending name */
1313 
1314 			if (0 == cmp)
1315 			{
1316 				/* the file already exists, quite impossible branch */
1317 				zabbix_log(LOG_LEVEL_WARNING, "%s() file '%s' already added", __function_name,
1318 						filename);
1319 				goto out;
1320 			}
1321 
1322 			/* filename is smaller, must insert here */
1323 		}
1324 
1325 		/* the place is found, move all from the position forward by one struct */
1326 		break;
1327 	}
1328 
1329 	if (*logfiles_num > i)
1330 	{
1331 		/* free a gap for inserting the new element */
1332 		memmove((void *)&(*logfiles)[i + 1], (const void *)&(*logfiles)[i],
1333 				(size_t)(*logfiles_num - i) * sizeof(struct st_logfile));
1334 	}
1335 
1336 	(*logfiles)[i].filename = zbx_strdup(NULL, filename);
1337 	(*logfiles)[i].mtime = (int)st->st_mtime;
1338 	(*logfiles)[i].md5size = -1;
1339 	(*logfiles)[i].seq = 0;
1340 	(*logfiles)[i].incomplete = 0;
1341 	(*logfiles)[i].copy_of = -1;
1342 #ifndef _WINDOWS
1343 	(*logfiles)[i].dev = (zbx_uint64_t)st->st_dev;
1344 	(*logfiles)[i].ino_lo = (zbx_uint64_t)st->st_ino;
1345 	(*logfiles)[i].ino_hi = 0;
1346 #endif
1347 	(*logfiles)[i].size = (zbx_uint64_t)st->st_size;
1348 	(*logfiles)[i].processed_size = 0;
1349 	(*logfiles)[i].retry = 0;
1350 
1351 	++(*logfiles_num);
1352 out:
1353 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
1354 }
1355 
1356 /******************************************************************************
1357  *                                                                            *
1358  * Function: destroy_logfile_list                                             *
1359  *                                                                            *
1360  * Purpose: release resources allocated to a logfile list                     *
1361  *                                                                            *
1362  * Parameters:                                                                *
1363  *     logfiles       - [IN/OUT] pointer to the list of logfiles, can be NULL *
1364  *     logfiles_alloc - [IN/OUT] pointer to number of logfiles memory was     *
1365  *                               allocated for, can be NULL.                  *
1366  *     logfiles_num   - [IN/OUT] valid pointer to number of inserted logfiles *
1367  *                                                                            *
1368  ******************************************************************************/
destroy_logfile_list(struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num)1369 void	destroy_logfile_list(struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num)
1370 {
1371 	int	i;
1372 
1373 	for (i = 0; i < *logfiles_num; i++)
1374 		zbx_free((*logfiles)[i].filename);
1375 
1376 	*logfiles_num = 0;
1377 
1378 	if (NULL != logfiles_alloc)
1379 		*logfiles_alloc = 0;
1380 
1381 	zbx_free(*logfiles);
1382 }
1383 
1384 /******************************************************************************
1385  *                                                                            *
1386  * Function: pick_logfile                                                     *
1387  *                                                                            *
1388  * Purpose: checks if the specified file meets requirements and adds it to    *
1389  *          the logfile list                                                  *
1390  *                                                                            *
1391  * Parameters:                                                                *
1392  *     directory      - [IN] directory where the logfiles reside              *
1393  *     filename       - [IN] name of the logfile (without path)               *
1394  *     mtime          - [IN] selection criterion "logfile modification time"  *
1395  *                      The logfile will be selected if modified not before   *
1396  *                      'mtime'.                                              *
1397  *     re             - [IN] selection criterion "regexp describing filename  *
1398  *                      pattern"                                              *
1399  *     logfiles       - [IN/OUT] pointer to the list of logfiles              *
1400  *     logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for  *
1401  *     logfiles_num   - [IN/OUT] number of already inserted logfiles          *
1402  *                                                                            *
1403  * Comments: This is a helper function for pick_logfiles()                    *
1404  *                                                                            *
1405  ******************************************************************************/
pick_logfile(const char * directory,const char * filename,int mtime,const zbx_regexp_t * re,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num)1406 static void	pick_logfile(const char *directory, const char *filename, int mtime, const zbx_regexp_t *re,
1407 		struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num)
1408 {
1409 	char		*logfile_candidate;
1410 	zbx_stat_t	file_buf;
1411 
1412 	logfile_candidate = zbx_dsprintf(NULL, "%s%s", directory, filename);
1413 
1414 	if (0 == zbx_stat(logfile_candidate, &file_buf))
1415 	{
1416 		if (S_ISREG(file_buf.st_mode) &&
1417 				mtime <= file_buf.st_mtime &&
1418 				0 == zbx_regexp_match_precompiled(filename, re))
1419 		{
1420 			add_logfile(logfiles, logfiles_alloc, logfiles_num, logfile_candidate, &file_buf);
1421 		}
1422 	}
1423 	else
1424 		zabbix_log(LOG_LEVEL_DEBUG, "cannot process entry '%s': %s", logfile_candidate, zbx_strerror(errno));
1425 
1426 	zbx_free(logfile_candidate);
1427 }
1428 
1429 /******************************************************************************
1430  *                                                                            *
1431  * Function: pick_logfiles                                                    *
1432  *                                                                            *
1433  * Purpose: find logfiles in a directory and put them into a list             *
1434  *                                                                            *
1435  * Parameters:                                                                *
1436  *     directory      - [IN] directory where the logfiles reside              *
1437  *     mtime          - [IN] selection criterion "logfile modification time"  *
1438  *                      The logfile will be selected if modified not before   *
1439  *                      'mtime'.                                              *
1440  *     re             - [IN] selection criterion "regexp describing filename  *
1441  *                      pattern"                                              *
1442  *     use_ino        - [OUT] how to use inodes in is_same_file()             *
1443  *     logfiles       - [IN/OUT] pointer to the list of logfiles              *
1444  *     logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for  *
1445  *     logfiles_num   - [IN/OUT] number of already inserted logfiles          *
1446  *     err_msg        - [IN/OUT] error message why an item became             *
1447  *                      NOTSUPPORTED                                          *
1448  *                                                                            *
1449  * Return value: SUCCEED or FAIL                                              *
1450  *                                                                            *
1451  * Comments: This is a helper function for make_logfile_list()                *
1452  *                                                                            *
1453  ******************************************************************************/
pick_logfiles(const char * directory,int mtime,const zbx_regexp_t * re,int * use_ino,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,char ** err_msg)1454 static int	pick_logfiles(const char *directory, int mtime, const zbx_regexp_t *re, int *use_ino,
1455 		struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, char **err_msg)
1456 {
1457 #ifdef _WINDOWS
1458 	int			ret = FAIL;
1459 	char			*find_path = NULL, *file_name_utf8;
1460 	wchar_t			*find_wpath = NULL;
1461 	intptr_t		find_handle;
1462 	struct _wfinddata_t	find_data;
1463 
1464 	/* "open" Windows directory */
1465 	find_path = zbx_dsprintf(find_path, "%s*", directory);
1466 	find_wpath = zbx_utf8_to_unicode(find_path);
1467 
1468 	if (-1 == (find_handle = _wfindfirst(find_wpath, &find_data)))
1469 	{
1470 		*err_msg = zbx_dsprintf(*err_msg, "Cannot open directory \"%s\" for reading: %s", directory,
1471 				zbx_strerror(errno));
1472 		zbx_free(find_wpath);
1473 		zbx_free(find_path);
1474 		return FAIL;
1475 	}
1476 
1477 	if (SUCCEED != set_use_ino_by_fs_type(find_path, use_ino, err_msg))
1478 		goto clean;
1479 
1480 	do
1481 	{
1482 		file_name_utf8 = zbx_unicode_to_utf8(find_data.name);
1483 		pick_logfile(directory, file_name_utf8, mtime, re, logfiles, logfiles_alloc, logfiles_num);
1484 		zbx_free(file_name_utf8);
1485 	}
1486 	while (0 == _wfindnext(find_handle, &find_data));
1487 
1488 	ret = SUCCEED;
1489 clean:
1490 	if (-1 == _findclose(find_handle))
1491 	{
1492 		*err_msg = zbx_dsprintf(*err_msg, "Cannot close directory \"%s\": %s", directory, zbx_strerror(errno));
1493 		ret = FAIL;
1494 	}
1495 
1496 	zbx_free(find_wpath);
1497 	zbx_free(find_path);
1498 
1499 	return ret;
1500 #else
1501 	DIR		*dir = NULL;
1502 	struct dirent	*d_ent = NULL;
1503 
1504 	if (NULL == (dir = opendir(directory)))
1505 	{
1506 		*err_msg = zbx_dsprintf(*err_msg, "Cannot open directory \"%s\" for reading: %s", directory,
1507 				zbx_strerror(errno));
1508 		return FAIL;
1509 	}
1510 
1511 	/* on UNIX file systems we always assume that inodes can be used to identify files */
1512 	*use_ino = 1;
1513 
1514 	while (NULL != (d_ent = readdir(dir)))
1515 	{
1516 		pick_logfile(directory, d_ent->d_name, mtime, re, logfiles, logfiles_alloc, logfiles_num);
1517 	}
1518 
1519 	if (-1 == closedir(dir))
1520 	{
1521 		*err_msg = zbx_dsprintf(*err_msg, "Cannot close directory \"%s\": %s", directory, zbx_strerror(errno));
1522 		return FAIL;
1523 	}
1524 
1525 	return SUCCEED;
1526 #endif
1527 }
1528 
1529 /******************************************************************************
1530  *                                                                            *
1531  * Function: compile_filename_regexp                                          *
1532  *                                                                            *
1533  * Purpose: compile regular expression                                        *
1534  *                                                                            *
1535  * Parameters:                                                                *
1536  *     filename_regexp - [IN] regexp to be compiled                           *
1537  *     re              - [OUT] compiled regexp                                *
1538  *     err_msg         - [OUT] error message why regexp could not be          *
1539  *                       compiled                                             *
1540  *                                                                            *
1541  * Return value: SUCCEED or FAIL                                              *
1542  *                                                                            *
1543  ******************************************************************************/
compile_filename_regexp(const char * filename_regexp,zbx_regexp_t ** re,char ** err_msg)1544 static int	compile_filename_regexp(const char *filename_regexp, zbx_regexp_t **re, char **err_msg)
1545 {
1546 	const char	*regexp_err;
1547 
1548 	if (SUCCEED != zbx_regexp_compile(filename_regexp, re, &regexp_err))
1549 	{
1550 		*err_msg = zbx_dsprintf(*err_msg, "Cannot compile a regular expression describing filename pattern: %s",
1551 				regexp_err);
1552 		return FAIL;
1553 	}
1554 
1555 	return SUCCEED;
1556 }
1557 
1558 /******************************************************************************
1559  *                                                                            *
1560  * Function: fill_file_details                                                *
1561  *                                                                            *
1562  * Purpose: fill-in MD5 sums, device and inode numbers for files in the list  *
1563  *                                                                            *
1564  * Parameters:                                                                *
1565  *     logfiles     - [IN/OUT] list of log files                              *
1566  *     logfiles_num - [IN] number of elements in 'logfiles'                   *
1567  *     use_ino      - [IN] how to get file IDs in file_id()                   *
1568  *     err_msg      - [IN/OUT] error message why operation failed             *
1569  *                                                                            *
1570  * Return value: SUCCEED or FAIL                                              *
1571  *                                                                            *
1572  ******************************************************************************/
1573 #ifdef _WINDOWS
fill_file_details(struct st_logfile ** logfiles,int logfiles_num,int use_ino,char ** err_msg)1574 static int	fill_file_details(struct st_logfile **logfiles, int logfiles_num, int use_ino, char **err_msg)
1575 #else
1576 static int	fill_file_details(struct st_logfile **logfiles, int logfiles_num, char **err_msg)
1577 #endif
1578 {
1579 	int	i, ret = SUCCEED;
1580 
1581 	/* Fill in MD5 sums and file indexes in the logfile list. */
1582 	/* These operations require opening of file, therefore we group them together. */
1583 
1584 	for (i = 0; i < logfiles_num; i++)
1585 	{
1586 		int			f;
1587 		struct st_logfile	*p = *logfiles + i;
1588 
1589 		if (-1 == (f = open_file_helper(p->filename, err_msg)))
1590 			return FAIL;
1591 
1592 		p->md5size = (zbx_uint64_t)MAX_LEN_MD5 > p->size ? (int)p->size : MAX_LEN_MD5;
1593 
1594 		if (SUCCEED != (ret = file_start_md5(f, p->md5size, p->md5buf, p->filename, err_msg)))
1595 			goto clean;
1596 #ifdef _WINDOWS
1597 		ret = file_id(f, use_ino, &p->dev, &p->ino_lo, &p->ino_hi, p->filename, err_msg);
1598 #endif	/*_WINDOWS*/
1599 clean:
1600 		if (SUCCEED != close_file_helper(f, p->filename, err_msg) || FAIL == ret)
1601 			return FAIL;
1602 	}
1603 
1604 	return ret;
1605 }
1606 
1607 /******************************************************************************
1608  *                                                                            *
1609  * Function: make_logfile_list                                                *
1610  *                                                                            *
1611  * Purpose: select log files to be analyzed and make a list, set 'use_ino'    *
1612  *          parameter                                                         *
1613  *                                                                            *
1614  * Parameters:                                                                *
1615  *     flags          - [IN] bit flags with item type: log, logrt, log.count  *
1616  *                      or logrt.count                                        *
1617  *     filename       - [IN] logfile name (regular expression with a path)    *
1618  *     mtime          - [IN] last modification time of the file               *
1619  *     logfiles       - [IN/OUT] pointer to the list of logfiles              *
1620  *     logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for  *
1621  *     logfiles_num   - [IN/OUT] number of already inserted logfiles          *
1622  *     use_ino        - [IN/OUT] how to use inode numbers                     *
1623  *     err_msg        - [IN/OUT] error message (if FAIL or ZBX_NO_FILE_ERROR  *
1624  *                      is returned)                                          *
1625  *                                                                            *
1626  * Return value: SUCCEED - file list successfully built,                      *
1627  *               ZBX_NO_FILE_ERROR - file(s) do not exist,                    *
1628  *               FAIL - other errors                                          *
1629  *                                                                            *
1630  ******************************************************************************/
make_logfile_list(unsigned char flags,const char * filename,int mtime,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,int * use_ino,char ** err_msg)1631 static int	make_logfile_list(unsigned char flags, const char *filename, int mtime,
1632 		struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, int *use_ino, char **err_msg)
1633 {
1634 	int	ret = SUCCEED;
1635 
1636 	if (0 != (ZBX_METRIC_FLAG_LOG_LOG & flags))	/* log[] or log.count[] item */
1637 	{
1638 		zbx_stat_t	file_buf;
1639 
1640 		if (0 != zbx_stat(filename, &file_buf))
1641 		{
1642 			*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain information for file \"%s\": %s", filename,
1643 					zbx_strerror(errno));
1644 			ret = ZBX_NO_FILE_ERROR;
1645 			goto clean;
1646 		}
1647 
1648 		if (!S_ISREG(file_buf.st_mode))
1649 		{
1650 			*err_msg = zbx_dsprintf(*err_msg, "\"%s\" is not a regular file.", filename);
1651 			ret = FAIL;
1652 			goto clean;
1653 		}
1654 
1655 		/* mtime is not used for log, log.count items, reset to ignore */
1656 		file_buf.st_mtime = 0;
1657 
1658 		add_logfile(logfiles, logfiles_alloc, logfiles_num, filename, &file_buf);
1659 #ifdef _WINDOWS
1660 		if (SUCCEED != (ret = set_use_ino_by_fs_type(filename, use_ino, err_msg)))
1661 			goto clean;
1662 #else
1663 		/* on UNIX file systems we always assume that inodes can be used to identify files */
1664 		*use_ino = 1;
1665 #endif
1666 	}
1667 	else if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags))	/* logrt[] or logrt.count[] item */
1668 	{
1669 		char	*directory = NULL, *filename_regexp = NULL;
1670 		zbx_regexp_t	*re;
1671 
1672 		/* split a filename into directory and file name regular expression parts */
1673 		if (SUCCEED != (ret = split_filename(filename, &directory, &filename_regexp, err_msg)))
1674 			goto clean;
1675 
1676 		if (SUCCEED != (ret = compile_filename_regexp(filename_regexp, &re, err_msg)))
1677 			goto clean1;
1678 
1679 		if (SUCCEED != (ret = pick_logfiles(directory, mtime, re, use_ino, logfiles, logfiles_alloc,
1680 				logfiles_num, err_msg)))
1681 		{
1682 			goto clean2;
1683 		}
1684 
1685 		if (0 == *logfiles_num)
1686 		{
1687 			/* do not make logrt[] and logrt.count[] items NOTSUPPORTED if there are no matching log */
1688 			/* files or they are not accessible (can happen during a rotation), just log the problem */
1689 #ifdef _WINDOWS
1690 			zabbix_log(LOG_LEVEL_WARNING, "there are no recently modified files matching \"%s\" in \"%s\"",
1691 					filename_regexp, directory);
1692 
1693 			ret = ZBX_NO_FILE_ERROR;
1694 #else
1695 			if (0 != access(directory, X_OK))
1696 			{
1697 				zabbix_log(LOG_LEVEL_WARNING, "insufficient access rights (no \"execute\" permission) "
1698 						"to directory \"%s\": %s", directory, zbx_strerror(errno));
1699 			}
1700 			else
1701 			{
1702 				zabbix_log(LOG_LEVEL_WARNING, "there are no recently modified files matching \"%s\" in"
1703 						" \"%s\"", filename_regexp, directory);
1704 				ret = ZBX_NO_FILE_ERROR;
1705 			}
1706 #endif
1707 		}
1708 clean2:
1709 		zbx_regexp_free(re);
1710 clean1:
1711 		zbx_free(directory);
1712 		zbx_free(filename_regexp);
1713 
1714 		if (FAIL == ret || ZBX_NO_FILE_ERROR == ret)
1715 			goto clean;
1716 	}
1717 	else
1718 		THIS_SHOULD_NEVER_HAPPEN;
1719 
1720 #ifdef _WINDOWS
1721 	ret = fill_file_details(logfiles, *logfiles_num, *use_ino, err_msg);
1722 #else
1723 	ret = fill_file_details(logfiles, *logfiles_num, err_msg);
1724 #endif
1725 clean:
1726 	if ((FAIL == ret || ZBX_NO_FILE_ERROR == ret) && NULL != *logfiles)
1727 		destroy_logfile_list(logfiles, logfiles_alloc, logfiles_num);
1728 
1729 	return	ret;
1730 }
1731 
/******************************************************************************
 *                                                                            *
 * Function: buf_find_newline                                                 *
 *                                                                            *
 * Purpose: find the first newline (LF, CR or CR+LF) in a buffer              *
 *                                                                            *
 * Parameters:                                                                *
 *     p      - [IN/OUT] position to start the search from                    *
 *     p_next - [OUT] first byte after the newline that was found             *
 *     p_end  - [IN] first byte after the end of data in the buffer           *
 *     cr     - [IN] carriage return character in the data encoding           *
 *     lf     - [IN] line feed character in the data encoding                 *
 *     szbyte - [IN] size of 'cr' and 'lf' in bytes                           *
 *                                                                            *
 * Return value: pointer to the first byte of the newline or                  *
 *               NULL if the buffer contains no newline                       *
 *                                                                            *
 * Comments: zero bytes (single-byte data) and zero characters (2-byte        *
 *           data) met before the newline are overwritten with '?'            *
 *                                                                            *
 ******************************************************************************/
static char	*buf_find_newline(char *p, char **p_next, const char *p_end, const char *cr, const char *lf,
		size_t szbyte)
{
	if (1 == szbyte)	/* single-byte character set */
	{
		for (; p < p_end; p++)
		{
			/* detect NULL byte and replace it with '?' character */
			if (0x0 == *p)
			{
				*p = '?';
				continue;
			}

			/* quickly skip everything outside of the LF..CR range */
			if (0xd < *p || 0xa > *p)
				continue;

			if (0xa == *p)	/* LF (Unix) */
			{
				*p_next = p + 1;
				return p;
			}

			if (0xd == *p)	/* CR (Mac) */
			{
				/* CR+LF (Windows) is one newline if LF is still within the data */
				if (1 < p_end - p && 0xa == p[1])
					*p_next = p + 2;
				else
					*p_next = p + 1;

				return p;
			}
		}

		return (char *)NULL;
	}

	/* Multi-byte character set. The number of remaining bytes is compared with the character */
	/* size instead of computing 'p_end - szbyte', which would point before the start of the */
	/* buffer (undefined behaviour) if the buffer holds less than one or two characters. */

	while (p < p_end && (size_t)(p_end - p) >= szbyte)
	{
		size_t	left = (size_t)(p_end - p);	/* bytes available starting from 'p' */

		/* detect NULL byte in UTF-16 encoding and replace it with '?' character */
		if (2 == szbyte && 0x0 == p[0] && 0x0 == p[1])
		{
			if (0x0 == *cr)			/* Big-endian */
				p[1] = '?';
			else				/* Little-endian */
				p[0] = '?';
		}

		if (0 == memcmp(p, lf, szbyte))		/* LF (Unix) */
		{
			*p_next = p + szbyte;
			return p;
		}

		if (0 == memcmp(p, cr, szbyte))		/* CR (Mac) */
		{
			/* CR+LF (Windows) is one newline if a whole LF character follows */
			if (left - szbyte >= szbyte && 0 == memcmp(p + szbyte, lf, szbyte))
				*p_next = p + szbyte + szbyte;
			else
				*p_next = p + szbyte;

			return p;
		}

		p += szbyte;
	}

	return (char *)NULL;
}
1806 
zbx_read2(int fd,unsigned char flags,zbx_uint64_t * lastlogsize,int * mtime,int * big_rec,int * incomplete,char ** err_msg,const char * encoding,zbx_vector_ptr_t * regexps,const char * pattern,const char * output_template,int * p_count,int * s_count,zbx_process_value_func_t process_value,const char * server,unsigned short port,const char * hostname,const char * key,zbx_uint64_t * lastlogsize_sent,int * mtime_sent)1807 static int	zbx_read2(int fd, unsigned char flags, zbx_uint64_t *lastlogsize, int *mtime, int *big_rec,
1808 		int *incomplete, char **err_msg, const char *encoding, zbx_vector_ptr_t *regexps, const char *pattern,
1809 		const char *output_template, int *p_count, int *s_count, zbx_process_value_func_t process_value,
1810 		const char *server, unsigned short port, const char *hostname, const char *key,
1811 		zbx_uint64_t *lastlogsize_sent, int *mtime_sent)
1812 {
1813 	ZBX_THREAD_LOCAL static char	*buf = NULL;
1814 
1815 	int				ret, nbytes, regexp_ret;
1816 	const char			*cr, *lf, *p_end;
1817 	char				*p_start, *p, *p_nl, *p_next, *item_value = NULL;
1818 	size_t				szbyte;
1819 	zbx_offset_t			offset;
1820 	int				send_err;
1821 	zbx_uint64_t			lastlogsize1;
1822 
1823 #define BUF_SIZE	(256 * ZBX_KIBIBYTE)	/* The longest encodings use 4 bytes for every character. To send */
1824 						/* up to 64 k characters to Zabbix server a 256 kB buffer might be */
1825 						/* required. */
1826 
1827 	if (NULL == buf)
1828 		buf = (char *)zbx_malloc(buf, (size_t)(BUF_SIZE + 1));
1829 
1830 	find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte);
1831 
1832 	for (;;)
1833 	{
1834 		if (0 >= *p_count || 0 >= *s_count)
1835 		{
1836 			/* limit on number of processed or sent-to-server lines reached */
1837 			ret = SUCCEED;
1838 			goto out;
1839 		}
1840 
1841 		if ((zbx_offset_t)-1 == (offset = zbx_lseek(fd, 0, SEEK_CUR)))
1842 		{
1843 			*big_rec = 0;
1844 			*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to 0 in file: %s", zbx_strerror(errno));
1845 			ret = FAIL;
1846 			goto out;
1847 		}
1848 
1849 		nbytes = (int)read(fd, buf, (size_t)BUF_SIZE);
1850 
1851 		if (-1 == nbytes)
1852 		{
1853 			/* error on read */
1854 			*big_rec = 0;
1855 			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file: %s", zbx_strerror(errno));
1856 			ret = FAIL;
1857 			goto out;
1858 		}
1859 
1860 		if (0 == nbytes)
1861 		{
1862 			/* end of file reached */
1863 			ret = SUCCEED;
1864 			goto out;
1865 		}
1866 
1867 		p_start = buf;			/* beginning of current line */
1868 		p = buf;			/* current byte */
1869 		p_end = buf + (size_t)nbytes;	/* no data from this position */
1870 
1871 		if (NULL == (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
1872 		{
1873 			if (p_end > p)
1874 				*incomplete = 1;
1875 
1876 			if (BUF_SIZE > nbytes)
1877 			{
1878 				/* Buffer is not full (no more data available) and there is no "newline" in it. */
1879 				/* Do not analyze it now, keep the same position in the file and wait the next check, */
1880 				/* maybe more data will come. */
1881 
1882 				*lastlogsize = (zbx_uint64_t)offset;
1883 				ret = SUCCEED;
1884 				goto out;
1885 			}
1886 			else
1887 			{
1888 				/* buffer is full and there is no "newline" in it */
1889 
1890 				if (0 == *big_rec)
1891 				{
1892 					/* It is the first, beginning part of a long record. Match it against the */
1893 					/* regexp now (our buffer length corresponds to what we can save in the */
1894 					/* database). */
1895 
1896 					char	*value;
1897 
1898 					buf[BUF_SIZE] = '\0';
1899 
1900 					if ('\0' != *encoding)
1901 						value = convert_to_utf8(buf, (size_t)BUF_SIZE, encoding);
1902 					else
1903 						value = buf;
1904 
1905 					zabbix_log(LOG_LEVEL_WARNING, "Logfile contains a large record: \"%.64s\""
1906 							" (showing only the first 64 characters). Only the first 256 kB"
1907 							" will be analyzed, the rest will be ignored while Zabbix agent"
1908 							" is running.", value);
1909 
1910 					lastlogsize1 = (size_t)offset + (size_t)nbytes;
1911 					send_err = FAIL;
1912 
1913 					if (0 == (ZBX_METRIC_FLAG_LOG_COUNT & flags))	/* log[] or logrt[] */
1914 					{
1915 						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
1916 								pattern, ZBX_CASE_SENSITIVE, output_template,
1917 								&item_value)))
1918 						{
1919 							if (SUCCEED == (send_err = process_value(server, port,
1920 									hostname, key, item_value, ITEM_STATE_NORMAL,
1921 									&lastlogsize1, mtime, NULL, NULL, NULL, NULL,
1922 									flags | ZBX_METRIC_FLAG_PERSISTENT)))
1923 							{
1924 								*lastlogsize_sent = lastlogsize1;
1925 								if (NULL != mtime_sent)
1926 									*mtime_sent = *mtime;
1927 
1928 								(*s_count)--;
1929 								zbx_free(item_value);
1930 							}
1931 							else
1932 							{
1933 								zbx_free(item_value);
1934 
1935 								/* Sending of buffer failed. */
1936 								/* Try to resend it in the next check. */
1937 								ret = SUCCEED;
1938 								goto out;
1939 							}
1940 						}
1941 					}
1942 					else	/* log.count[] or logrt.count[] */
1943 					{
1944 						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
1945 								pattern, ZBX_CASE_SENSITIVE, NULL, NULL)))
1946 						{
1947 							(*s_count)--;
1948 						}
1949 					}
1950 
1951 					if ('\0' != *encoding)
1952 						zbx_free(value);
1953 
1954 					if (FAIL == regexp_ret)
1955 					{
1956 						*err_msg = zbx_dsprintf(*err_msg, "cannot compile regular expression");
1957 						ret = FAIL;
1958 						goto out;
1959 					}
1960 
1961 					(*p_count)--;
1962 
1963 					if (0 != (ZBX_METRIC_FLAG_LOG_COUNT & flags) ||
1964 							ZBX_REGEXP_NO_MATCH == regexp_ret || SUCCEED == send_err)
1965 					{
1966 						*lastlogsize = lastlogsize1;
1967 						*big_rec = 1;	/* ignore the rest of this record */
1968 					}
1969 				}
1970 				else
1971 				{
1972 					/* It is a middle part of a long record. Ignore it. We have already */
1973 					/* checked the first part against the regexp. */
1974 					*lastlogsize = (size_t)offset + (size_t)nbytes;
1975 				}
1976 			}
1977 		}
1978 		else
1979 		{
1980 			/* the "newline" was found, so there is at least one complete record */
1981 			/* (or trailing part of a large record) in the buffer */
1982 			*incomplete = 0;
1983 
1984 			for (;;)
1985 			{
1986 				if (0 >= *p_count || 0 >= *s_count)
1987 				{
1988 					/* limit on number of processed or sent-to-server lines reached */
1989 					ret = SUCCEED;
1990 					goto out;
1991 				}
1992 
1993 				if (0 == *big_rec)
1994 				{
1995 					char	*value;
1996 
1997 					*p_nl = '\0';
1998 
1999 					if ('\0' != *encoding)
2000 						value = convert_to_utf8(p_start, (size_t)(p_nl - p_start), encoding);
2001 					else
2002 						value = p_start;
2003 
2004 					lastlogsize1 = (size_t)offset + (size_t)(p_next - buf);
2005 					send_err = FAIL;
2006 
2007 					if (0 == (ZBX_METRIC_FLAG_LOG_COUNT & flags))   /* log[] or logrt[] */
2008 					{
2009 						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
2010 								pattern, ZBX_CASE_SENSITIVE, output_template,
2011 								&item_value)))
2012 						{
2013 							if (SUCCEED == (send_err = process_value(server, port,
2014 									hostname, key, item_value, ITEM_STATE_NORMAL,
2015 									&lastlogsize1, mtime, NULL, NULL, NULL, NULL,
2016 									flags | ZBX_METRIC_FLAG_PERSISTENT)))
2017 							{
2018 								*lastlogsize_sent = lastlogsize1;
2019 								if (NULL != mtime_sent)
2020 									*mtime_sent = *mtime;
2021 
2022 								(*s_count)--;
2023 								zbx_free(item_value);
2024 							}
2025 							else
2026 							{
2027 								zbx_free(item_value);
2028 
2029 								/* Sending of buffer failed. */
2030 								/* Try to resend it in the next check. */
2031 								ret = SUCCEED;
2032 								goto out;
2033 							}
2034 						}
2035 					}
2036 					else	/* log.count[] or logrt.count[] */
2037 					{
2038 						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
2039 								pattern, ZBX_CASE_SENSITIVE, NULL, NULL)))
2040 						{
2041 							(*s_count)--;
2042 						}
2043 					}
2044 
2045 					if ('\0' != *encoding)
2046 						zbx_free(value);
2047 
2048 					if (FAIL == regexp_ret)
2049 					{
2050 						*err_msg = zbx_dsprintf(*err_msg, "cannot compile regular expression");
2051 						ret = FAIL;
2052 						goto out;
2053 					}
2054 
2055 					(*p_count)--;
2056 
2057 					if (0 != (ZBX_METRIC_FLAG_LOG_COUNT & flags) ||
2058 							ZBX_REGEXP_NO_MATCH == regexp_ret || SUCCEED == send_err)
2059 					{
2060 						*lastlogsize = lastlogsize1;
2061 					}
2062 				}
2063 				else
2064 				{
2065 					/* skip the trailing part of a long record */
2066 					*lastlogsize = (size_t)offset + (size_t)(p_next - buf);
2067 					*big_rec = 0;
2068 				}
2069 
2070 				/* move to the next record in the buffer */
2071 				p_start = p_next;
2072 				p = p_next;
2073 
2074 				if (NULL == (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
2075 				{
2076 					/* There are no complete records in the buffer. */
2077 					/* Try to read more data from this position if available. */
2078 					if (p_end > p)
2079 						*incomplete = 1;
2080 
2081 					if ((zbx_offset_t)-1 == zbx_lseek(fd, *lastlogsize, SEEK_SET))
2082 					{
2083 						*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64
2084 								" in file: %s", *lastlogsize, zbx_strerror(errno));
2085 						ret = FAIL;
2086 						goto out;
2087 					}
2088 					else
2089 						break;
2090 				}
2091 				else
2092 					*incomplete = 0;
2093 			}
2094 		}
2095 	}
2096 out:
2097 	return ret;
2098 
2099 #undef BUF_SIZE
2100 }
2101 
2102 /******************************************************************************
2103  *                                                                            *
2104  * Function: process_log                                                      *
2105  *                                                                            *
2106  * Purpose: Match new records in logfile with regexp, transmit matching       *
2107  *          records to Zabbix server                                          *
2108  *                                                                            *
2109  * Parameters:                                                                *
2110  *     flags           - [IN] bit flags with item type: log, logrt, log.count *
2111  *                       or logrt.count                                       *
2112  *     filename        - [IN] logfile name                                    *
2113  *     lastlogsize     - [IN/OUT] offset from the beginning of the file       *
2114  *     mtime           - [IN/OUT] file modification time for reporting to     *
2115  *                       server                                               *
2116  *     lastlogsize_sent - [OUT] lastlogsize value that was last sent          *
2117  *     mtime_sent      - [OUT] mtime value that was last sent                 *
2118  *     skip_old_data   - [IN/OUT] start from the beginning of the file or     *
2119  *                       jump to the end                                      *
2120  *     big_rec         - [IN/OUT] state variable to remember whether a long   *
2121  *                       record is being processed                            *
2122  *     incomplete      - [OUT] 0 - the last record ended with a newline,      *
2123  *                       1 - there was no newline at the end of the last      *
2124  *                       record.                                              *
2125  *     err_msg         - [IN/OUT] error message why an item became            *
2126  *                       NOTSUPPORTED                                         *
2127  *     encoding        - [IN] text string describing encoding.                *
2128  *                       See function find_cr_lf_szbyte() for supported       *
2129  *                       encodings.                                           *
2130  *                       "" (empty string) means a single-byte character set  *
2131  *                       (e.g. ASCII).                                        *
2132  *     regexps         - [IN] array of regexps                                *
2133  *     pattern         - [IN] pattern to match                                *
2134  *     output_template - [IN] output formatting template                      *
2135  *     p_count         - [IN/OUT] limit of records to be processed            *
2136  *     s_count         - [IN/OUT] limit of records to be sent to server       *
2137  *     process_value   - [IN] pointer to function process_value()             *
2138  *     server          - [IN] server to send data to                          *
2139  *     port            - [IN] port to send data to                            *
2140  *     hostname        - [IN] hostname the data comes from                    *
2141  *     key             - [IN] item key the data belongs to                    *
2142  *     processed_bytes - [OUT] number of processed bytes in logfile           *
2143  *     seek_offset     - [IN] position to seek in file                        *
2144  *                                                                            *
2145  * Return value: returns SUCCEED on successful reading,                       *
2146  *               FAIL on other cases                                          *
2147  *                                                                            *
2148  * Author: Eugene Grigorjev                                                   *
2149  *                                                                            *
2150  * Comments:                                                                  *
2151  *           This function does not deal with log file rotation.              *
2152  *                                                                            *
2153  ******************************************************************************/
static int	process_log(unsigned char flags, const char *filename, zbx_uint64_t *lastlogsize, int *mtime,
		zbx_uint64_t *lastlogsize_sent, int *mtime_sent, unsigned char *skip_old_data, int *big_rec,
		int *incomplete, char **err_msg, const char *encoding, zbx_vector_ptr_t *regexps, const char *pattern,
		const char *output_template, int *p_count, int *s_count, zbx_process_value_func_t process_value,
		const char *server, unsigned short port, const char *hostname, const char *key,
		zbx_uint64_t *processed_bytes, zbx_uint64_t seek_offset)
{
	const char	*__function_name = "process_log";
	int		fd, res = FAIL;

	zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d",
			__function_name, filename, *lastlogsize, NULL != mtime ? *mtime : 0);

	/* if the file cannot be opened there is nothing to read or close, the result stays FAIL */
	if (-1 != (fd = open_file_helper(filename, err_msg)))
	{
		if ((zbx_offset_t)-1 == zbx_lseek(fd, seek_offset, SEEK_SET))
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
					seek_offset, filename, zbx_strerror(errno));
		}
		else
		{
			/* reading starts at 'seek_offset', report it as the current position from now on */
			*lastlogsize = seek_offset;
			*skip_old_data = 0;

			res = zbx_read2(fd, flags, lastlogsize, mtime, big_rec, incomplete, err_msg, encoding,
					regexps, pattern, output_template, p_count, s_count, process_value, server,
					port, hostname, key, lastlogsize_sent, mtime_sent);

			/* 'processed_bytes' is written only after a successful read */
			if (SUCCEED == res)
				*processed_bytes = *lastlogsize - seek_offset;
		}

		/* a failure to close the file turns the whole result into FAIL */
		if (SUCCEED != close_file_helper(fd, filename, err_msg))
			res = FAIL;
	}

	zabbix_log(LOG_LEVEL_DEBUG, "End of %s() filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d ret:%s"
			" processed_bytes:" ZBX_FS_UI64, __function_name, filename, *lastlogsize,
			NULL != mtime ? *mtime : 0, zbx_result_string(res),
			SUCCEED == res ? *processed_bytes : (zbx_uint64_t)0);

	return res;
}
2198 
adjust_mtime_to_clock(int * mtime)2199 static void	adjust_mtime_to_clock(int *mtime)
2200 {
2201 	time_t	now;
2202 
2203 	/* Adjust 'mtime' if the system clock has been set back in time. */
2204 	/* Setting the clock ahead of time is harmless in our case. */
2205 
2206 	if (*mtime > (now = time(NULL)))
2207 	{
2208 		int	old_mtime;
2209 
2210 		old_mtime = *mtime;
2211 		*mtime = (int)now;
2212 
2213 		zabbix_log(LOG_LEVEL_WARNING, "System clock has been set back in time. Setting agent mtime %d "
2214 				"seconds back.", (int)(old_mtime - now));
2215 	}
2216 }
2217 
is_swap_required(const struct st_logfile * old_files,struct st_logfile * new_files,int use_ino,int idx)2218 static int	is_swap_required(const struct st_logfile *old_files, struct st_logfile *new_files, int use_ino, int idx)
2219 {
2220 	int	is_same_place;
2221 
2222 	/* if the 1st file is not processed at all while the 2nd file was processed (at least partially) */
2223 	/* then swap them */
2224 	if (0 == new_files[idx].seq && 0 < new_files[idx + 1].seq)
2225 		return SUCCEED;
2226 
2227 	/* if the 2nd file is not a copy of some other file then no need to swap */
2228 	if (-1 == new_files[idx + 1].copy_of)
2229 		return FAIL;
2230 
2231 	/* The 2nd file is a copy. But is it a copy of the 1st file ? */
2232 
2233 	/* On file systems with inodes or file indices if a file is copied and truncated, we assume that */
2234 	/* there is a high possibility that the truncated file has the same inode (index) as before. */
2235 
2236 	if (NULL == old_files)	/* cannot consult the old file list */
2237 		return FAIL;
2238 
2239 	is_same_place = compare_file_places(old_files + new_files[idx + 1].copy_of, new_files + idx, use_ino);
2240 
2241 	if (ZBX_FILE_PLACE_SAME == is_same_place && new_files[idx].seq >= new_files[idx + 1].seq)
2242 		return SUCCEED;
2243 
2244 	/* The last attempt - compare file names. It is less reliable as file rotation can change file names. */
2245 	if (ZBX_FILE_PLACE_OTHER == is_same_place || ZBX_FILE_PLACE_UNKNOWN == is_same_place)
2246 	{
2247 		if (0 == strcmp((old_files + new_files[idx + 1].copy_of)->filename, (new_files + idx)->filename))
2248 			return SUCCEED;
2249 	}
2250 
2251 	return FAIL;
2252 }
2253 
swap_logfile_array_elements(struct st_logfile * array,int idx1,int idx2)2254 static void	swap_logfile_array_elements(struct st_logfile *array, int idx1, int idx2)
2255 {
2256 	struct st_logfile	*p1 = array + idx1;
2257 	struct st_logfile	*p2 = array + idx2;
2258 	struct st_logfile	tmp;
2259 
2260 	memcpy(&tmp, p1, sizeof(struct st_logfile));
2261 	memcpy(p1, p2, sizeof(struct st_logfile));
2262 	memcpy(p2, &tmp, sizeof(struct st_logfile));
2263 }
2264 
ensure_order_if_mtimes_equal(const struct st_logfile * logfiles_old,struct st_logfile * logfiles,int logfiles_num,int use_ino,int * start_idx)2265 static void	ensure_order_if_mtimes_equal(const struct st_logfile *logfiles_old, struct st_logfile *logfiles,
2266 		int logfiles_num, int use_ino, int *start_idx)
2267 {
2268 	int	i;
2269 
2270 	/* There is a special case when within 1 second of time:       */
2271 	/*   1. a log file ORG.log is copied to other file COPY.log,   */
2272 	/*   2. the original file ORG.log is truncated,                */
2273 	/*   3. new records are appended to the original file ORG.log, */
2274 	/*   4. both files ORG.log and COPY.log have the same 'mtime'. */
2275 	/* Now in the list 'logfiles' the file ORG.log precedes the COPY.log because if 'mtime' is the same   */
2276 	/* then add_logfile() function sorts files by name in descending order. This would lead to an error - */
2277 	/* processing ORG.log before COPY.log. We need to correct the order by swapping ORG.log and COPY.log  */
2278 	/* elements in the 'logfiles' list. */
2279 
2280 	for (i = 0; i < logfiles_num - 1; i++)
2281 	{
2282 		if (logfiles[i].mtime == logfiles[i + 1].mtime &&
2283 				SUCCEED == is_swap_required(logfiles_old, logfiles, use_ino, i))
2284 		{
2285 			zabbix_log(LOG_LEVEL_DEBUG, "ensure_order_if_mtimes_equal() swapping files '%s' and '%s'",
2286 					logfiles[i].filename, logfiles[i + 1].filename);
2287 
2288 			swap_logfile_array_elements(logfiles, i, i + 1);
2289 
2290 			if (*start_idx == i + 1)
2291 				*start_idx = i;
2292 		}
2293 	}
2294 }
2295 
files_start_with_same_md5(const struct st_logfile * log1,const struct st_logfile * log2)2296 static int	files_start_with_same_md5(const struct st_logfile *log1, const struct st_logfile *log2)
2297 {
2298 	if (-1 == log1->md5size || -1 == log2->md5size)
2299 		return FAIL;
2300 
2301 	if (log1->md5size == log2->md5size)	/* this works for empty files, too */
2302 	{
2303 		if (0 == memcmp(log1->md5buf, log2->md5buf, sizeof(log1->md5buf)))
2304 			return SUCCEED;
2305 		else
2306 			return FAIL;
2307 	}
2308 
2309 	/* we have MD5 sums, but they are calculated from blocks of different sizes */
2310 
2311 	if (0 < log1->md5size && 0 < log2->md5size)
2312 	{
2313 		const struct st_logfile	*file_smaller, *file_larger;
2314 		int			fd, ret = FAIL;
2315 		char			*err_msg = NULL;		/* required, but not used */
2316 		md5_byte_t		md5tmp[MD5_DIGEST_SIZE];
2317 
2318 		if (log1->md5size < log2->md5size)
2319 		{
2320 			file_smaller = log1;
2321 			file_larger = log2;
2322 		}
2323 		else
2324 		{
2325 			file_smaller = log2;
2326 			file_larger = log1;
2327 		}
2328 
2329 		if (-1 == (fd = zbx_open(file_larger->filename, O_RDONLY)))
2330 			return FAIL;
2331 
2332 		if (SUCCEED == file_start_md5(fd, file_smaller->md5size, md5tmp, "", &err_msg))
2333 		{
2334 			if (0 == memcmp(file_smaller->md5buf, md5tmp, sizeof(md5tmp)))
2335 				ret = SUCCEED;
2336 		}
2337 
2338 		zbx_free(err_msg);
2339 		close(fd);
2340 
2341 		return ret;
2342 	}
2343 
2344 	return FAIL;
2345 }
2346 
handle_multiple_copies(struct st_logfile * logfiles,int logfiles_num,int i)2347 static void	handle_multiple_copies(struct st_logfile *logfiles, int logfiles_num, int i)
2348 {
2349 	/* There is a special case when the latest log file is copied to other file but not yet truncated. */
2350 	/* So there are two files and we don't know which one will stay as the copy and which one will be  */
2351 	/* truncated. Similar cases: the latest log file is copied but never truncated or is copied multiple */
2352 	/* times. */
2353 
2354 	int	j;
2355 
2356 	for (j = i + 1; j < logfiles_num; j++)
2357 	{
2358 		if (SUCCEED == files_start_with_same_md5(logfiles + i, logfiles + j))
2359 		{
2360 			/* logfiles[i] and logfiles[j] are original and copy (or vice versa). */
2361 			/* If logfiles[i] has been at least partially processed then transfer its */
2362 			/* processed size to logfiles[j], too. */
2363 
2364 			if (logfiles[j].processed_size < logfiles[i].processed_size)
2365 			{
2366 				logfiles[j].processed_size = MIN(logfiles[i].processed_size, logfiles[j].size);
2367 
2368 				zabbix_log(LOG_LEVEL_DEBUG, "handle_multiple_copies() file '%s' processed_size:"
2369 						ZBX_FS_UI64 " transferred to" " file '%s' processed_size:" ZBX_FS_UI64,
2370 						logfiles[i].filename, logfiles[i].processed_size,
2371 						logfiles[j].filename, logfiles[j].processed_size);
2372 			}
2373 			else if (logfiles[i].processed_size < logfiles[j].processed_size)
2374 			{
2375 				logfiles[i].processed_size = MIN(logfiles[j].processed_size, logfiles[i].size);
2376 
2377 				zabbix_log(LOG_LEVEL_DEBUG, "handle_multiple_copies() file '%s' processed_size:"
2378 						ZBX_FS_UI64 " transferred to" " file '%s' processed_size:" ZBX_FS_UI64,
2379 						logfiles[j].filename, logfiles[j].processed_size,
2380 						logfiles[i].filename, logfiles[i].processed_size);
2381 			}
2382 		}
2383 	}
2384 }
2385 
/* Purpose: if the list contains copies (non-empty files starting with the same MD5 sum) then     */
/*          set 'mtime' and 'lastlogsize' back so that the element holding information about      */
/*          copies is included in the file list again during the next check.                      */
/*                                                                                                */
/* Parameters: logfiles     - [IN/OUT] list of log files ('seq' of elements can be reset to 0)    */
/*             logfiles_num - [IN] number of elements in 'logfiles'                               */
/*             mtime        - [IN/OUT] modification time to be reported                           */
/*             lastlogsize  - [IN/OUT] processed size to be reported                              */
/*                                                                                                */
/* Comments: does nothing if the list is empty.                                                   */
static void	delay_update_if_copies(struct st_logfile *logfiles, int logfiles_num, int *mtime,
		zbx_uint64_t *lastlogsize)
{
	int	i, idx_to_keep;

	/* With an empty list 'idx_to_keep' would become -1 and logfiles[-1] would be read below. */
	if (0 >= logfiles_num)
		return;

	idx_to_keep = logfiles_num - 1;

	/* If there are copies in 'logfiles' list then find the element with the smallest index which must be */
	/* preserved in the list to keep information about copies. */

	for (i = 0; i < logfiles_num - 1; i++)
	{
		int	j, largest_for_i = -1;

		if (0 == logfiles[i].size)	/* empty files are not considered */
			continue;

		for (j = i + 1; j < logfiles_num; j++)
		{
			if (0 == logfiles[j].size)
				continue;

			if (SUCCEED == files_start_with_same_md5(logfiles + i, logfiles + j))
			{
				int	more_processed;

				/* logfiles[i] and logfiles[j] are original and copy (or vice versa) */

				more_processed = (logfiles[i].processed_size > logfiles[j].processed_size) ? i : j;

				if (largest_for_i < more_processed)
					largest_for_i = more_processed;
			}
		}

		if (-1 != largest_for_i && idx_to_keep > largest_for_i)
			idx_to_keep = largest_for_i;
	}

	if (logfiles[idx_to_keep].mtime < *mtime)
	{
		zabbix_log(LOG_LEVEL_DEBUG, "delay_update_if_copies(): setting mtime back from %d to %d,"
				" lastlogsize from " ZBX_FS_UI64 " to " ZBX_FS_UI64, *mtime,
				logfiles[idx_to_keep].mtime, *lastlogsize, logfiles[idx_to_keep].processed_size);

		/* ensure that next time element 'idx_to_keep' is included in file list with the right 'lastlogsize' */
		*mtime = logfiles[idx_to_keep].mtime;
		*lastlogsize = logfiles[idx_to_keep].processed_size;

		/* ensure that next time processing starts from element 'idx_to_keep' */
		/* (the loop body is not entered if 'idx_to_keep' is the last element) */
		for (i = idx_to_keep + 1; i < logfiles_num; i++)
			logfiles[i].seq = 0;
	}
}
2441 
max_processed_size_in_copies(const struct st_logfile * logfiles,int logfiles_num,int i)2442 static zbx_uint64_t	max_processed_size_in_copies(const struct st_logfile *logfiles, int logfiles_num, int i)
2443 {
2444 	zbx_uint64_t	max_processed = 0;
2445 	int		j;
2446 
2447 	for (j = 0; j < logfiles_num; j++)
2448 	{
2449 		if (i != j && SUCCEED == files_start_with_same_md5(logfiles + i, logfiles + j))
2450 		{
2451 			/* logfiles[i] and logfiles[j] are original and copy (or vice versa). */
2452 			if (max_processed < logfiles[j].processed_size)
2453 				max_processed = logfiles[j].processed_size;
2454 		}
2455 	}
2456 
2457 	return max_processed;
2458 }
2459 
2460 /******************************************************************************
2461  *                                                                            *
2462  * Function: calculate_delay                                                  *
2463  *                                                                            *
2464  * Purpose: calculate delay based on number of processed and remaining bytes, *
2465  *          and processing time                                               *
2466  *                                                                            *
2467  * Parameters:                                                                *
2468  *     processed_bytes - [IN] number of processed bytes in logfile            *
2469  *     remaining_bytes - [IN] number of remaining bytes in all logfiles       *
2470  *     t_proc          - [IN] processing time, s                              *
2471  *                                                                            *
2472  * Return value:                                                              *
2473  *     delay in seconds or 0 (if cannot be calculated)                        *
2474  *                                                                            *
2475  ******************************************************************************/
static double	calculate_delay(zbx_uint64_t processed_bytes, zbx_uint64_t remaining_bytes, double t_proc)
{
	double	estimate;

	/* Processing time could be negative or 0 if the system clock has been set back in time. */
	/* In this case (and if no bytes were processed) return 0, then a jump over log lines will */
	/* not take place. */

	if (0 == processed_bytes || !(0.0 < t_proc))
		return 0.0;

	/* time needed for the remaining bytes at the speed measured on the processed bytes */
	estimate = (double)remaining_bytes * t_proc / (double)processed_bytes;

	zabbix_log(LOG_LEVEL_DEBUG, "calculate_delay(): processed bytes:" ZBX_FS_UI64
			" remaining bytes:" ZBX_FS_UI64 " t_proc:%e s speed:%e B/s remaining full checks:"
			ZBX_FS_UI64 " delay:%e s", processed_bytes, remaining_bytes, t_proc,
			(double)processed_bytes / t_proc, remaining_bytes / processed_bytes, estimate);

	return estimate;
}
2495 
/* Purpose: skip up to 'bytes_to_jump' unprocessed bytes in the file list, starting with element  */
/*          'start_from' and then continuing from the beginning of the list.                      */
/*                                                                                                */
/* Parameters: logfiles      - [IN/OUT] list of log files                                         */
/*             logfiles_num  - [IN] number of elements in 'logfiles'                              */
/*             key           - [IN] item key for logging                                          */
/*             start_from    - [IN] index of the element to start from                            */
/*             bytes_to_jump - [IN] number of bytes to skip                                       */
/*             seq           - [IN/OUT] sequence number, assigned to every file with skipped      */
/*                             bytes and then incremented                                         */
/*             lastlogsize   - [OUT] processed size of the last file with skipped bytes           */
/*             mtime         - [OUT] modification time of the last file with skipped bytes        */
/*             jumped_to     - [OUT] index of the last file with skipped bytes                    */
static void	jump_remaining_bytes_logrt(struct st_logfile *logfiles, int logfiles_num, const char *key,
		int start_from, zbx_uint64_t bytes_to_jump, int *seq, zbx_uint64_t *lastlogsize, int *mtime,
		int *jumped_to)
{
	int	idx = start_from;	/* the first visited element is the last file processed */
	int	wrapped = 0;		/* becomes 1 when the walk restarts from the beginning of list */

	while (idx < logfiles_num)
	{
		struct st_logfile	*file = logfiles + idx;

		if (file->size != file->processed_size)
		{
			const zbx_uint64_t	step = MIN(bytes_to_jump, file->size - file->processed_size);
			const zbx_uint64_t	target = file->processed_size + step;

			zabbix_log(LOG_LEVEL_WARNING, "item:\"%s\" logfile:\"%s\" skipping " ZBX_FS_UI64 " bytes (from"
					" byte " ZBX_FS_UI64 " to byte " ZBX_FS_UI64 ") to meet maxdelay", key,
					file->filename, step, file->processed_size, target);

			file->processed_size = target;
			file->seq = (*seq)++;

			*lastlogsize = target;
			*mtime = file->mtime;
			*jumped_to = idx;

			bytes_to_jump -= step;
		}

		if (0 == bytes_to_jump)
			break;

		if (0 == wrapped)
		{
			/* 'start_from' element was processed, now proceed from the beginning of file list */
			wrapped = 1;
			idx = 0;
		}
		else
			idx++;
	}
}
2543 
2544 /******************************************************************************
2545  *                                                                            *
2546  * Function: adjust_position_after_jump                                       *
2547  *                                                                            *
2548  * Purpose:                                                                   *
2549  *    After jumping over a number of bytes we "land" most likely somewhere in *
2550  *    the middle of log file line. This function tries to adjust position to  *
2551  *    the beginning of the log line.                                          *
2552  *                                                                            *
2553  * Parameters:                                                                *
2554  *     logfile     - [IN/OUT] log file data                                   *
2555  *     lastlogsize - [IN/OUT] offset from the beginning of the file           *
2556  *     min_size    - [IN] minimum offset to search from                       *
2557  *     encoding    - [IN] text string describing encoding                     *
2558  *     err_msg     - [IN/OUT] error message                                   *
2559  *                                                                            *
2560  * Return value: SUCCEED or FAIL (with error message allocated in 'err_msg')  *
2561  *                                                                            *
2562  ******************************************************************************/
static int	adjust_position_after_jump(struct st_logfile *logfile, zbx_uint64_t *lastlogsize, zbx_uint64_t min_size,
		const char *encoding, char **err_msg)
{
	int		fd, ret = FAIL;
	size_t		szbyte;
	ssize_t		nbytes;
	const char	*cr, *lf, *p_end;
	char		*p, *p_nl, *p_next;
	zbx_uint64_t	lastlogsize_tmp, lastlogsize_aligned, lastlogsize_org, seek_pos, remainder;
	char   		buf[32 * ZBX_KIBIBYTE];		/* buffer must be of size multiple of 4 as some character */
							/* encodings use 4 bytes for every character */

	/* Overview: 1) align the position to a character border, 2) search forward for a newline until EOF, */
	/* 3) if not found, search backwards block by block until 'min_size'. On success '*lastlogsize' and */
	/* 'logfile->processed_size' are set to the position right after the newline found (or to 'min_size' */
	/* if there is no newline at all). */

	if (-1 == (fd = open_file_helper(logfile->filename, err_msg)))
		return FAIL;

	find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte);

	/* For multibyte character encodings 'lastlogsize' needs to be aligned to character border. */
	/* Align it towards smaller offset. We assume that log file contains no corrupted data stream. */

	lastlogsize_org = *lastlogsize;
	lastlogsize_aligned = *lastlogsize;

	if (1 < szbyte && 0 != (remainder = lastlogsize_aligned % szbyte))	/* remainder can be 0, 1, 2 or 3 */
	{
		/* do not align to a position before 'min_size' */
		if (min_size <= lastlogsize_aligned - remainder)
			lastlogsize_aligned -= remainder;
		else
			lastlogsize_aligned = min_size;
	}

	if ((zbx_offset_t)-1 == zbx_lseek(fd, lastlogsize_aligned, SEEK_SET))
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
				lastlogsize_aligned, logfile->filename, zbx_strerror(errno));
		goto out;
	}

	/* search forward for the first newline until EOF */

	lastlogsize_tmp = lastlogsize_aligned;	/* file offset of the beginning of 'buf' */

	for (;;)
	{
		if (-1 == (nbytes = read(fd, buf, sizeof(buf))))
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file \"%s\": %s", logfile->filename,
					zbx_strerror(errno));
			goto out;
		}

		if (0 == nbytes)	/* end of file reached */
			break;

		p = buf;
		p_end = buf + nbytes;	/* no data from this position */

		/* 'p_nl' only tells whether a newline was found, the new position is taken from 'p_next' */
		if (NULL != (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
		{
			/* found the beginning of line */

			*lastlogsize = lastlogsize_tmp + (zbx_uint64_t)(p_next - buf);
			logfile->processed_size = *lastlogsize;
			ret = SUCCEED;
			goto out;
		}

		lastlogsize_tmp += (zbx_uint64_t)nbytes;
	}

	/* Searching forward did not find a newline. Now search backwards until 'min_size'. */

	seek_pos = lastlogsize_aligned;

	for (;;)
	{
		/* step back by one buffer, but not further than 'min_size' */
		if (sizeof(buf) <= seek_pos)
			seek_pos -= MIN(sizeof(buf), seek_pos - min_size);
		else
			seek_pos = min_size;

		if ((zbx_offset_t)-1 == zbx_lseek(fd, seek_pos, SEEK_SET))
		{
			/* report the position which was actually requested from zbx_lseek() */
			*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
					seek_pos, logfile->filename, zbx_strerror(errno));
			goto out;
		}

		if (-1 == (nbytes = read(fd, buf, sizeof(buf))))
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file \"%s\": %s", logfile->filename,
					zbx_strerror(errno));
			goto out;
		}

		if (0 == nbytes)	/* end of file reached */
		{
			*err_msg = zbx_dsprintf(*err_msg, "Unexpected end of file while reading file \"%s\"",
					logfile->filename);
			goto out;
		}

		p = buf;
		p_end = buf + nbytes;	/* no data from this position */

		if (NULL != (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
		{
			/* Found the beginning of line. It may not be the one closest to place we jumped to */
			/* (it could be about sizeof(buf) bytes away) but it is ok for our purposes. */

			*lastlogsize = seek_pos + (zbx_uint64_t)(p_next - buf);
			logfile->processed_size = *lastlogsize;
			ret = SUCCEED;
			goto out;
		}

		if (min_size == seek_pos)
		{
			/* We have searched backwards until 'min_size' and did not find a 'newline'. */
			/* Effectively it turned out to be a jump with zero-length. */

			*lastlogsize = min_size;
			logfile->processed_size = *lastlogsize;
			ret = SUCCEED;
			goto out;
		}
	}
out:
	/* a failure to close the file turns the result into FAIL */
	if (SUCCEED != close_file_helper(fd, logfile->filename, err_msg))
		ret = FAIL;

	if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
	{
		const char	*dbg_msg;

		if (SUCCEED == ret)
			dbg_msg = "NEWLINE FOUND";
		else
			dbg_msg = "NEWLINE NOT FOUND";

		zabbix_log(LOG_LEVEL_DEBUG, "adjust_position_after_jump(): szbyte:" ZBX_FS_SIZE_T " lastlogsize_org:"
				ZBX_FS_UI64 " lastlogsize_aligned:" ZBX_FS_UI64 " (change " ZBX_FS_I64 " bytes)"
				" lastlogsize_after:" ZBX_FS_UI64 " (change " ZBX_FS_I64 " bytes) %s %s",
				(zbx_fs_size_t)szbyte, lastlogsize_org, lastlogsize_aligned,
				(zbx_int64_t)lastlogsize_aligned - (zbx_int64_t)lastlogsize_org, *lastlogsize,
				(zbx_int64_t)*lastlogsize - (zbx_int64_t)lastlogsize_aligned,
				dbg_msg, ZBX_NULL2EMPTY_STR(*err_msg));
	}

	return ret;
}
2714 
2715 /******************************************************************************
2716  *                                                                            *
2717  * Function: jump_ahead                                                       *
2718  *                                                                            *
2719  * Purpose: move forward to a new position in the log file list               *
2720  *                                                                            *
2721  * Parameters:                                                                *
2722  *     key           - [IN] item key for logging                              *
2723  *     logfiles      - [IN/OUT] list of log files                             *
2724  *     logfiles_num  - [IN] number of elements in 'logfiles'                  *
2725  *     jump_from_to  - [IN/OUT] on input - number of element where to start   *
2726  *                     jump, on output - number of element we jumped into     *
2727  *     seq           - [IN/OUT] sequence number of last processed file        *
2728  *     lastlogsize   - [IN/OUT] offset from the beginning of the file         *
2729  *     mtime         - [IN/OUT] last modification time of the file            *
2730  *     encoding      - [IN] text string describing encoding                   *
2731  *     bytes_to_jump - [IN] number of bytes to jump ahead                     *
2732  *     err_msg       - [IN/OUT] error message                                 *
2733  *                                                                            *
2734  * Return value: SUCCEED or FAIL (with error message allocated in 'err_msg')  *
2735  *                                                                            *
2736  ******************************************************************************/
jump_ahead(const char * key,struct st_logfile * logfiles,int logfiles_num,int * jump_from_to,int * seq,zbx_uint64_t * lastlogsize,int * mtime,const char * encoding,zbx_uint64_t bytes_to_jump,char ** err_msg)2737 static int	jump_ahead(const char *key, struct st_logfile *logfiles, int logfiles_num,
2738 		int *jump_from_to, int *seq, zbx_uint64_t *lastlogsize, int *mtime, const char *encoding,
2739 		zbx_uint64_t bytes_to_jump, char **err_msg)
2740 {
2741 	zbx_uint64_t	lastlogsize_org, min_size;
2742 	int		jumped_to = -1;		/* number of file in 'logfiles' list we jumped to */
2743 
2744 	lastlogsize_org = *lastlogsize;
2745 
2746 	jump_remaining_bytes_logrt(logfiles, logfiles_num, key, *jump_from_to, bytes_to_jump, seq, lastlogsize,
2747 			mtime, &jumped_to);
2748 
2749 	if (-1 == jumped_to)		/* no actual jump took place, no need to modify 'jump_from_to' */
2750 		return SUCCEED;
2751 
2752 	/* We have jumped into file, most likely somewhere in the middle of log line. Now find the beginning */
2753 	/* of a line to avoid pattern-matching a line from a random position. */
2754 
2755 	if (*jump_from_to == jumped_to)
2756 	{
2757 		/* jumped within the same file - do not search the beginning of a line before "pre-jump" position */
2758 		min_size = lastlogsize_org;
2759 	}
2760 	else
2761 	{
2762 		*jump_from_to = jumped_to;
2763 
2764 		/* jumped into different file - may search the beginning of a line from beginning of file */
2765 		min_size = 0;
2766 	}
2767 
2768 	return adjust_position_after_jump(&logfiles[jumped_to], lastlogsize, min_size, encoding, err_msg);
2769 }
2770 
calculate_remaining_bytes(struct st_logfile * logfiles,int logfiles_num)2771 static zbx_uint64_t	calculate_remaining_bytes(struct st_logfile *logfiles, int logfiles_num)
2772 {
2773 	zbx_uint64_t	remaining_bytes = 0;
2774 	int		i;
2775 
2776 	for (i = 0; i < logfiles_num; i++)
2777 		remaining_bytes += logfiles[i].size - logfiles[i].processed_size;
2778 
2779 	return remaining_bytes;
2780 }
2781 
transfer_for_rotate(const struct st_logfile * logfiles_old,int idx,struct st_logfile * logfiles,int logfiles_num,const char * old2new,int * seq)2782 static void	transfer_for_rotate(const struct st_logfile *logfiles_old, int idx, struct st_logfile *logfiles,
2783 		int logfiles_num, const char *old2new, int *seq)
2784 {
2785 	int	j;
2786 
2787 	if (0 < logfiles_old[idx].processed_size && 0 == logfiles_old[idx].incomplete &&
2788 			-1 != (j = find_old2new(old2new, logfiles_num, idx)))
2789 	{
2790 		if (logfiles_old[idx].size == logfiles_old[idx].processed_size &&
2791 				logfiles_old[idx].size == logfiles[j].size)
2792 		{
2793 			/* the file was fully processed during the previous check and must be ignored during this */
2794 			/* check */
2795 			logfiles[j].processed_size = logfiles[j].size;
2796 			logfiles[j].seq = (*seq)++;
2797 		}
2798 		else
2799 		{
2800 			/* the file was not fully processed during the previous check or has grown */
2801 			if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2802 				logfiles[j].processed_size = MIN(logfiles[j].size, logfiles_old[idx].processed_size);
2803 		}
2804 	}
2805 	else if (1 == logfiles_old[idx].incomplete && -1 != (j = find_old2new(old2new, logfiles_num, idx)))
2806 	{
2807 		if (logfiles_old[idx].size < logfiles[j].size)
2808 		{
2809 			/* The file was not fully processed because of incomplete last record but it has grown. */
2810 			/* Try to process it further. */
2811 			logfiles[j].incomplete = 0;
2812 		}
2813 		else
2814 			logfiles[j].incomplete = 1;
2815 
2816 		if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2817 			logfiles[j].processed_size = MIN(logfiles[j].size, logfiles_old[idx].processed_size);
2818 	}
2819 }
2820 
transfer_for_copytruncate(const struct st_logfile * logfiles_old,int idx,struct st_logfile * logfiles,int logfiles_num,const char * old2new,int * seq)2821 static void	transfer_for_copytruncate(const struct st_logfile *logfiles_old, int idx, struct st_logfile *logfiles,
2822 		int logfiles_num, const char *old2new, int *seq)
2823 {
2824 	const char	*p = old2new + idx * logfiles_num;	/* start of idx-th row in 'old2new' array */
2825 	int		j;
2826 
2827 	if (0 < logfiles_old[idx].processed_size && 0 == logfiles_old[idx].incomplete)
2828 	{
2829 		for (j = 0; j < logfiles_num; j++, p++)		/* loop over columns (new files) on idx-th row */
2830 		{
2831 			if ('1' == *p || '2' == *p)
2832 			{
2833 				if (logfiles_old[idx].size == logfiles_old[idx].processed_size &&
2834 						logfiles_old[idx].size == logfiles[j].size)
2835 				{
2836 					/* the file was fully processed during the previous check and must be ignored */
2837 					/* during this check */
2838 					logfiles[j].processed_size = logfiles[j].size;
2839 					logfiles[j].seq = (*seq)++;
2840 				}
2841 				else
2842 				{
2843 					/* the file was not fully processed during the previous check or has grown */
2844 					if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2845 					{
2846 						logfiles[j].processed_size = MIN(logfiles[j].size,
2847 								logfiles_old[idx].processed_size);
2848 					}
2849 				}
2850 			}
2851 		}
2852 	}
2853 	else if (1 == logfiles_old[idx].incomplete)
2854 	{
2855 		for (j = 0; j < logfiles_num; j++, p++)		/* loop over columns (new files) on idx-th row */
2856 		{
2857 			if ('1' == *p || '2' == *p)
2858 			{
2859 				if (logfiles_old[idx].size < logfiles[j].size)
2860 				{
2861 					/* The file was not fully processed because of incomplete last record but it */
2862 					/* has grown. Try to process it further. */
2863 					logfiles[j].incomplete = 0;
2864 				}
2865 				else
2866 					logfiles[j].incomplete = 1;
2867 
2868 				if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2869 				{
2870 					logfiles[j].processed_size = MIN(logfiles[j].size,
2871 							logfiles_old[idx].processed_size);
2872 				}
2873 			}
2874 		}
2875 	}
2876 }
2877 
/******************************************************************************
 *                                                                            *
 * Function: update_new_list_from_old                                         *
 *                                                                            *
 * Purpose: transfer processing state from the old file list to the new one   *
 *          and find the file to start the current check from                 *
 *                                                                            *
 * Parameters:                                                                *
 *     rotation_type    - [IN] simple rotation or copy/truncate rotation      *
 *     logfiles_old     - [IN] list of log files from the previous check      *
 *     logfiles_num_old - [IN] number of elements in 'logfiles_old'           *
 *     logfiles         - [IN/OUT] new list of log files                      *
 *     logfiles_num     - [IN] number of elements in 'logfiles'               *
 *     use_ino          - [IN] how to use inode numbers                       *
 *     seq              - [IN/OUT] next sequence number to assign             *
 *     start_idx        - [IN/OUT] index of the new file to start from        *
 *     lastlogsize      - [IN/OUT] offset from the beginning of the file      *
 *     err_msg          - [IN/OUT] error message                              *
 *                                                                            *
 * Return value: SUCCEED or FAIL (if 'old2new' mapping cannot be created)     *
 *                                                                            *
 ******************************************************************************/
static int	update_new_list_from_old(zbx_log_rotation_options_t rotation_type, struct st_logfile *logfiles_old,
		int logfiles_num_old, struct st_logfile *logfiles, int logfiles_num, int use_ino, int *seq,
		int *start_idx, zbx_uint64_t *lastlogsize, char **err_msg)
{
	char	*map;
	int	old_idx, newest_seq = 0, newest_idx = -1;

	map = create_old2new_and_copy_of(rotation_type, logfiles_old, logfiles_num_old, logfiles, logfiles_num,
			use_ino, err_msg);

	if (NULL == map)
		return FAIL;

	for (old_idx = 0; old_idx < logfiles_num_old; old_idx++)
	{
		/* move data about fully and partially processed files from the old list into the new list */
		if (ZBX_LOG_ROTATION_LOGCPT != rotation_type)
			transfer_for_rotate(logfiles_old, old_idx, logfiles, logfiles_num, map, seq);
		else
			transfer_for_copytruncate(logfiles_old, old_idx, logfiles, logfiles_num, map, seq);

		/* remember the old file with the highest sequence number - it was processed (fully or */
		/* partially) last in the previous check */
		if (logfiles_old[old_idx].seq > newest_seq)
		{
			newest_seq = logfiles_old[old_idx].seq;
			newest_idx = old_idx;
		}
	}

	/* 'newest_idx' is valid only if some old file had a positive sequence number */
	if (0 < newest_seq)
	{
		/* continue from the successor of the last processed old file */
		*start_idx = find_old2new(map, logfiles_num, newest_idx);

		if (-1 == *start_idx)
		{
			/* No successor found. Start from the first file of the new list and take */
			/* 'lastlogsize' from it. */
			*start_idx = 0;
			*lastlogsize = logfiles[0].processed_size;
		}
	}

	zbx_free(map);

	return SUCCEED;
}
2920 
2921 /******************************************************************************
2922  *                                                                            *
2923  * Function: process_logrt                                                    *
2924  *                                                                            *
2925  * Purpose: Find new records in logfiles                                      *
2926  *                                                                            *
2927  * Parameters:                                                                *
2928  *     flags            - [IN] bit flags with item type: log, logrt,          *
2929  *                        log.count or logrt.count                            *
2930  *     filename         - [IN] logfile name (regular expression with a path)  *
2931  *     lastlogsize      - [IN/OUT] offset from the beginning of the file      *
2932  *     mtime            - [IN/OUT] last modification time of the file         *
2933  *     lastlogsize_sent - [OUT] lastlogsize value that was last sent          *
2934  *     mtime_sent       - [OUT] mtime value that was last sent                *
2935  *     skip_old_data    - [IN/OUT] start from the beginning of the file or    *
2936  *                        jump to the end                                     *
2937  *     big_rec          - [IN/OUT] state variable to remember whether a long  *
2938  *                        record is being processed                           *
2939  *     use_ino          - [IN/OUT] how to use inode numbers                   *
2940  *     err_msg          - [IN/OUT] error message why an item became           *
2941  *                        NOTSUPPORTED                                        *
2942  *     logfiles_old     - [IN/OUT] array of logfiles from the last check      *
2943  *     logfiles_num_old - [IN] number of elements in "logfiles_old"           *
2944  *     logfiles_new     - [OUT] new array of logfiles                         *
2945  *     logfiles_num_new - [OUT] number of elements in "logfiles_new"          *
2946  *     encoding         - [IN] text string describing encoding.               *
2947  *                        See function find_cr_lf_szbyte() for supported      *
2948  *                        encodings.                                          *
2949  *                        "" (empty string) means a single-byte character set *
2950  *                        (e.g. ASCII).                                       *
2951  *     regexps          - [IN] array of regexps                               *
2952  *     pattern          - [IN] pattern to match                               *
2953  *     output_template  - [IN] output formatting template                     *
2954  *     p_count          - [IN/OUT] limit of records to be processed           *
2955  *     s_count          - [IN/OUT] limit of records to be sent to server      *
2956  *     process_value    - [IN] pointer to function process_value()            *
2957  *     server           - [IN] server to send data to                         *
2958  *     port             - [IN] port to send data to                           *
2959  *     hostname         - [IN] hostname the data comes from                   *
2960  *     key              - [IN] item key the data belongs to                   *
2961  *     jumped           - [OUT] flag to indicate that a jump took place       *
2962  *     max_delay        - [IN] maximum allowed delay, s                       *
2963  *     start_time       - [IN/OUT] start time of check                        *
2964  *     processed_bytes  - [IN/OUT] number of bytes processed                  *
2965  *     rotation_type    - [IN] simple rotation or copy/truncate rotation      *
2966  *                                                                            *
2967  * Return value: returns SUCCEED on successful reading,                       *
2968  *               FAIL on other cases                                          *
2969  *                                                                            *
2970  * Author: Dmitry Borovikov (logrotation)                                     *
2971  *                                                                            *
2972  ******************************************************************************/
process_logrt(unsigned char flags,const char * filename,zbx_uint64_t * lastlogsize,int * mtime,zbx_uint64_t * lastlogsize_sent,int * mtime_sent,unsigned char * skip_old_data,int * big_rec,int * use_ino,char ** err_msg,struct st_logfile ** logfiles_old,const int * logfiles_num_old,struct st_logfile ** logfiles_new,int * logfiles_num_new,const char * encoding,zbx_vector_ptr_t * regexps,const char * pattern,const char * output_template,int * p_count,int * s_count,zbx_process_value_func_t process_value,const char * server,unsigned short port,const char * hostname,const char * key,int * jumped,float max_delay,double * start_time,zbx_uint64_t * processed_bytes,zbx_log_rotation_options_t rotation_type)2973 int	process_logrt(unsigned char flags, const char *filename, zbx_uint64_t *lastlogsize, int *mtime,
2974 		zbx_uint64_t *lastlogsize_sent, int *mtime_sent, unsigned char *skip_old_data, int *big_rec,
2975 		int *use_ino, char **err_msg, struct st_logfile **logfiles_old, const int *logfiles_num_old,
2976 		struct st_logfile **logfiles_new, int *logfiles_num_new, const char *encoding,
2977 		zbx_vector_ptr_t *regexps, const char *pattern, const char *output_template, int *p_count, int *s_count,
2978 		zbx_process_value_func_t process_value, const char *server, unsigned short port, const char *hostname,
2979 		const char *key, int *jumped, float max_delay, double *start_time, zbx_uint64_t *processed_bytes,
2980 		zbx_log_rotation_options_t rotation_type)
2981 {
2982 	const char		*__function_name = "process_logrt";
2983 	int			i, start_idx, ret = FAIL, logfiles_num = 0, logfiles_alloc = 0, seq = 1,
2984 				from_first_file = 1, last_processed, limit_reached = 0, res;
2985 	struct st_logfile	*logfiles = NULL;
2986 	zbx_uint64_t		processed_bytes_sum = 0;
2987 
2988 	zabbix_log(LOG_LEVEL_DEBUG, "In %s() flags:0x%02x filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d",
2989 			__function_name, (unsigned int)flags, filename, *lastlogsize, *mtime);
2990 
2991 	adjust_mtime_to_clock(mtime);
2992 
2993 	if (SUCCEED != (res = make_logfile_list(flags, filename, *mtime, &logfiles, &logfiles_alloc, &logfiles_num,
2994 			use_ino, err_msg)))
2995 	{
2996 		if (ZBX_NO_FILE_ERROR == res)
2997 		{
2998 			if (1 == *skip_old_data)
2999 			{
3000 				*skip_old_data = 0;
3001 
3002 				zabbix_log(LOG_LEVEL_DEBUG, "%s(): no files, setting skip_old_data to 0",
3003 						__function_name);
3004 			}
3005 
3006 			if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags) && 0 == *logfiles_num_old)
3007 			{
3008 				/* Both the old and the new log file lists are empty. That means the agent has not */
3009 				/* seen any log files for this logrt[] item since started. If log files appear later */
3010 				/* then analyze them from start, do not apply the 'lastlogsize' received from server */
3011 				/* anymore. */
3012 
3013 				*lastlogsize = 0;
3014 			}
3015 		}
3016 
3017 		/* file was not accessible for a log[] or log.count[] item or an error occurred */
3018 		if (0 != (ZBX_METRIC_FLAG_LOG_LOG & flags) || (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags) && FAIL == res))
3019 			goto out;
3020 	}
3021 
3022 	if (0 == logfiles_num)
3023 	{
3024 		/* there were no files for a logrt[] or logrt.count[] item to analyze */
3025 		ret = SUCCEED;
3026 		goto out;
3027 	}
3028 
3029 	if (1 == *skip_old_data)
3030 	{
3031 		start_idx = logfiles_num - 1;
3032 
3033 		/* mark files to be skipped as processed (except the last one) */
3034 		for (i = 0; i < start_idx; i++)
3035 		{
3036 			logfiles[i].processed_size = logfiles[i].size;
3037 			logfiles[i].seq = seq++;
3038 		}
3039 	}
3040 	else
3041 		start_idx = 0;
3042 
3043 	if (0 < *logfiles_num_old && 0 < logfiles_num && SUCCEED != update_new_list_from_old(rotation_type,
3044 			*logfiles_old, *logfiles_num_old, logfiles, logfiles_num, *use_ino, &seq, &start_idx,
3045 			lastlogsize, err_msg))
3046 	{
3047 		destroy_logfile_list(&logfiles, &logfiles_alloc, &logfiles_num);
3048 		goto out;
3049 	}
3050 
3051 	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
3052 		ensure_order_if_mtimes_equal(*logfiles_old, logfiles, logfiles_num, *use_ino, &start_idx);
3053 
3054 	if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
3055 	{
3056 		zabbix_log(LOG_LEVEL_DEBUG, "%s() old file list:", __function_name);
3057 		if (NULL != *logfiles_old)
3058 			print_logfile_list(*logfiles_old, *logfiles_num_old);
3059 		else
3060 			zabbix_log(LOG_LEVEL_DEBUG, "   file list empty");
3061 
3062 		zabbix_log(LOG_LEVEL_DEBUG, "%s() new file list: (mtime:%d lastlogsize:" ZBX_FS_UI64
3063 				" start_idx:%d)", __function_name, *mtime, *lastlogsize, start_idx);
3064 		if (NULL != logfiles)
3065 			print_logfile_list(logfiles, logfiles_num);
3066 		else
3067 			zabbix_log(LOG_LEVEL_DEBUG, "   file list empty");
3068 	}
3069 
3070 	/* number of file last processed - start from this */
3071 	last_processed = start_idx;
3072 
3073 	/* from now assume success - it could be that there is nothing to do */
3074 	ret = SUCCEED;
3075 
3076 	if (0.0f != max_delay)
3077 	{
3078 		if (0.0 != *start_time)
3079 		{
3080 			zbx_uint64_t	remaining_bytes;
3081 
3082 			if (0 != (remaining_bytes = calculate_remaining_bytes(logfiles, logfiles_num)))
3083 			{
3084 				/* calculate delay and jump if necessary */
3085 
3086 				double	delay;
3087 
3088 				if ((double)max_delay < (delay = calculate_delay(*processed_bytes, remaining_bytes,
3089 						zbx_time() - *start_time)))
3090 				{
3091 					zbx_uint64_t	bytes_to_jump;
3092 
3093 					bytes_to_jump = (zbx_uint64_t)((double)remaining_bytes *
3094 							(delay - (double)max_delay) / delay);
3095 
3096 					if (SUCCEED == (ret = jump_ahead(key, logfiles, logfiles_num,
3097 							&last_processed, &seq, lastlogsize, mtime, encoding,
3098 							bytes_to_jump, err_msg)))
3099 					{
3100 						*jumped = 1;
3101 					}
3102 				}
3103 			}
3104 		}
3105 
3106 		*start_time = zbx_time();	/* mark new start time for using in the next check */
3107 	}
3108 
3109 	/* enter the loop with index of the first file to be processed, later continue the loop from the start */
3110 	i = last_processed;
3111 
3112 	while (NULL != logfiles && i < logfiles_num)
3113 	{
3114 		if (0 == logfiles[i].incomplete &&
3115 				(logfiles[i].size != logfiles[i].processed_size || 0 == logfiles[i].seq))
3116 		{
3117 			zbx_uint64_t	processed_bytes_tmp = 0, seek_offset;
3118 			int		process_this_file = 1;
3119 
3120 			*mtime = logfiles[i].mtime;
3121 
3122 			if (start_idx != i)
3123 				*lastlogsize = logfiles[i].processed_size;
3124 
3125 			if (0 == *skip_old_data)
3126 			{
3127 				seek_offset = *lastlogsize;
3128 			}
3129 			else
3130 			{
3131 				seek_offset = logfiles[i].size;
3132 
3133 				zabbix_log(LOG_LEVEL_DEBUG, "skipping old data in filename:'%s' to seek_offset:"
3134 						ZBX_FS_UI64, logfiles[i].filename, seek_offset);
3135 			}
3136 
3137 			if (ZBX_LOG_ROTATION_LOGCPT == rotation_type)
3138 			{
3139 				zbx_uint64_t	max_processed;
3140 
3141 				if (seek_offset < (max_processed = max_processed_size_in_copies(logfiles, logfiles_num,
3142 						i)))
3143 				{
3144 					logfiles[i].processed_size = MIN(logfiles[i].size, max_processed);
3145 
3146 					if (logfiles[i].size == logfiles[i].processed_size)
3147 						process_this_file = 0;
3148 
3149 					*lastlogsize = max_processed;
3150 				}
3151 			}
3152 
3153 			if (0 != process_this_file)
3154 			{
3155 				ret = process_log(flags, logfiles[i].filename, lastlogsize, mtime, lastlogsize_sent,
3156 						mtime_sent, skip_old_data, big_rec, &logfiles[i].incomplete, err_msg,
3157 						encoding, regexps, pattern, output_template, p_count, s_count,
3158 						process_value, server, port, hostname, key, &processed_bytes_tmp,
3159 						seek_offset);
3160 
3161 				/* process_log() advances 'lastlogsize' only on success therefore */
3162 				/* we do not check for errors here */
3163 				logfiles[i].processed_size = *lastlogsize;
3164 
3165 				/* log file could grow during processing, update size in our list */
3166 				if (*lastlogsize > logfiles[i].size)
3167 					logfiles[i].size = *lastlogsize;
3168 			}
3169 
3170 			/* Mark file as processed (at least partially). In case if process_log() failed we will stop */
3171 			/* the current checking. In the next check the file will be marked in the list of old files */
3172 			/* and we will know where we left off. */
3173 			logfiles[i].seq = seq++;
3174 
3175 			if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
3176 			{
3177 				int	k;
3178 
3179 				for (k = 0; k < logfiles_num - 1; k++)
3180 					handle_multiple_copies(logfiles, logfiles_num, k);
3181 			}
3182 
3183 			if (SUCCEED != ret)
3184 				break;
3185 
3186 			if (0.0f != max_delay)
3187 				processed_bytes_sum += processed_bytes_tmp;
3188 
3189 			if (0 >= *p_count || 0 >= *s_count)
3190 			{
3191 				limit_reached = 1;
3192 				break;
3193 			}
3194 		}
3195 
3196 		if (0 != from_first_file)
3197 		{
3198 			/* We have processed the file where we left off in the previous check. */
3199 			from_first_file = 0;
3200 
3201 			/* Now proceed from the beginning of the new file list to process the remaining files. */
3202 			i = 0;
3203 			continue;
3204 		}
3205 
3206 		i++;
3207 	}
3208 
3209 	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
3210 	{
3211 		/* If logrt[] or logrt.count[] item is checked often but rotation by copying is slow it could happen */
3212 		/* that the original file is completely processed but the copy with a newer timestamp is still in */
3213 		/* progress. The original file goes out of the list of files and the copy is analyzed as new file, */
3214 		/* so the matching lines are reported twice. To prevent this we manipulate our stored 'mtime' */
3215 		/* and 'lastlogsize' to keep information about copies in the list as long as necessary to prevent */
3216 		/* reporting twice. */
3217 
3218 		delay_update_if_copies(logfiles, logfiles_num, mtime, lastlogsize);
3219 	}
3220 
3221 	/* store the new log file list for using in the next check */
3222 	*logfiles_num_new = logfiles_num;
3223 
3224 	if (0 < logfiles_num)
3225 		*logfiles_new = logfiles;
3226 out:
3227 	if (0.0f != max_delay)
3228 	{
3229 		if (SUCCEED == ret)
3230 			*processed_bytes = processed_bytes_sum;
3231 
3232 		if (SUCCEED != ret || 0 == limit_reached)
3233 		{
3234 			/* FAIL or number of lines limits were not reached. */
3235 			/* Invalidate start_time to prevent jump in the next check. */
3236 			*start_time = 0.0;
3237 		}
3238 	}
3239 
3240 	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s", __function_name, zbx_result_string(ret));
3241 
3242 	return ret;
3243 }
3244