1 /*
2 ** Zabbix
3 ** Copyright (C) 2001-2021 Zabbix SIA
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 **/
19
20 #include "common.h"
21 #include "logfiles.h"
22 #include "log.h"
23 #include "active.h"
24
25 #if defined(_WINDOWS)
26 # include "symbols.h"
27 # include "zbxtypes.h" /* ssize_t */
28 #endif /* _WINDOWS */
29
#define MAX_LEN_MD5	512	/* maximum size of the initial part of the file to calculate MD5 sum for */

/* results of comparing a file from the old list with a file from the new list */
#define ZBX_SAME_FILE_ERROR	-1	/* the comparison itself failed (an error message is set) */
#define ZBX_SAME_FILE_NO	0	/* it is not the same file */
#define ZBX_SAME_FILE_YES	1	/* it could be (is assumed to be) the same file */
#define ZBX_SAME_FILE_RETRY	2	/* undecided, retry on the next check */
#define ZBX_NO_FILE_ERROR	3	/* NOTE(review): not used in the visible part of the file, meaning to be confirmed */
#define ZBX_SAME_FILE_COPY	4	/* one file is a copy of the other */

/* results of comparing the places (device and inode numbers) of two files */
#define ZBX_FILE_PLACE_UNKNOWN	-1	/* cannot compare file device and inode numbers */
#define ZBX_FILE_PLACE_OTHER	0	/* both files have different device or inode numbers */
#define ZBX_FILE_PLACE_SAME	1	/* both files have the same device and inode numbers */
42
43 /******************************************************************************
44 * *
45 * Function: split_string *
46 * *
47 * Purpose: separates given string to two parts by given delimiter in string *
48 * *
49 * Parameters: *
50 * str - [IN] a not-empty string to split *
51 * del - [IN] pointer to a character in the string *
52 * part1 - [OUT] pointer to buffer for the first part with delimiter *
53 * part2 - [OUT] pointer to buffer for the second part *
54 * *
55 * Return value: SUCCEED - on splitting without errors *
56 * FAIL - on splitting with errors *
57 * *
58 * Author: Dmitry Borovikov, Aleksandrs Saveljevs *
59 * *
60 * Comments: Memory for "part1" and "part2" is allocated only on SUCCEED. *
61 * *
62 ******************************************************************************/
split_string(const char * str,const char * del,char ** part1,char ** part2)63 static int split_string(const char *str, const char *del, char **part1, char **part2)
64 {
65 const char *__function_name = "split_string";
66 size_t str_length, part1_length, part2_length;
67 int ret = FAIL;
68
69 zabbix_log(LOG_LEVEL_DEBUG, "In %s() str:'%s' del:'%s'", __function_name, str, del);
70
71 str_length = strlen(str);
72
73 /* since the purpose of this function is to be used in split_filename(), we allow part1 to be */
74 /* just *del (e.g., "/" - file system root), but we do not allow part2 (filename) to be empty */
75 if (del < str || del >= (str + str_length - 1))
76 {
77 zabbix_log(LOG_LEVEL_DEBUG, "%s() cannot proceed: delimiter is out of range", __function_name);
78 goto out;
79 }
80
81 part1_length = (size_t)(del - str + 1);
82 part2_length = str_length - part1_length;
83
84 *part1 = (char *)zbx_malloc(*part1, part1_length + 1);
85 zbx_strlcpy(*part1, str, part1_length + 1);
86
87 *part2 = (char *)zbx_malloc(*part2, part2_length + 1);
88 zbx_strlcpy(*part2, str + part1_length, part2_length + 1);
89
90 ret = SUCCEED;
91 out:
92 zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s part1:'%s' part2:'%s'", __function_name, zbx_result_string(ret),
93 *part1, *part2);
94
95 return ret;
96 }
97
98 /******************************************************************************
99 * *
100 * Function: split_filename *
101 * *
102 * Purpose: separates full-path file name into directory and file name regexp *
103 * parts *
104 * *
105 * Parameters: *
106 * filename - [IN] first parameter of logrt[] or logrt.count[] *
107 * item *
108 * directory - [IN/OUT] directory part of the 'filename' *
109 * filename_regexp - [IN/OUT] file name regular expression part *
110 * err_msg - [IN/OUT] error message why an item became *
111 * NOTSUPPORTED *
112 * *
113 * Return value: SUCCEED - on successful splitting *
114 * FAIL - on unable to split sensibly *
115 * *
116 * Author: Dmitry Borovikov *
117 * *
118 * Comments: Allocates memory for "directory" and "filename_regexp" only on *
119 * SUCCEED. On FAIL memory, allocated for "directory" and *
120 * "filename_regexp" is freed. *
121 * *
122 ******************************************************************************/
split_filename(const char * filename,char ** directory,char ** filename_regexp,char ** err_msg)123 static int split_filename(const char *filename, char **directory, char **filename_regexp, char **err_msg)
124 {
125 const char *__function_name = "split_filename";
126 const char *separator = NULL;
127 zbx_stat_t buf;
128 int ret = FAIL;
129 #ifdef _WINDOWS
130 size_t sz;
131 #endif
132 zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s'", __function_name, ZBX_NULL2STR(filename));
133
134 if (NULL == filename || '\0' == *filename)
135 {
136 *err_msg = zbx_strdup(*err_msg, "Cannot split empty path.");
137 goto out;
138 }
139
140 #ifdef _WINDOWS
141 /* special processing for Windows, since directory name cannot be simply separated from file name regexp */
142 for (sz = strlen(filename) - 1, separator = &filename[sz]; separator >= filename; separator--)
143 {
144 if (PATH_SEPARATOR != *separator)
145 continue;
146
147 zabbix_log(LOG_LEVEL_DEBUG, "%s() %s", __function_name, filename);
148 zabbix_log(LOG_LEVEL_DEBUG, "%s() %*s", __function_name, separator - filename + 1, "^");
149
150 /* separator must be relative delimiter of the original filename */
151 if (FAIL == split_string(filename, separator, directory, filename_regexp))
152 {
153 *err_msg = zbx_dsprintf(*err_msg, "Cannot split path by \"%c\".", PATH_SEPARATOR);
154 goto out;
155 }
156
157 sz = strlen(*directory);
158
159 /* Windows world verification */
160 if (sz + 1 > MAX_PATH)
161 {
162 *err_msg = zbx_strdup(*err_msg, "Directory path is too long.");
163 zbx_free(*directory);
164 zbx_free(*filename_regexp);
165 goto out;
166 }
167
168 /* Windows "stat" functions cannot get info about directories with '\' at the end of the path, */
169 /* except for root directories 'x:\' */
170 if (0 == zbx_stat(*directory, &buf) && S_ISDIR(buf.st_mode))
171 break;
172
173 if (sz > 0 && PATH_SEPARATOR == (*directory)[sz - 1])
174 {
175 (*directory)[sz - 1] = '\0';
176
177 if (0 == zbx_stat(*directory, &buf) && S_ISDIR(buf.st_mode))
178 {
179 (*directory)[sz - 1] = PATH_SEPARATOR;
180 break;
181 }
182 }
183
184 zabbix_log(LOG_LEVEL_DEBUG, "cannot find directory '%s'", *directory);
185 zbx_free(*directory);
186 zbx_free(*filename_regexp);
187 }
188
189 if (separator < filename)
190 {
191 *err_msg = zbx_strdup(*err_msg, "Non-existing disk or directory.");
192 goto out;
193 }
194 #else /* not _WINDOWS */
195 if (NULL == (separator = strrchr(filename, PATH_SEPARATOR)))
196 {
197 *err_msg = zbx_dsprintf(*err_msg, "Cannot find separator \"%c\" in path.", PATH_SEPARATOR);
198 goto out;
199 }
200
201 if (SUCCEED != split_string(filename, separator, directory, filename_regexp))
202 {
203 *err_msg = zbx_dsprintf(*err_msg, "Cannot split path by \"%c\".", PATH_SEPARATOR);
204 goto out;
205 }
206
207 if (-1 == zbx_stat(*directory, &buf))
208 {
209 *err_msg = zbx_dsprintf(*err_msg, "Cannot obtain directory information: %s", zbx_strerror(errno));
210 zbx_free(*directory);
211 zbx_free(*filename_regexp);
212 goto out;
213 }
214
215 if (0 == S_ISDIR(buf.st_mode))
216 {
217 *err_msg = zbx_dsprintf(*err_msg, "Base path \"%s\" is not a directory.", *directory);
218 zbx_free(*directory);
219 zbx_free(*filename_regexp);
220 goto out;
221 }
222 #endif /* _WINDOWS */
223
224 ret = SUCCEED;
225 out:
226 zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s directory:'%s' filename_regexp:'%s'", __function_name,
227 zbx_result_string(ret), *directory, *filename_regexp);
228
229 return ret;
230 }
231
232 /******************************************************************************
233 * *
234 * Function: file_start_md5 *
235 * *
236 * Purpose: calculate the MD5 sum of the first block of the file *
237 * *
238 * Parameters: *
239 * f - [IN] file descriptor *
240 * length - [IN] length of the block in bytes. Maximum is 512 bytes. *
241 * md5buf - [OUT] output buffer, MD5_DIGEST_SIZE-bytes long, where the *
242 * calculated MD5 sum is placed *
243 * filename - [IN] file name, used in error logging *
244 * err_msg - [IN/OUT] error message why FAIL-ed *
245 * *
246 * Return value: SUCCEED or FAIL *
247 * *
248 ******************************************************************************/
static int	file_start_md5(int f, int length, md5_byte_t *md5buf, const char *filename, char **err_msg)
{
	md5_state_t	state;
	char		buf[MAX_LEN_MD5];
	int		rc;

	/* a negative length would turn into a huge 'size_t' in the read() call below and overflow 'buf' */
	if (0 > length)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Invalid MD5 fragment length %d.", length);
		return FAIL;
	}

	if (MAX_LEN_MD5 < length)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Length %d exceeds maximum MD5 fragment length of %d.", length,
				MAX_LEN_MD5);
		return FAIL;
	}

	/* the sum is always calculated from the very beginning of the file */
	if ((zbx_offset_t)-1 == zbx_lseek(f, 0, SEEK_SET))
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to 0 for file \"%s\": %s", filename,
				zbx_strerror(errno));
		return FAIL;
	}

	/* a short read is an error too: the sum must cover exactly 'length' bytes */
	if (length != (rc = (int)read(f, buf, (size_t)length)))
	{
		if (-1 == rc)
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read %d bytes from file \"%s\": %s", length, filename,
					zbx_strerror(errno));
		}
		else
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read %d bytes from file \"%s\". Read %d bytes only.",
					length, filename, rc);
		}

		return FAIL;
	}

	zbx_md5_init(&state);
	zbx_md5_append(&state, (const md5_byte_t *)buf, length);
	zbx_md5_finish(&state, md5buf);

	return SUCCEED;
}
291
292 #ifdef _WINDOWS
293 /******************************************************************************
294 * *
295 * Function: file_id *
296 * *
297 * Purpose: get Microsoft Windows file device ID, 64-bit FileIndex or *
298 * 128-bit FileId *
299 * *
300 * Parameters: *
301 * f - [IN] file descriptor *
302 * use_ino - [IN] how to use file IDs *
303 * dev - [OUT] device ID *
304 * ino_lo - [OUT] 64-bit nFileIndex or lower 64-bits of FileId *
305 * ino_hi - [OUT] higher 64-bits of FileId *
306 * filename - [IN] file name, used in error logging *
307 * err_msg - [IN/OUT] error message why an item became NOTSUPPORTED *
308 * *
309 * Return value: SUCCEED or FAIL *
310 * *
311 ******************************************************************************/
static int	file_id(int f, int use_ino, zbx_uint64_t *dev, zbx_uint64_t *ino_lo, zbx_uint64_t *ino_hi,
		const char *filename, char **err_msg)
{
	int				ret = FAIL;
	intptr_t			h;	/* file HANDLE */
	BY_HANDLE_FILE_INFORMATION	hfi;
	ZBX_FILE_ID_INFO		fid;

	if (-1 == (h = _get_osfhandle(f)))
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain handle from descriptor of file \"%s\": %s",
				filename, zbx_strerror(errno));
		return ret;
	}

	if (1 == use_ino || 0 == use_ino)
	{
		/* Although nFileIndexHigh and nFileIndexLow cannot be reliably used to identify files when */
		/* use_ino = 0 (e.g. on FAT32, exFAT), we copy indexes to have at least correct debug logs. */
		if (0 != GetFileInformationByHandle((HANDLE)h, &hfi))
		{
			*dev = hfi.dwVolumeSerialNumber;
			*ino_lo = (zbx_uint64_t)hfi.nFileIndexHigh << 32 | (zbx_uint64_t)hfi.nFileIndexLow;
			*ino_hi = 0;
		}
		else
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain information for file \"%s\": %s",
					filename, strerror_from_system(GetLastError()));
			return ret;
		}
	}
	else if (2 == use_ino)
	{
		/* Without the extended API the 128-bit FileId cannot be obtained. Report it as an error */
		/* instead of returning SUCCEED with 'dev', 'ino_lo' and 'ino_hi' left unset. */
		if (NULL == zbx_GetFileInformationByHandleEx)
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain extended information for file"
					" \"%s\": GetFileInformationByHandleEx() is not available", filename);
			return ret;
		}

		if (0 != zbx_GetFileInformationByHandleEx((HANDLE)h, zbx_FileIdInfo, &fid, sizeof(fid)))
		{
			*dev = fid.VolumeSerialNumber;
			*ino_lo = fid.FileId.LowPart;
			*ino_hi = fid.FileId.HighPart;
		}
		else
		{
			*err_msg = zbx_dsprintf(*err_msg, "Cannot obtain extended information for file"
					" \"%s\": %s", filename, strerror_from_system(GetLastError()));
			return ret;
		}
	}
	else
	{
		THIS_SHOULD_NEVER_HAPPEN;
		return ret;
	}

	ret = SUCCEED;

	return ret;
}
372
373 /******************************************************************************
374 * *
375 * Function: set_use_ino_by_fs_type *
376 * *
377 * Purpose: find file system type and set 'use_ino' parameter *
378 * *
379 * Parameters: *
380 * path - [IN] directory or file name *
381 * use_ino - [OUT] how to use file IDs *
382 * err_msg - [IN/OUT] error message why an item became NOTSUPPORTED *
383 * *
384 * Return value: SUCCEED or FAIL *
385 * *
386 ******************************************************************************/
set_use_ino_by_fs_type(const char * path,int * use_ino,char ** err_msg)387 static int set_use_ino_by_fs_type(const char *path, int *use_ino, char **err_msg)
388 {
389 char *utf8;
390 wchar_t *path_uni, mount_point[MAX_PATH + 1], fs_type[MAX_PATH + 1];
391
392 path_uni = zbx_utf8_to_unicode(path);
393
394 /* get volume mount point */
395 if (0 == GetVolumePathName(path_uni, mount_point,
396 sizeof(mount_point) / sizeof(wchar_t)))
397 {
398 *err_msg = zbx_dsprintf(*err_msg, "Cannot obtain volume mount point for file \"%s\": %s", path,
399 strerror_from_system(GetLastError()));
400 zbx_free(path_uni);
401 return FAIL;
402 }
403
404 zbx_free(path_uni);
405
406 /* Which file system type this directory resides on ? */
407 if (0 == GetVolumeInformation(mount_point, NULL, 0, NULL, NULL, NULL, fs_type,
408 sizeof(fs_type) / sizeof(wchar_t)))
409 {
410 utf8 = zbx_unicode_to_utf8(mount_point);
411 *err_msg = zbx_dsprintf(*err_msg, "Cannot obtain volume information for directory \"%s\": %s", utf8,
412 strerror_from_system(GetLastError()));
413 zbx_free(utf8);
414 return FAIL;
415 }
416
417 utf8 = zbx_unicode_to_utf8(fs_type);
418
419 if (0 == strcmp(utf8, "NTFS"))
420 *use_ino = 1; /* 64-bit FileIndex */
421 else if (0 == strcmp(utf8, "ReFS"))
422 *use_ino = 2; /* 128-bit FileId */
423 else
424 *use_ino = 0; /* cannot use inodes to identify files (e.g. FAT32) */
425
426 zabbix_log(LOG_LEVEL_DEBUG, "log files reside on '%s' file system", utf8);
427 zbx_free(utf8);
428
429 return SUCCEED;
430 }
431 #endif
432
433 /******************************************************************************
434 * *
435 * Function: print_logfile_list *
436 * *
437 * Purpose: write logfile list into log for debugging *
438 * *
439 * Parameters: *
440 * logfiles - [IN] array of logfiles *
441 * logfiles_num - [IN] number of elements in the array *
442 * *
443 ******************************************************************************/
print_logfile_list(const struct st_logfile * logfiles,int logfiles_num)444 static void print_logfile_list(const struct st_logfile *logfiles, int logfiles_num)
445 {
446 int i;
447
448 for (i = 0; i < logfiles_num; i++)
449 {
450 zabbix_log(LOG_LEVEL_DEBUG, " nr:%d filename:'%s' mtime:%d size:" ZBX_FS_UI64 " processed_size:"
451 ZBX_FS_UI64 " seq:%d copy_of:%d incomplete:%d dev:" ZBX_FS_UI64 " ino_hi:" ZBX_FS_UI64
452 " ino_lo:" ZBX_FS_UI64
453 " md5size:%d md5buf:%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x",
454 i, logfiles[i].filename, logfiles[i].mtime, logfiles[i].size,
455 logfiles[i].processed_size, logfiles[i].seq, logfiles[i].copy_of,
456 logfiles[i].incomplete, logfiles[i].dev, logfiles[i].ino_hi, logfiles[i].ino_lo,
457 logfiles[i].md5size, logfiles[i].md5buf[0], logfiles[i].md5buf[1],
458 logfiles[i].md5buf[2], logfiles[i].md5buf[3], logfiles[i].md5buf[4],
459 logfiles[i].md5buf[5], logfiles[i].md5buf[6], logfiles[i].md5buf[7],
460 logfiles[i].md5buf[8], logfiles[i].md5buf[9], logfiles[i].md5buf[10],
461 logfiles[i].md5buf[11], logfiles[i].md5buf[12], logfiles[i].md5buf[13],
462 logfiles[i].md5buf[14], logfiles[i].md5buf[15]);
463 }
464 }
465
466 /******************************************************************************
467 * *
468 * Function: compare_file_places *
469 * *
470 * Purpose: compare device numbers and inode numbers of 2 files *
471 * *
472 * Parameters: old_file - [IN] details of the 1st log file *
473 * new_file - [IN] details of the 2nd log file *
474 * use_ino - [IN] 0 - do not use inodes in comparison, *
475 * 1 - use up to 64-bit inodes in comparison, *
476 * 2 - use 128-bit inodes in comparison. *
477 * *
478 * Return value: ZBX_FILE_PLACE_SAME - both files have the same place *
479 * ZBX_FILE_PLACE_OTHER - files reside in different places *
480 * ZBX_FILE_PLACE_UNKNOWN - cannot compare places (no inodes) *
481 * *
482 ******************************************************************************/
compare_file_places(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino)483 static int compare_file_places(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino)
484 {
485 if (1 == use_ino || 2 == use_ino)
486 {
487 if (old_file->ino_lo != new_file->ino_lo || old_file->dev != new_file->dev ||
488 (2 == use_ino && old_file->ino_hi != new_file->ino_hi))
489 {
490 return ZBX_FILE_PLACE_OTHER;
491 }
492 else
493 return ZBX_FILE_PLACE_SAME;
494 }
495
496 return ZBX_FILE_PLACE_UNKNOWN;
497 }
498
499 /******************************************************************************
500 * *
501 * Function: open_file_helper *
502 * *
503 * Purpose: open specified file for reading *
504 * *
505 * Parameters: pathname - [IN] full pathname of file *
506 * err_msg - [IN/OUT] error message why file could not be opened *
507 * *
508 * Return value: file descriptor on success or -1 on error *
509 * *
510 ******************************************************************************/
static int	open_file_helper(const char *pathname, char **err_msg)
{
	const int	descriptor = zbx_open(pathname, O_RDONLY);

	if (-1 != descriptor)
		return descriptor;

	/* opening failed: describe the reason for the caller */
	*err_msg = zbx_dsprintf(*err_msg, "Cannot open file \"%s\": %s", pathname, zbx_strerror(errno));

	return -1;
}
520
521 /******************************************************************************
522 * *
523 * Function: close_file_helper *
524 * *
525 * Purpose: close specified file *
526 * *
527 * Parameters: fd - [IN] file descriptor to close *
528 * pathname - [IN] pathname of file, used for error reporting *
529 * err_msg - [IN/OUT] error message why file could not be closed *
530 * *
531 * Return value: SUCCEED or FAIL *
532 * *
533 ******************************************************************************/
close_file_helper(int fd,const char * pathname,char ** err_msg)534 static int close_file_helper(int fd, const char *pathname, char **err_msg)
535 {
536 if (0 == close(fd))
537 return SUCCEED;
538
539 *err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", pathname, zbx_strerror(errno));
540
541 return FAIL;
542 }
543
544 /******************************************************************************
545 * *
546 * Function: examine_md5_and_place *
547 * *
548 * Purpose: from MD5 sums of initial blocks and places of 2 files make *
549 * a conclusion is it the same file, a pair 'original/copy' or *
550 * 2 different files *
551 * *
552 * Parameters: buf1 - [IN] MD5 sum of initial block of the 1st file *
553 * buf2 - [IN] MD5 sum of initial block of the 2nd file *
554 * is_same_place - [IN] equality of file places *
555 * *
556 * Return value: ZBX_SAME_FILE_NO - they are 2 different files *
557 * ZBX_SAME_FILE_YES - 2 files are (assumed) to be the same *
558 * ZBX_SAME_FILE_COPY - one file is copy of the other *
559 * *
560 * Comments: in case files places are unknown but MD5 sums of initial blocks *
561 * match it is assumed to be the same file *
562 * *
563 ******************************************************************************/
examine_md5_and_place(const md5_byte_t * buf1,const md5_byte_t * buf2,size_t size,int is_same_place)564 static int examine_md5_and_place(const md5_byte_t *buf1, const md5_byte_t *buf2, size_t size, int is_same_place)
565 {
566 if (0 == memcmp(buf1, buf2, size))
567 {
568 switch (is_same_place)
569 {
570 case ZBX_FILE_PLACE_UNKNOWN:
571 case ZBX_FILE_PLACE_SAME:
572 return ZBX_SAME_FILE_YES;
573 case ZBX_FILE_PLACE_OTHER:
574 return ZBX_SAME_FILE_COPY;
575 }
576 }
577
578 return ZBX_SAME_FILE_NO;
579 }
580
581 /******************************************************************************
582 * *
583 * Function: is_same_file_logcpt *
584 * *
585 * Purpose: find out if a file from the old list and a file from the new list *
586 * could be the same file or copy in case of copy/truncate rotation *
587 * *
588 * Parameters: *
589 * old_file - [IN] file from the old list *
590 * new_file - [IN] file from the new list *
591 * use_ino - [IN] 0 - do not use inodes in comparison, *
592 * 1 - use up to 64-bit inodes in comparison, *
593 * 2 - use 128-bit inodes in comparison. *
594 * err_msg - [IN/OUT] error message why an item became *
595 * NOTSUPPORTED *
596 * *
597 * Return value: ZBX_SAME_FILE_NO - it is not the same file *
598 * ZBX_SAME_FILE_YES - it could be the same file *
599 * ZBX_SAME_FILE_COPY - it is a copy *
600 * ZBX_SAME_FILE_ERROR - error *
601 * *
602 * Comments: In some cases we can say that it IS NOT the same file. *
603 * In other cases it COULD BE the same file or copy. *
604 * *
605 ******************************************************************************/
is_same_file_logcpt(const struct st_logfile * old_file,const struct st_logfile * new_file,int use_ino,char ** err_msg)606 static int is_same_file_logcpt(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino,
607 char **err_msg)
608 {
609 int is_same_place;
610
611 if (old_file->mtime > new_file->mtime)
612 return ZBX_SAME_FILE_NO;
613
614 if (-1 == old_file->md5size || -1 == new_file->md5size)
615 {
616 /* Cannot compare MD5 sums. Assume two different files - reporting twice is better than skipping. */
617 return ZBX_SAME_FILE_NO;
618 }
619
620 is_same_place = compare_file_places(old_file, new_file, use_ino);
621
622 if (old_file->md5size == new_file->md5size)
623 {
624 return examine_md5_and_place(old_file->md5buf, new_file->md5buf, sizeof(new_file->md5buf),
625 is_same_place);
626 }
627
628 if (0 < old_file->md5size && 0 < new_file->md5size)
629 {
630 /* MD5 sums have been calculated from initial blocks of diferent sizes */
631
632 const struct st_logfile *p_smaller, *p_larger;
633 int f, ret;
634 md5_byte_t md5tmp[MD5_DIGEST_SIZE];
635
636 if (old_file->md5size < new_file->md5size)
637 {
638 p_smaller = old_file;
639 p_larger = new_file;
640 }
641 else
642 {
643 p_smaller = new_file;
644 p_larger = old_file;
645 }
646
647 if (-1 == (f = open_file_helper(p_larger->filename, err_msg)))
648 return ZBX_SAME_FILE_ERROR;
649
650 if (SUCCEED == file_start_md5(f, p_smaller->md5size, md5tmp, p_larger->filename, err_msg))
651 ret = examine_md5_and_place(p_smaller->md5buf, md5tmp, sizeof(md5tmp), is_same_place);
652 else
653 ret = ZBX_SAME_FILE_ERROR;
654
655 if (0 != close(f))
656 {
657 if (ZBX_SAME_FILE_ERROR != ret)
658 {
659 *err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", p_larger->filename,
660 zbx_strerror(errno));
661 ret = ZBX_SAME_FILE_ERROR;
662 }
663 }
664
665 return ret;
666 }
667
668 return ZBX_SAME_FILE_NO;
669 }
670
671 /******************************************************************************
672 * *
673 * Function: is_same_file_logrt *
674 * *
675 * Purpose: find out if a file from the old list and a file from the new list *
676 * could be the same file in case of simple rotation *
677 * *
678 * Parameters: *
679 * old_file - [IN] file from the old list *
680 * new_file - [IN] file from the new list *
681 * use_ino - [IN] 0 - do not use inodes in comparison, *
682 * 1 - use up to 64-bit inodes in comparison, *
683 * 2 - use 128-bit inodes in comparison. *
684 * options - [IN] log rotation options *
685 * err_msg - [IN/OUT] error message why an item became *
686 * NOTSUPPORTED *
687 * *
688 * Return value: ZBX_SAME_FILE_NO - it is not the same file, *
689 * ZBX_SAME_FILE_YES - it could be the same file, *
690 * ZBX_SAME_FILE_ERROR - error. *
691 * ZBX_SAME_FILE_RETRY - retry on the next check *
692 * *
693 * Comments: In some cases we can say that it IS NOT the same file. *
694 * We can never say that it IS the same file and it has not been *
695 * truncated and replaced with a similar one. *
696 * *
697 ******************************************************************************/
static int	is_same_file_logrt(const struct st_logfile *old_file, const struct st_logfile *new_file, int use_ino,
		zbx_log_rotation_options_t options, char **err_msg)
{
	md5_byte_t	head_md5[MD5_DIGEST_SIZE];
	int		fd, verdict;

	/* copy/truncate rotation has its own comparison rules */
	if (ZBX_LOG_ROTATION_LOGCPT == options)
		return is_same_file_logcpt(old_file, new_file, use_ino, err_msg);

	/* The same file cannot reside on different devices or occupy different inodes. */
	/* File size cannot decrease. Truncating or replacing a file with a smaller one */
	/* counts as 2 different files. */
	if (ZBX_FILE_PLACE_OTHER == compare_file_places(old_file, new_file, use_ino) ||
			old_file->size > new_file->size)
	{
		return ZBX_SAME_FILE_NO;
	}

	if (old_file->size == new_file->size && old_file->mtime < new_file->mtime)
	{
		/* Depending on file system it's possible that stat() was called */
		/* between mtime and file size update. In this situation we will */
		/* get a file with the old size and a new mtime. */
		/* On the first try we assume it's the same file, just its size */
		/* has not been changed yet. */
		/* If the size has not changed on the next check, then we assume */
		/* that some tampering was done and to be safe we will treat it */
		/* as a different file (unless re-reading is disabled). */

		const int	no_reread = (ZBX_LOG_ROTATION_NO_REREAD == options);

		if (0 == old_file->retry)
		{
			if (0 == no_reread)
			{
				zabbix_log(LOG_LEVEL_WARNING, "the modification time of log file \"%s\" has been"
						" updated without changing its size, try checking again later",
						old_file->filename);
			}

			return ZBX_SAME_FILE_RETRY;
		}

		if (0 != no_reread)
		{
			zabbix_log(LOG_LEVEL_WARNING, "after changing modification time the size of log file \"%s\""
					" still has not been updated, consider it to be same file",
					old_file->filename);

			return ZBX_SAME_FILE_YES;
		}

		zabbix_log(LOG_LEVEL_WARNING, "after changing modification time the size of log file \"%s\""
				" still has not been updated, consider it to be a new file", old_file->filename);

		return ZBX_SAME_FILE_NO;
	}

	/* If an MD5 sum is missing (-1) the sums cannot be compared. Assume two different files - */
	/* reporting twice is better than skipping. */
	/* Also the size of the initial block from which MD5 sum is calculated cannot decrease. */
	if (-1 == old_file->md5size || -1 == new_file->md5size || old_file->md5size > new_file->md5size)
		return ZBX_SAME_FILE_NO;

	/* initial blocks of equal size: the stored MD5 sums decide */
	if (old_file->md5size == new_file->md5size)
	{
		return (0 == memcmp(old_file->md5buf, new_file->md5buf, sizeof(new_file->md5buf))) ?
				ZBX_SAME_FILE_YES : ZBX_SAME_FILE_NO;
	}

	/* the old file had no initial block to calculate MD5 sum from - nothing contradicts "same file" */
	if (0 >= old_file->md5size)
		return ZBX_SAME_FILE_YES;

	/* MD5 for the old file has been calculated from a smaller block than for the new file: */
	/* recalculate the sum of the new file using the old block size and compare */
	if (-1 == (fd = open_file_helper(new_file->filename, err_msg)))
		return ZBX_SAME_FILE_ERROR;

	if (SUCCEED != file_start_md5(fd, old_file->md5size, head_md5, new_file->filename, err_msg))
		verdict = ZBX_SAME_FILE_ERROR;
	else if (0 == memcmp(old_file->md5buf, head_md5, sizeof(head_md5)))
		verdict = ZBX_SAME_FILE_YES;
	else
		verdict = ZBX_SAME_FILE_NO;

	/* the file is always closed; a close error is reported only if no earlier error message is pending */
	if (0 != close(fd) && ZBX_SAME_FILE_ERROR != verdict)
	{
		*err_msg = zbx_dsprintf(*err_msg, "Cannot close file \"%s\": %s", new_file->filename,
				zbx_strerror(errno));
		verdict = ZBX_SAME_FILE_ERROR;
	}

	return verdict;
}
805
806 /******************************************************************************
807 * *
808 * Function: cross_out *
809 * *
810 * Purpose: fill the given row and column with '0' except the element at the *
811 * cross point and protected columns and protected rows *
812 * *
813 * Parameters: *
814 * arr - [IN/OUT] two dimensional array *
815 * n_rows - [IN] number of rows in the array *
816 * n_cols - [IN] number of columns in the array *
817 * row - [IN] number of cross point row *
818 * col - [IN] number of cross point column *
819 * p_rows - [IN] vector with 'n_rows' elements. *
820 * Value '1' means protected row. *
821 * p_cols - [IN] vector with 'n_cols' elements. *
822 * Value '1' means protected column. *
823 * *
824 * Example: *
825 * Given array *
826 * *
827 * 1 1 1 1 *
828 * 1 1 1 1 *
829 * 1 1 1 1 *
830 * *
831 * and row = 1, col = 2 and no protected rows and columns *
832 * the array is modified as *
833 * *
834 * 1 1 0 1 *
835 * 0 0 1 0 *
836 * 1 1 0 1 *
837 * *
838 ******************************************************************************/
static void	cross_out(char *arr, int n_rows, int n_cols, int row, int col, const char *p_rows, const char *p_cols)
{
	int	r, c;

	/* clear the cross point row, skipping protected columns and the cross point itself */
	for (c = 0; c < n_cols; c++)
	{
		if ('1' == p_cols[c] || c == col)
			continue;

		arr[row * n_cols + c] = '0';
	}

	/* clear the cross point column, skipping protected rows and the cross point itself */
	for (r = 0; r < n_rows; r++)
	{
		if ('1' == p_rows[r] || r == row)
			continue;

		arr[r * n_cols + col] = '0';
	}
}
860
861 /******************************************************************************
862 * *
863 * Function: is_uniq_row *
864 * *
865 * Purpose: check if there is only one element '1' or '2' in the given row *
866 * *
867 * Parameters: *
868 * arr - [IN] two dimensional array *
869 * n_cols - [IN] number of columns in the array *
870 * row - [IN] number of row to search *
871 * *
872 * Return value: number of column where the element '1' or '2' was found or *
873 * -1 if there are zero or multiple elements '1' or '2' in the *
874 * row *
875 * *
876 ******************************************************************************/
static int	is_uniq_row(const char * const arr, int n_cols, int row)
{
	const char	*cells = arr + row * n_cols;	/* the first element of the 'row' */
	int		c, found = -1;

	for (c = 0; c < n_cols; c++)
	{
		if ('1' != cells[c] && '2' != cells[c])
			continue;

		/* a second mapping in the same row - the mapping is not unique */
		if (-1 != found)
			return -1;

		found = c;
	}

	/* column of the only mapping or -1 if the row has no mappings */
	return found;
}
902
/******************************************************************************
 *                                                                            *
 * Function: is_uniq_col                                                      *
 *                                                                            *
 * Purpose: find the only element '1' or '2' in the given column              *
 *                                                                            *
 * Parameters:                                                                *
 *      arr    - [IN] two-dimensional array stored row after row              *
 *      n_rows - [IN] number of rows in the array                             *
 *      n_cols - [IN] number of columns in the array                          *
 *      col    - [IN] index of the column to examine                          *
 *                                                                            *
 * Return value: row index of the only '1' or '2' element, or -1 if the       *
 *               column holds none or more than one of them                   *
 *                                                                            *
 ******************************************************************************/
static int	is_uniq_col(const char * const arr, int n_rows, int n_cols, int col)
{
	int	row, found = -1;

	for (row = 0; row < n_rows; row++)
	{
		const char	cell = arr[row * n_cols + col];	/* element of the 'col' in the current row */

		if ('1' != cell && '2' != cell)
			continue;

		if (-1 != found)	/* the second candidate - mapping in the column is not unique */
			return -1;

		found = row;
	}

	return found;
}
945
946 /******************************************************************************
947 * *
948 * Function: is_old2new_unique_mapping *
949 * *
950 * Purpose: check if 'old2new' array has only unique mappings *
951 * *
952 * Parameters: *
953 * old2new - [IN] two dimensional array of possible mappings *
954 * num_old - [IN] number of elements in the old file list *
955 * num_new - [IN] number of elements in the new file list *
956 * *
957 * Return value: SUCCEED - all mappings are unique, *
958 * FAIL - there are non-unique mappings *
959 * *
960 ******************************************************************************/
is_old2new_unique_mapping(const char * const old2new,int num_old,int num_new)961 static int is_old2new_unique_mapping(const char * const old2new, int num_old, int num_new)
962 {
963 int i;
964
965 /* Is there 1:1 mapping in both directions between files in the old and the new list ? */
966 /* In this case every row and column has not more than one element '1' or '2', others are '0'. */
967 /* This is expected on UNIX (using inode numbers) and MS Windows (using FileID on NTFS, ReFS) */
968 /* unless 'copytruncate' rotation type is combined with multiple log file copies. */
969
970 for (i = 0; i < num_old; i++) /* loop over rows (old files) */
971 {
972 if (-1 == is_uniq_row(old2new, num_new, i))
973 return FAIL;
974 }
975
976 for (i = 0; i < num_new; i++) /* loop over columns (new files) */
977 {
978 if (-1 == is_uniq_col(old2new, num_old, num_new, i))
979 return FAIL;
980 }
981
982 return SUCCEED;
983 }
984
985 /******************************************************************************
986 * *
987 * Function: resolve_old2new *
988 * *
989 * Purpose: resolve non-unique mappings *
990 * *
991 * Parameters: *
992 * old2new - [IN] two dimensional array of possible mappings *
993 * num_old - [IN] number of elements in the old file list *
994 * num_new - [IN] number of elements in the new file list *
995 * *
996 ******************************************************************************/
resolve_old2new(char * old2new,int num_old,int num_new)997 static void resolve_old2new(char *old2new, int num_old, int num_new)
998 {
999 int i;
1000 char *protected_rows = NULL, *protected_cols = NULL;
1001
1002 if (SUCCEED == is_old2new_unique_mapping(old2new, num_old, num_new))
1003 return;
1004
1005 /* Non-unique mapping is expected: */
1006 /* - on MS Windows using FAT32 and other file systems where inodes or file indexes are either not */
1007 /* preserved if a file is renamed or are not applicable, */
1008 /* - in 'copytruncate' rotation mode if multiple copies of log files are present. */
1009
1010 zabbix_log(LOG_LEVEL_DEBUG, "resolve_old2new(): non-unique mapping");
1011
1012 /* protect unique mappings from further modifications */
1013
1014 protected_rows = (char *)zbx_calloc(protected_rows, (size_t)num_old, sizeof(char));
1015 protected_cols = (char *)zbx_calloc(protected_cols, (size_t)num_new, sizeof(char));
1016
1017 for (i = 0; i < num_old; i++)
1018 {
1019 int c;
1020
1021 if (-1 != (c = is_uniq_row(old2new, num_new, i)) && -1 != is_uniq_col(old2new, num_old, num_new, c))
1022 {
1023 protected_rows[i] = '1';
1024 protected_cols[c] = '1';
1025 }
1026 }
1027
1028 /* resolve the remaining non-unique mappings - turn them into unique ones */
1029
1030 if (num_old <= num_new) /* square or wide array */
1031 {
1032 /****************************************************************************************************
1033 * *
1034 * Example for a wide array: *
1035 * *
1036 * D.log C.log B.log A.log *
1037 * ------------------------ *
1038 * 3.log | <1> 1 1 1 *
1039 * 2.log | 1 <1> 1 1 *
1040 * 1.log | 1 1 <1> 1 *
1041 * *
1042 * There are 3 files in the old log file list and 4 files in the new log file list. *
1043 * The mapping is totally non-unique: the old log file '3.log' could have become the new 'D.log' or *
1044 * 'C.log', or 'B.log', or 'A.log' - we don't know for sure. *
1045 * We make an assumption that a reasonable solution will be to proceed as if '3.log' was renamed to *
1046 * 'D.log', '2.log' - to 'C.log' and '1.log' - to 'B.log'. *
1047 * We modify the array according to this assumption: *
1048 * *
1049 * D.log C.log B.log A.log *
1050 * ------------------------ *
1051 * 3.log | <1> 0 0 0 *
1052 * 2.log | 0 <1> 0 0 *
1053 * 1.log | 0 0 <1> 0 *
1054 * *
1055 * Now the mapping is unique. The file 'A.log' is counted as a new file to be analyzed from the *
1056 * start. *
1057 * *
1058 ****************************************************************************************************/
1059
1060 for (i = 0; i < num_old; i++) /* loop over rows from top-left corner */
1061 {
1062 char *p;
1063 int j;
1064
1065 if ('1' == protected_rows[i])
1066 continue;
1067
1068 p = old2new + i * num_new; /* the first element of the current row */
1069
1070 for (j = 0; j < num_new; j++)
1071 {
1072 if (('1' == p[j] || '2' == p[j]) && '1' != protected_cols[j])
1073 {
1074 cross_out(old2new, num_old, num_new, i, j, protected_rows, protected_cols);
1075 break;
1076 }
1077 }
1078 }
1079 }
1080 else /* tall array */
1081 {
1082 /****************************************************************************************************
1083 * *
1084 * Example for a tall array: *
1085 * *
1086 * D.log C.log B.log A.log *
1087 * ------------------------ *
1088 * 6.log | 1 1 1 1 *
1089 * 5.log | 1 1 1 1 *
1090 * 4.log | <1> 1 1 1 *
1091 * 3.log | 1 <1> 1 1 *
1092 * 2.log | 1 1 <1> 1 *
1093 * 1.log | 1 1 1 <1> *
1094 * *
1095 * There are 6 files in the old log file list and 4 files in the new log file list. *
1096 * The mapping is totally non-unique: the old log file '6.log' could have become the new 'D.log' or *
1097 * 'C.log', or 'B.log', or 'A.log' - we don't know for sure. *
1098 * We make an assumption that a reasonable solution will be to proceed as if '1.log' was renamed to *
1099 * 'A.log', '2.log' - to 'B.log', '3.log' - to 'C.log', '4.log' - to 'D.log'. *
1100 * We modify the array according to this assumption: *
1101 * *
1102 * D.log C.log B.log A.log *
1103 * ------------------------ *
1104 * 6.log | 0 0 0 0 *
1105 * 5.log | 0 0 0 0 *
1106 * 4.log | <1> 0 0 0 *
1107 * 3.log | 0 <1> 0 0 *
1108 * 2.log | 0 0 <1> 0 *
1109 * 1.log | 0 0 0 <1> *
1110 * *
1111 * Now the mapping is unique. Files '6.log' and '5.log' are counted as not present in the new file. *
1112 * *
1113 ****************************************************************************************************/
1114
1115 for (i = num_old - 1; i >= 0; i--) /* loop over rows from bottom-right corner */
1116 {
1117 char *p;
1118 int j;
1119
1120 if ('1' == protected_rows[i])
1121 continue;
1122
1123 p = old2new + i * num_new; /* the first element of the current row */
1124
1125 for (j = num_new - 1; j >= 0; j--)
1126 {
1127 if (('1' == p[j] || '2' == p[j]) && '1' != protected_cols[j])
1128 {
1129 cross_out(old2new, num_old, num_new, i, j, protected_rows, protected_cols);
1130 break;
1131 }
1132 }
1133 }
1134 }
1135
1136 zbx_free(protected_cols);
1137 zbx_free(protected_rows);
1138 }
1139
1140 /******************************************************************************
1141 * *
1142 * Function: create_old2new_and_copy_of *
1143 * *
1144 * Purpose: allocate and fill an array of possible mappings from the old log *
1145 * files to the new log files *
1146 * *
1147 * Parameters: *
1148 * rotation_type - [IN] file rotation type *
1149 * old_files - [IN] old file list *
1150 * num_old - [IN] number of elements in the old file list *
1151 * new_files - [IN] new file list *
1152 * num_new - [IN] number of elements in the new file list *
1153 * use_ino - [IN] how to use inodes in is_same_file() *
1154 * err_msg - [IN/OUT] error message why an item became NOTSUPPORTED *
1155 * *
1156 * Return value: pointer to allocated array or NULL *
1157 * *
1158 * Comments: *
1159 * The array is filled with '0', '1' and '2' which mean: *
1160 * old2new[i][j] = '0' - the i-th old file IS NOT the j-th new file *
1161 * old2new[i][j] = '1' - the i-th old file COULD BE the j-th new file *
1162 * old2new[i][j] = '2' - the j-th new file is a copy of the i-th old *
1163 * file *
1164 * *
1165 ******************************************************************************/
create_old2new_and_copy_of(zbx_log_rotation_options_t rotation_type,struct st_logfile * old_files,int num_old,struct st_logfile * new_files,int num_new,int use_ino,char ** err_msg)1166 static char *create_old2new_and_copy_of(zbx_log_rotation_options_t rotation_type, struct st_logfile *old_files,
1167 int num_old, struct st_logfile *new_files, int num_new, int use_ino, char **err_msg)
1168 {
1169 const char *__function_name = "create_old2new_and_copy_of";
1170 int i, j;
1171 char *old2new, *p;
1172
1173 /* set up a two dimensional array of possible mappings from old files to new files */
1174 old2new = (char *)zbx_malloc(NULL, (size_t)num_new * (size_t)num_old * sizeof(char));
1175 p = old2new;
1176
1177 for (i = 0; i < num_old; i++)
1178 {
1179 for (j = 0; j < num_new; j++)
1180 {
1181 switch (is_same_file_logrt(old_files + i, new_files + j, use_ino, rotation_type, err_msg))
1182 {
1183 case ZBX_SAME_FILE_NO:
1184 p[j] = '0';
1185 break;
1186 case ZBX_SAME_FILE_YES:
1187 if (1 == old_files[i].retry)
1188 {
1189 zabbix_log(LOG_LEVEL_DEBUG, "%s(): the size of log file \"%s\" has been"
1190 " updated since modification time change, consider"
1191 " it to be the same file", __function_name,
1192 old_files[i].filename);
1193 old_files[i].retry = 0;
1194 }
1195 p[j] = '1';
1196 break;
1197 case ZBX_SAME_FILE_COPY:
1198 p[j] = '2';
1199 new_files[j].copy_of = i;
1200 break;
1201 case ZBX_SAME_FILE_RETRY:
1202 old_files[i].retry = 1;
1203 zbx_free(old2new);
1204 return NULL;
1205 case ZBX_SAME_FILE_ERROR:
1206 zbx_free(old2new);
1207 return NULL;
1208 }
1209
1210 zabbix_log(LOG_LEVEL_DEBUG, "%s(): is_same_file(%s, %s) = %c", __function_name,
1211 old_files[i].filename, new_files[j].filename, p[j]);
1212 }
1213
1214 p += (size_t)num_new;
1215 }
1216
1217 if (ZBX_LOG_ROTATION_LOGCPT != rotation_type && (1 < num_old || 1 < num_new))
1218 resolve_old2new(old2new, num_old, num_new);
1219
1220 return old2new;
1221 }
1222
/******************************************************************************
 *                                                                            *
 * Function: find_old2new                                                     *
 *                                                                            *
 * Purpose: find a mapping from old to new file                               *
 *                                                                            *
 * Parameters:                                                                *
 *      old2new - [IN] two-dimensional array of possible mappings             *
 *      num_new - [IN] number of elements in the new file list                *
 *      i_old   - [IN] index of the old file                                  *
 *                                                                            *
 * Return value: index of the first new file marked '1' or '2' in the row     *
 *               of the old file, or -1 if no mapping was found               *
 *                                                                            *
 ******************************************************************************/
static int	find_old2new(const char * const old2new, int num_new, int i_old)
{
	const char	*row = old2new + i_old * num_new;	/* the first element of the i_old-th row */
	int		j = 0;

	/* skip columns (new files) which are not mapped to the old file */
	while (j < num_new && '1' != row[j] && '2' != row[j])
		j++;

	return j < num_new ? j : -1;
}
1253
1254 /******************************************************************************
1255 * *
1256 * Function: add_logfile *
1257 * *
1258 * Purpose: adds information of a logfile to the list of logfiles *
1259 * *
1260 * Parameters: logfiles - pointer to the list of logfiles *
1261 * logfiles_alloc - number of logfiles memory was allocated for *
1262 * logfiles_num - number of already inserted logfiles *
1263 * filename - name of a logfile (with full path) *
1264 * st - structure returned by stat() *
1265 * *
1266 * Author: Dmitry Borovikov *
1267 * *
1268 ******************************************************************************/
add_logfile(struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,const char * filename,zbx_stat_t * st)1269 static void add_logfile(struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, const char *filename,
1270 zbx_stat_t *st)
1271 {
1272 const char *__function_name = "add_logfile";
1273 int i = 0, cmp = 0;
1274
1275 zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s' mtime:%d size:" ZBX_FS_UI64, __function_name, filename,
1276 (int)st->st_mtime, (zbx_uint64_t)st->st_size);
1277
1278 if (*logfiles_alloc == *logfiles_num)
1279 {
1280 *logfiles_alloc += 64;
1281 *logfiles = (struct st_logfile *)zbx_realloc(*logfiles,
1282 (size_t)*logfiles_alloc * sizeof(struct st_logfile));
1283
1284 zabbix_log(LOG_LEVEL_DEBUG, "%s() logfiles:%p logfiles_alloc:%d",
1285 __function_name, (void *)*logfiles, *logfiles_alloc);
1286 }
1287
1288 /************************************************************************************************/
1289 /* (1) sort by ascending mtimes */
1290 /* (2) if mtimes are equal, sort alphabetically by descending names */
1291 /* the oldest is put first, the most current is at the end */
1292 /* */
1293 /* filename.log.3 mtime3, filename.log.2 mtime2, filename.log1 mtime1, filename.log mtime */
1294 /* -------------------------------------------------------------------------------------- */
1295 /* mtime3 <= mtime2 <= mtime1 <= mtime */
1296 /* -------------------------------------------------------------------------------------- */
1297 /* filename.log.3 > filename.log.2 > filename.log.1 > filename.log */
1298 /* -------------------------------------------------------------------------------------- */
1299 /* array[i=0] array[i=1] array[i=2] array[i=3] */
1300 /* */
1301 /* note: the application is writing into filename.log, mtimes are more important than filenames */
1302 /************************************************************************************************/
1303
1304 for (; i < *logfiles_num; i++)
1305 {
1306 if (st->st_mtime > (*logfiles)[i].mtime)
1307 continue; /* (1) sort by ascending mtime */
1308
1309 if (st->st_mtime == (*logfiles)[i].mtime)
1310 {
1311 if (0 > (cmp = strcmp(filename, (*logfiles)[i].filename)))
1312 continue; /* (2) sort by descending name */
1313
1314 if (0 == cmp)
1315 {
1316 /* the file already exists, quite impossible branch */
1317 zabbix_log(LOG_LEVEL_WARNING, "%s() file '%s' already added", __function_name,
1318 filename);
1319 goto out;
1320 }
1321
1322 /* filename is smaller, must insert here */
1323 }
1324
1325 /* the place is found, move all from the position forward by one struct */
1326 break;
1327 }
1328
1329 if (*logfiles_num > i)
1330 {
1331 /* free a gap for inserting the new element */
1332 memmove((void *)&(*logfiles)[i + 1], (const void *)&(*logfiles)[i],
1333 (size_t)(*logfiles_num - i) * sizeof(struct st_logfile));
1334 }
1335
1336 (*logfiles)[i].filename = zbx_strdup(NULL, filename);
1337 (*logfiles)[i].mtime = (int)st->st_mtime;
1338 (*logfiles)[i].md5size = -1;
1339 (*logfiles)[i].seq = 0;
1340 (*logfiles)[i].incomplete = 0;
1341 (*logfiles)[i].copy_of = -1;
1342 #ifndef _WINDOWS
1343 (*logfiles)[i].dev = (zbx_uint64_t)st->st_dev;
1344 (*logfiles)[i].ino_lo = (zbx_uint64_t)st->st_ino;
1345 (*logfiles)[i].ino_hi = 0;
1346 #endif
1347 (*logfiles)[i].size = (zbx_uint64_t)st->st_size;
1348 (*logfiles)[i].processed_size = 0;
1349 (*logfiles)[i].retry = 0;
1350
1351 ++(*logfiles_num);
1352 out:
1353 zabbix_log(LOG_LEVEL_DEBUG, "End of %s()", __function_name);
1354 }
1355
1356 /******************************************************************************
1357 * *
1358 * Function: destroy_logfile_list *
1359 * *
1360 * Purpose: release resources allocated to a logfile list *
1361 * *
1362 * Parameters: *
1363 * logfiles - [IN/OUT] pointer to the list of logfiles, can be NULL *
1364 * logfiles_alloc - [IN/OUT] pointer to number of logfiles memory was *
1365 * allocated for, can be NULL. *
1366 * logfiles_num - [IN/OUT] valid pointer to number of inserted logfiles *
1367 * *
1368 ******************************************************************************/
destroy_logfile_list(struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num)1369 void destroy_logfile_list(struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num)
1370 {
1371 int i;
1372
1373 for (i = 0; i < *logfiles_num; i++)
1374 zbx_free((*logfiles)[i].filename);
1375
1376 *logfiles_num = 0;
1377
1378 if (NULL != logfiles_alloc)
1379 *logfiles_alloc = 0;
1380
1381 zbx_free(*logfiles);
1382 }
1383
1384 /******************************************************************************
1385 * *
1386 * Function: pick_logfile *
1387 * *
1388 * Purpose: checks if the specified file meets requirements and adds it to *
1389 * the logfile list *
1390 * *
1391 * Parameters: *
1392 * directory - [IN] directory where the logfiles reside *
1393 * filename - [IN] name of the logfile (without path) *
1394 * mtime - [IN] selection criterion "logfile modification time" *
1395 * The logfile will be selected if modified not before *
1396 * 'mtime'. *
1397 * re - [IN] selection criterion "regexp describing filename *
1398 * pattern" *
1399 * logfiles - [IN/OUT] pointer to the list of logfiles *
1400 * logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for *
1401 * logfiles_num - [IN/OUT] number of already inserted logfiles *
1402 * *
1403 * Comments: This is a helper function for pick_logfiles() *
1404 * *
1405 ******************************************************************************/
pick_logfile(const char * directory,const char * filename,int mtime,const zbx_regexp_t * re,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num)1406 static void pick_logfile(const char *directory, const char *filename, int mtime, const zbx_regexp_t *re,
1407 struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num)
1408 {
1409 char *logfile_candidate;
1410 zbx_stat_t file_buf;
1411
1412 logfile_candidate = zbx_dsprintf(NULL, "%s%s", directory, filename);
1413
1414 if (0 == zbx_stat(logfile_candidate, &file_buf))
1415 {
1416 if (S_ISREG(file_buf.st_mode) &&
1417 mtime <= file_buf.st_mtime &&
1418 0 == zbx_regexp_match_precompiled(filename, re))
1419 {
1420 add_logfile(logfiles, logfiles_alloc, logfiles_num, logfile_candidate, &file_buf);
1421 }
1422 }
1423 else
1424 zabbix_log(LOG_LEVEL_DEBUG, "cannot process entry '%s': %s", logfile_candidate, zbx_strerror(errno));
1425
1426 zbx_free(logfile_candidate);
1427 }
1428
1429 /******************************************************************************
1430 * *
1431 * Function: pick_logfiles *
1432 * *
1433 * Purpose: find logfiles in a directory and put them into a list *
1434 * *
1435 * Parameters: *
1436 * directory - [IN] directory where the logfiles reside *
1437 * mtime - [IN] selection criterion "logfile modification time" *
1438 * The logfile will be selected if modified not before *
1439 * 'mtime'. *
1440 * re - [IN] selection criterion "regexp describing filename *
1441 * pattern" *
1442 * use_ino - [OUT] how to use inodes in is_same_file() *
1443 * logfiles - [IN/OUT] pointer to the list of logfiles *
1444 * logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for *
1445 * logfiles_num - [IN/OUT] number of already inserted logfiles *
1446 * err_msg - [IN/OUT] error message why an item became *
1447 * NOTSUPPORTED *
1448 * *
1449 * Return value: SUCCEED or FAIL *
1450 * *
1451 * Comments: This is a helper function for make_logfile_list() *
1452 * *
1453 ******************************************************************************/
pick_logfiles(const char * directory,int mtime,const zbx_regexp_t * re,int * use_ino,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,char ** err_msg)1454 static int pick_logfiles(const char *directory, int mtime, const zbx_regexp_t *re, int *use_ino,
1455 struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, char **err_msg)
1456 {
1457 #ifdef _WINDOWS
1458 int ret = FAIL;
1459 char *find_path = NULL, *file_name_utf8;
1460 wchar_t *find_wpath = NULL;
1461 intptr_t find_handle;
1462 struct _wfinddata_t find_data;
1463
1464 /* "open" Windows directory */
1465 find_path = zbx_dsprintf(find_path, "%s*", directory);
1466 find_wpath = zbx_utf8_to_unicode(find_path);
1467
1468 if (-1 == (find_handle = _wfindfirst(find_wpath, &find_data)))
1469 {
1470 *err_msg = zbx_dsprintf(*err_msg, "Cannot open directory \"%s\" for reading: %s", directory,
1471 zbx_strerror(errno));
1472 zbx_free(find_wpath);
1473 zbx_free(find_path);
1474 return FAIL;
1475 }
1476
1477 if (SUCCEED != set_use_ino_by_fs_type(find_path, use_ino, err_msg))
1478 goto clean;
1479
1480 do
1481 {
1482 file_name_utf8 = zbx_unicode_to_utf8(find_data.name);
1483 pick_logfile(directory, file_name_utf8, mtime, re, logfiles, logfiles_alloc, logfiles_num);
1484 zbx_free(file_name_utf8);
1485 }
1486 while (0 == _wfindnext(find_handle, &find_data));
1487
1488 ret = SUCCEED;
1489 clean:
1490 if (-1 == _findclose(find_handle))
1491 {
1492 *err_msg = zbx_dsprintf(*err_msg, "Cannot close directory \"%s\": %s", directory, zbx_strerror(errno));
1493 ret = FAIL;
1494 }
1495
1496 zbx_free(find_wpath);
1497 zbx_free(find_path);
1498
1499 return ret;
1500 #else
1501 DIR *dir = NULL;
1502 struct dirent *d_ent = NULL;
1503
1504 if (NULL == (dir = opendir(directory)))
1505 {
1506 *err_msg = zbx_dsprintf(*err_msg, "Cannot open directory \"%s\" for reading: %s", directory,
1507 zbx_strerror(errno));
1508 return FAIL;
1509 }
1510
1511 /* on UNIX file systems we always assume that inodes can be used to identify files */
1512 *use_ino = 1;
1513
1514 while (NULL != (d_ent = readdir(dir)))
1515 {
1516 pick_logfile(directory, d_ent->d_name, mtime, re, logfiles, logfiles_alloc, logfiles_num);
1517 }
1518
1519 if (-1 == closedir(dir))
1520 {
1521 *err_msg = zbx_dsprintf(*err_msg, "Cannot close directory \"%s\": %s", directory, zbx_strerror(errno));
1522 return FAIL;
1523 }
1524
1525 return SUCCEED;
1526 #endif
1527 }
1528
1529 /******************************************************************************
1530 * *
1531 * Function: compile_filename_regexp *
1532 * *
1533 * Purpose: compile regular expression *
1534 * *
1535 * Parameters: *
1536 * filename_regexp - [IN] regexp to be compiled *
1537 * re - [OUT] compiled regexp *
1538 * err_msg - [OUT] error message why regexp could not be *
1539 * compiled *
1540 * *
1541 * Return value: SUCCEED or FAIL *
1542 * *
1543 ******************************************************************************/
compile_filename_regexp(const char * filename_regexp,zbx_regexp_t ** re,char ** err_msg)1544 static int compile_filename_regexp(const char *filename_regexp, zbx_regexp_t **re, char **err_msg)
1545 {
1546 const char *regexp_err;
1547
1548 if (SUCCEED != zbx_regexp_compile(filename_regexp, re, ®exp_err))
1549 {
1550 *err_msg = zbx_dsprintf(*err_msg, "Cannot compile a regular expression describing filename pattern: %s",
1551 regexp_err);
1552 return FAIL;
1553 }
1554
1555 return SUCCEED;
1556 }
1557
1558 /******************************************************************************
1559 * *
1560 * Function: fill_file_details *
1561 * *
1562 * Purpose: fill-in MD5 sums, device and inode numbers for files in the list *
1563 * *
1564 * Parameters: *
1565 * logfiles - [IN/OUT] list of log files *
1566 * logfiles_num - [IN] number of elements in 'logfiles' *
1567 * use_ino - [IN] how to get file IDs in file_id() *
1568 * err_msg - [IN/OUT] error message why operation failed *
1569 * *
1570 * Return value: SUCCEED or FAIL *
1571 * *
1572 ******************************************************************************/
1573 #ifdef _WINDOWS
fill_file_details(struct st_logfile ** logfiles,int logfiles_num,int use_ino,char ** err_msg)1574 static int fill_file_details(struct st_logfile **logfiles, int logfiles_num, int use_ino, char **err_msg)
1575 #else
1576 static int fill_file_details(struct st_logfile **logfiles, int logfiles_num, char **err_msg)
1577 #endif
1578 {
1579 int i, ret = SUCCEED;
1580
1581 /* Fill in MD5 sums and file indexes in the logfile list. */
1582 /* These operations require opening of file, therefore we group them together. */
1583
1584 for (i = 0; i < logfiles_num; i++)
1585 {
1586 int f;
1587 struct st_logfile *p = *logfiles + i;
1588
1589 if (-1 == (f = open_file_helper(p->filename, err_msg)))
1590 return FAIL;
1591
1592 p->md5size = (zbx_uint64_t)MAX_LEN_MD5 > p->size ? (int)p->size : MAX_LEN_MD5;
1593
1594 if (SUCCEED != (ret = file_start_md5(f, p->md5size, p->md5buf, p->filename, err_msg)))
1595 goto clean;
1596 #ifdef _WINDOWS
1597 ret = file_id(f, use_ino, &p->dev, &p->ino_lo, &p->ino_hi, p->filename, err_msg);
1598 #endif /*_WINDOWS*/
1599 clean:
1600 if (SUCCEED != close_file_helper(f, p->filename, err_msg) || FAIL == ret)
1601 return FAIL;
1602 }
1603
1604 return ret;
1605 }
1606
1607 /******************************************************************************
1608 * *
1609 * Function: make_logfile_list *
1610 * *
1611 * Purpose: select log files to be analyzed and make a list, set 'use_ino' *
1612 * parameter *
1613 * *
1614 * Parameters: *
1615 * flags - [IN] bit flags with item type: log, logrt, log.count *
1616 * or logrt.count *
1617 * filename - [IN] logfile name (regular expression with a path) *
1618 * mtime - [IN] last modification time of the file *
1619 * logfiles - [IN/OUT] pointer to the list of logfiles *
1620 * logfiles_alloc - [IN/OUT] number of logfiles memory was allocated for *
1621 * logfiles_num - [IN/OUT] number of already inserted logfiles *
1622 * use_ino - [IN/OUT] how to use inode numbers *
1623 * err_msg - [IN/OUT] error message (if FAIL or ZBX_NO_FILE_ERROR *
1624 * is returned) *
1625 * *
1626 * Return value: SUCCEED - file list successfully built, *
1627 * ZBX_NO_FILE_ERROR - file(s) do not exist, *
1628 * FAIL - other errors *
1629 * *
1630 ******************************************************************************/
make_logfile_list(unsigned char flags,const char * filename,int mtime,struct st_logfile ** logfiles,int * logfiles_alloc,int * logfiles_num,int * use_ino,char ** err_msg)1631 static int make_logfile_list(unsigned char flags, const char *filename, int mtime,
1632 struct st_logfile **logfiles, int *logfiles_alloc, int *logfiles_num, int *use_ino, char **err_msg)
1633 {
1634 int ret = SUCCEED;
1635
1636 if (0 != (ZBX_METRIC_FLAG_LOG_LOG & flags)) /* log[] or log.count[] item */
1637 {
1638 zbx_stat_t file_buf;
1639
1640 if (0 != zbx_stat(filename, &file_buf))
1641 {
1642 *err_msg = zbx_dsprintf(*err_msg, "Cannot obtain information for file \"%s\": %s", filename,
1643 zbx_strerror(errno));
1644 ret = ZBX_NO_FILE_ERROR;
1645 goto clean;
1646 }
1647
1648 if (!S_ISREG(file_buf.st_mode))
1649 {
1650 *err_msg = zbx_dsprintf(*err_msg, "\"%s\" is not a regular file.", filename);
1651 ret = FAIL;
1652 goto clean;
1653 }
1654
1655 /* mtime is not used for log, log.count items, reset to ignore */
1656 file_buf.st_mtime = 0;
1657
1658 add_logfile(logfiles, logfiles_alloc, logfiles_num, filename, &file_buf);
1659 #ifdef _WINDOWS
1660 if (SUCCEED != (ret = set_use_ino_by_fs_type(filename, use_ino, err_msg)))
1661 goto clean;
1662 #else
1663 /* on UNIX file systems we always assume that inodes can be used to identify files */
1664 *use_ino = 1;
1665 #endif
1666 }
1667 else if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags)) /* logrt[] or logrt.count[] item */
1668 {
1669 char *directory = NULL, *filename_regexp = NULL;
1670 zbx_regexp_t *re;
1671
1672 /* split a filename into directory and file name regular expression parts */
1673 if (SUCCEED != (ret = split_filename(filename, &directory, &filename_regexp, err_msg)))
1674 goto clean;
1675
1676 if (SUCCEED != (ret = compile_filename_regexp(filename_regexp, &re, err_msg)))
1677 goto clean1;
1678
1679 if (SUCCEED != (ret = pick_logfiles(directory, mtime, re, use_ino, logfiles, logfiles_alloc,
1680 logfiles_num, err_msg)))
1681 {
1682 goto clean2;
1683 }
1684
1685 if (0 == *logfiles_num)
1686 {
1687 /* do not make logrt[] and logrt.count[] items NOTSUPPORTED if there are no matching log */
1688 /* files or they are not accessible (can happen during a rotation), just log the problem */
1689 #ifdef _WINDOWS
1690 zabbix_log(LOG_LEVEL_WARNING, "there are no recently modified files matching \"%s\" in \"%s\"",
1691 filename_regexp, directory);
1692
1693 ret = ZBX_NO_FILE_ERROR;
1694 #else
1695 if (0 != access(directory, X_OK))
1696 {
1697 zabbix_log(LOG_LEVEL_WARNING, "insufficient access rights (no \"execute\" permission) "
1698 "to directory \"%s\": %s", directory, zbx_strerror(errno));
1699 }
1700 else
1701 {
1702 zabbix_log(LOG_LEVEL_WARNING, "there are no recently modified files matching \"%s\" in"
1703 " \"%s\"", filename_regexp, directory);
1704 ret = ZBX_NO_FILE_ERROR;
1705 }
1706 #endif
1707 }
1708 clean2:
1709 zbx_regexp_free(re);
1710 clean1:
1711 zbx_free(directory);
1712 zbx_free(filename_regexp);
1713
1714 if (FAIL == ret || ZBX_NO_FILE_ERROR == ret)
1715 goto clean;
1716 }
1717 else
1718 THIS_SHOULD_NEVER_HAPPEN;
1719
1720 #ifdef _WINDOWS
1721 ret = fill_file_details(logfiles, *logfiles_num, *use_ino, err_msg);
1722 #else
1723 ret = fill_file_details(logfiles, *logfiles_num, err_msg);
1724 #endif
1725 clean:
1726 if ((FAIL == ret || ZBX_NO_FILE_ERROR == ret) && NULL != *logfiles)
1727 destroy_logfile_list(logfiles, logfiles_alloc, logfiles_num);
1728
1729 return ret;
1730 }
1731
/******************************************************************************
 *                                                                            *
 * Function: buf_find_newline                                                 *
 *                                                                            *
 * Purpose: find the first line terminator (LF, CR or CR+LF) in a buffer      *
 *                                                                            *
 * Parameters:                                                                *
 *      p      - [IN/OUT] start of the data; NUL characters met on the way    *
 *               are overwritten with '?' (1- and 2-byte encodings only)      *
 *      p_next - [OUT] first byte after the terminator (set only if found)    *
 *      p_end  - [IN] first byte after the end of the data                    *
 *      cr     - [IN] 'carriage return' character in the data encoding        *
 *      lf     - [IN] 'line feed' character in the data encoding              *
 *      szbyte - [IN] size of the 'cr' and 'lf' characters in bytes           *
 *                                                                            *
 * Return value: pointer to the terminator or NULL if there is none           *
 *                                                                            *
 * Comments: 'cr' and 'lf' are not consulted if 'szbyte' is 1, the values     *
 *           0xd and 0xa are used directly in that case                       *
 *                                                                            *
 ******************************************************************************/
static char	*buf_find_newline(char *p, char **p_next, const char *p_end, const char *cr, const char *lf,
		size_t szbyte)
{
	if (1 == szbyte)	/* single-byte character set */
	{
		for (; p < p_end; p++)
		{
			/* detect NULL byte and replace it with '?' character */
			if (0x0 == *p)
			{
				*p = '?';
				continue;
			}

			/* quickly skip everything outside of the 0xa - 0xd range */
			if (0xd < *p || 0xa > *p)
				continue;

			if (0xa == *p)	/* LF (Unix) */
			{
				*p_next = p + 1;
				return p;
			}

			if (0xd == *p)	/* CR (Mac) */
			{
				if (p < p_end - 1 && 0xa == *(p + 1))	/* CR+LF (Windows) */
					*p_next = p + 2;
				else
					*p_next = p + 1;

				return p;
			}
		}

		return (char *)NULL;
	}

	/* Multi-byte character set. The number of remaining bytes is compared with the character size */
	/* instead of subtracting the size from 'p_end': the subtraction could produce a pointer before */
	/* the beginning of the buffer if less than one (or two) characters are left. */

	while (p < p_end && (size_t)(p_end - p) >= szbyte)
	{
		/* detect NULL byte in UTF-16 encoding and replace it with '?' character */
		if (2 == szbyte && 0x0 == *p && 0x0 == *(p + 1))
		{
			if (0x0 == *cr)			/* Big-endian */
				p[1] = '?';
			else				/* Little-endian */
				*p = '?';
		}

		if (0 == memcmp(p, lf, szbyte))		/* LF (Unix) */
		{
			*p_next = p + szbyte;
			return p;
		}

		if (0 == memcmp(p, cr, szbyte))		/* CR (Mac) */
		{
			if ((size_t)(p_end - p) >= szbyte + szbyte && 0 == memcmp(p + szbyte, lf, szbyte))
			{
				/* CR+LF (Windows) */
				*p_next = p + szbyte + szbyte;
				return p;
			}

			*p_next = p + szbyte;
			return p;
		}

		p += szbyte;
	}

	return (char *)NULL;
}
1806
/******************************************************************************
 *
 * Function: zbx_read2
 *
 * Purpose: read records from an open log file starting at the current file
 *          position, match them against the regular expression and pass
 *          matching records to 'process_value' (or only count them for
 *          log.count[] and logrt.count[] items)
 *
 * Parameters:
 *     fd               - [IN] descriptor of the log file, positioned at the
 *                        place to start reading from
 *     flags            - [IN] bit flags with item type: log, logrt,
 *                        log.count or logrt.count
 *     lastlogsize      - [IN/OUT] offset from the beginning of the file
 *     mtime            - [IN] file modification time passed to
 *                        'process_value'
 *     big_rec          - [IN/OUT] state variable to remember whether a long
 *                        record is being processed
 *     incomplete       - [OUT] 0 - the last record ended with a newline,
 *                        1 - there was no newline at the end of the last
 *                        record
 *     err_msg          - [IN/OUT] error message
 *     encoding         - [IN] text string describing encoding, "" (empty
 *                        string) means a single-byte character set
 *     regexps          - [IN] array of regexps
 *     pattern          - [IN] pattern to match
 *     output_template  - [IN] output formatting template
 *     p_count          - [IN/OUT] limit of records to be processed
 *     s_count          - [IN/OUT] limit of records to be sent to server
 *     process_value    - [IN] pointer to function process_value()
 *     server           - [IN] server to send data to
 *     port             - [IN] port to send data to
 *     hostname         - [IN] hostname the data comes from
 *     key              - [IN] item key the data belongs to
 *     lastlogsize_sent - [OUT] lastlogsize value that was last sent
 *     mtime_sent       - [OUT] mtime value that was last sent (can be NULL)
 *
 * Return value: SUCCEED - end of file or a limit was reached, or sending of
 *                         a record failed (the record is not skipped,
 *                         '*lastlogsize' is not moved past it)
 *               FAIL    - seek, read or regular expression error, 'err_msg'
 *                         is set
 *
 * Comments: The read buffer is allocated on the first call and is not freed
 *           by this function.
 *           File offsets are calculated as zbx_uint64_t values to avoid
 *           truncation on platforms where size_t is narrower than the file
 *           offset type.
 *
 ******************************************************************************/
static int	zbx_read2(int fd, unsigned char flags, zbx_uint64_t *lastlogsize, int *mtime, int *big_rec,
		int *incomplete, char **err_msg, const char *encoding, zbx_vector_ptr_t *regexps, const char *pattern,
		const char *output_template, int *p_count, int *s_count, zbx_process_value_func_t process_value,
		const char *server, unsigned short port, const char *hostname, const char *key,
		zbx_uint64_t *lastlogsize_sent, int *mtime_sent)
{
	ZBX_THREAD_LOCAL static char	*buf = NULL;

	int				ret, nbytes, regexp_ret;
	const char			*cr, *lf, *p_end;
	char				*p_start, *p, *p_nl, *p_next, *item_value = NULL;
	size_t				szbyte;
	zbx_offset_t			offset;
	int				send_err;
	zbx_uint64_t			lastlogsize1;

#define BUF_SIZE	(256 * ZBX_KIBIBYTE)	/* The longest encodings use 4 bytes for every character. To send */
						/* up to 64 k characters to Zabbix server a 256 kB buffer might be */
						/* required. */

	if (NULL == buf)
		buf = (char *)zbx_malloc(buf, (size_t)(BUF_SIZE + 1));	/* +1 for terminating a full buffer */

	find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte);

	for (;;)
	{
		if (0 >= *p_count || 0 >= *s_count)
		{
			/* limit on number of processed or sent-to-server lines reached */
			ret = SUCCEED;
			goto out;
		}

		/* remember where this chunk starts in the file */
		if ((zbx_offset_t)-1 == (offset = zbx_lseek(fd, 0, SEEK_CUR)))
		{
			*big_rec = 0;
			*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to 0 in file: %s", zbx_strerror(errno));
			ret = FAIL;
			goto out;
		}

		nbytes = (int)read(fd, buf, (size_t)BUF_SIZE);

		if (-1 == nbytes)
		{
			/* error on read */
			*big_rec = 0;
			*err_msg = zbx_dsprintf(*err_msg, "Cannot read from file: %s", zbx_strerror(errno));
			ret = FAIL;
			goto out;
		}

		if (0 == nbytes)
		{
			/* end of file reached */
			ret = SUCCEED;
			goto out;
		}

		p_start = buf;			/* beginning of current line */
		p = buf;			/* current byte */
		p_end = buf + (size_t)nbytes;	/* no data from this position */

		if (NULL == (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
		{
			if (p_end > p)
				*incomplete = 1;

			if (BUF_SIZE > nbytes)
			{
				/* Buffer is not full (no more data available) and there is no "newline" in it. */
				/* Do not analyze it now, keep the same position in the file and wait the next check, */
				/* maybe more data will come. */

				*lastlogsize = (zbx_uint64_t)offset;
				ret = SUCCEED;
				goto out;
			}
			else
			{
				/* buffer is full and there is no "newline" in it */

				if (0 == *big_rec)
				{
					/* It is the first, beginning part of a long record. Match it against the */
					/* regexp now (our buffer length corresponds to what we can save in the */
					/* database). */

					char	*value;

					buf[BUF_SIZE] = '\0';

					/* 'value' is a separately allocated copy only if an encoding is set */
					if ('\0' != *encoding)
						value = convert_to_utf8(buf, (size_t)BUF_SIZE, encoding);
					else
						value = buf;

					zabbix_log(LOG_LEVEL_WARNING, "Logfile contains a large record: \"%.64s\""
							" (showing only the first 64 characters). Only the first 256 kB"
							" will be analyzed, the rest will be ignored while Zabbix agent"
							" is running.", value);

					lastlogsize1 = (zbx_uint64_t)offset + (zbx_uint64_t)nbytes;
					send_err = FAIL;

					if (0 == (ZBX_METRIC_FLAG_LOG_COUNT & flags))	/* log[] or logrt[] */
					{
						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
								pattern, ZBX_CASE_SENSITIVE, output_template,
								&item_value)))
						{
							if (SUCCEED == (send_err = process_value(server, port,
									hostname, key, item_value, ITEM_STATE_NORMAL,
									&lastlogsize1, mtime, NULL, NULL, NULL, NULL,
									flags | ZBX_METRIC_FLAG_PERSISTENT)))
							{
								*lastlogsize_sent = lastlogsize1;
								if (NULL != mtime_sent)
									*mtime_sent = *mtime;

								(*s_count)--;
								zbx_free(item_value);
							}
							else
							{
								zbx_free(item_value);

								/* do not leak the converted copy on early exit */
								if ('\0' != *encoding)
									zbx_free(value);

								/* Sending of buffer failed. */
								/* Try to resend it in the next check. */
								ret = SUCCEED;
								goto out;
							}
						}
					}
					else	/* log.count[] or logrt.count[] */
					{
						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
								pattern, ZBX_CASE_SENSITIVE, NULL, NULL)))
						{
							(*s_count)--;
						}
					}

					if ('\0' != *encoding)
						zbx_free(value);

					if (FAIL == regexp_ret)
					{
						*err_msg = zbx_dsprintf(*err_msg, "cannot compile regular expression");
						ret = FAIL;
						goto out;
					}

					(*p_count)--;

					if (0 != (ZBX_METRIC_FLAG_LOG_COUNT & flags) ||
							ZBX_REGEXP_NO_MATCH == regexp_ret || SUCCEED == send_err)
					{
						*lastlogsize = lastlogsize1;
						*big_rec = 1;	/* ignore the rest of this record */
					}
				}
				else
				{
					/* It is a middle part of a long record. Ignore it. We have already */
					/* checked the first part against the regexp. */
					*lastlogsize = (zbx_uint64_t)offset + (zbx_uint64_t)nbytes;
				}
			}
		}
		else
		{
			/* the "newline" was found, so there is at least one complete record */
			/* (or trailing part of a large record) in the buffer */
			*incomplete = 0;

			for (;;)
			{
				if (0 >= *p_count || 0 >= *s_count)
				{
					/* limit on number of processed or sent-to-server lines reached */
					ret = SUCCEED;
					goto out;
				}

				if (0 == *big_rec)
				{
					char	*value;

					/* terminate the record in place, the "newline" is not part of the value */
					*p_nl = '\0';

					/* 'value' is a separately allocated copy only if an encoding is set */
					if ('\0' != *encoding)
						value = convert_to_utf8(p_start, (size_t)(p_nl - p_start), encoding);
					else
						value = p_start;

					lastlogsize1 = (zbx_uint64_t)offset + (zbx_uint64_t)(p_next - buf);
					send_err = FAIL;

					if (0 == (ZBX_METRIC_FLAG_LOG_COUNT & flags))	/* log[] or logrt[] */
					{
						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
								pattern, ZBX_CASE_SENSITIVE, output_template,
								&item_value)))
						{
							if (SUCCEED == (send_err = process_value(server, port,
									hostname, key, item_value, ITEM_STATE_NORMAL,
									&lastlogsize1, mtime, NULL, NULL, NULL, NULL,
									flags | ZBX_METRIC_FLAG_PERSISTENT)))
							{
								*lastlogsize_sent = lastlogsize1;
								if (NULL != mtime_sent)
									*mtime_sent = *mtime;

								(*s_count)--;
								zbx_free(item_value);
							}
							else
							{
								zbx_free(item_value);

								/* do not leak the converted copy on early exit */
								if ('\0' != *encoding)
									zbx_free(value);

								/* Sending of buffer failed. */
								/* Try to resend it in the next check. */
								ret = SUCCEED;
								goto out;
							}
						}
					}
					else	/* log.count[] or logrt.count[] */
					{
						if (ZBX_REGEXP_MATCH == (regexp_ret = regexp_sub_ex(regexps, value,
								pattern, ZBX_CASE_SENSITIVE, NULL, NULL)))
						{
							(*s_count)--;
						}
					}

					if ('\0' != *encoding)
						zbx_free(value);

					if (FAIL == regexp_ret)
					{
						*err_msg = zbx_dsprintf(*err_msg, "cannot compile regular expression");
						ret = FAIL;
						goto out;
					}

					(*p_count)--;

					if (0 != (ZBX_METRIC_FLAG_LOG_COUNT & flags) ||
							ZBX_REGEXP_NO_MATCH == regexp_ret || SUCCEED == send_err)
					{
						*lastlogsize = lastlogsize1;
					}
				}
				else
				{
					/* skip the trailing part of a long record */
					*lastlogsize = (zbx_uint64_t)offset + (zbx_uint64_t)(p_next - buf);
					*big_rec = 0;
				}

				/* move to the next record in the buffer */
				p_start = p_next;
				p = p_next;

				if (NULL == (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
				{
					/* There are no complete records in the buffer. */
					/* Try to read more data from this position if available. */
					if (p_end > p)
						*incomplete = 1;

					if ((zbx_offset_t)-1 == zbx_lseek(fd, *lastlogsize, SEEK_SET))
					{
						*err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64
								" in file: %s", *lastlogsize, zbx_strerror(errno));
						ret = FAIL;
						goto out;
					}
					else
						break;
				}
				else
					*incomplete = 0;
			}
		}
	}
out:
	return ret;

#undef BUF_SIZE
}
2101
2102 /******************************************************************************
2103 * *
2104 * Function: process_log *
2105 * *
2106 * Purpose: Match new records in logfile with regexp, transmit matching *
2107 * records to Zabbix server *
2108 * *
2109 * Parameters: *
2110 * flags - [IN] bit flags with item type: log, logrt, log.count *
2111 * or logrt.count *
2112 * filename - [IN] logfile name *
2113 * lastlogsize - [IN/OUT] offset from the beginning of the file *
2114 * mtime - [IN/OUT] file modification time for reporting to *
2115 * server *
2116 * lastlogsize_sent - [OUT] lastlogsize value that was last sent *
2117 * mtime_sent - [OUT] mtime value that was last sent *
2118 * skip_old_data - [IN/OUT] start from the beginning of the file or *
2119 * jump to the end *
2120 * big_rec - [IN/OUT] state variable to remember whether a long *
2121 * record is being processed *
2122 * incomplete - [OUT] 0 - the last record ended with a newline, *
2123 * 1 - there was no newline at the end of the last *
2124 * record. *
2125 * err_msg - [IN/OUT] error message why an item became *
2126 * NOTSUPPORTED *
2127 * encoding - [IN] text string describing encoding. *
2128 * See function find_cr_lf_szbyte() for supported *
2129 * encodings. *
2130 * "" (empty string) means a single-byte character set *
2131 * (e.g. ASCII). *
2132 * regexps - [IN] array of regexps *
2133 * pattern - [IN] pattern to match *
2134 * output_template - [IN] output formatting template *
2135 * p_count - [IN/OUT] limit of records to be processed *
2136 * s_count - [IN/OUT] limit of records to be sent to server *
2137 * process_value - [IN] pointer to function process_value() *
2138 * server - [IN] server to send data to *
2139 * port - [IN] port to send data to *
2140 * hostname - [IN] hostname the data comes from *
2141 * key - [IN] item key the data belongs to *
2142 * processed_bytes - [OUT] number of processed bytes in logfile *
2143 * seek_offset - [IN] position to seek in file *
2144 * *
2145 * Return value: returns SUCCEED on successful reading, *
2146 * FAIL on other cases *
2147 * *
2148 * Author: Eugene Grigorjev *
2149 * *
2150 * Comments: *
2151 * This function does not deal with log file rotation. *
2152 * *
2153 ******************************************************************************/
process_log(unsigned char flags,const char * filename,zbx_uint64_t * lastlogsize,int * mtime,zbx_uint64_t * lastlogsize_sent,int * mtime_sent,unsigned char * skip_old_data,int * big_rec,int * incomplete,char ** err_msg,const char * encoding,zbx_vector_ptr_t * regexps,const char * pattern,const char * output_template,int * p_count,int * s_count,zbx_process_value_func_t process_value,const char * server,unsigned short port,const char * hostname,const char * key,zbx_uint64_t * processed_bytes,zbx_uint64_t seek_offset)2154 static int process_log(unsigned char flags, const char *filename, zbx_uint64_t *lastlogsize, int *mtime,
2155 zbx_uint64_t *lastlogsize_sent, int *mtime_sent, unsigned char *skip_old_data, int *big_rec,
2156 int *incomplete, char **err_msg, const char *encoding, zbx_vector_ptr_t *regexps, const char *pattern,
2157 const char *output_template, int *p_count, int *s_count, zbx_process_value_func_t process_value,
2158 const char *server, unsigned short port, const char *hostname, const char *key,
2159 zbx_uint64_t *processed_bytes, zbx_uint64_t seek_offset)
2160 {
2161 const char *__function_name = "process_log";
2162 int f, ret = FAIL;
2163
2164 zabbix_log(LOG_LEVEL_DEBUG, "In %s() filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d",
2165 __function_name, filename, *lastlogsize, NULL != mtime ? *mtime : 0);
2166
2167 if (-1 == (f = open_file_helper(filename, err_msg)))
2168 goto out;
2169
2170 if ((zbx_offset_t)-1 != zbx_lseek(f, seek_offset, SEEK_SET))
2171 {
2172 *lastlogsize = seek_offset;
2173 *skip_old_data = 0;
2174
2175 if (SUCCEED == (ret = zbx_read2(f, flags, lastlogsize, mtime, big_rec, incomplete, err_msg, encoding,
2176 regexps, pattern, output_template, p_count, s_count, process_value, server, port,
2177 hostname, key, lastlogsize_sent, mtime_sent)))
2178 {
2179 *processed_bytes = *lastlogsize - seek_offset;
2180 }
2181 }
2182 else
2183 {
2184 *err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
2185 seek_offset, filename, zbx_strerror(errno));
2186 }
2187
2188 if (SUCCEED != close_file_helper(f, filename, err_msg))
2189 ret = FAIL;
2190 out:
2191 zabbix_log(LOG_LEVEL_DEBUG, "End of %s() filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d ret:%s"
2192 " processed_bytes:" ZBX_FS_UI64, __function_name, filename, *lastlogsize,
2193 NULL != mtime ? *mtime : 0, zbx_result_string(ret),
2194 SUCCEED == ret ? *processed_bytes : (zbx_uint64_t)0);
2195
2196 return ret;
2197 }
2198
adjust_mtime_to_clock(int * mtime)2199 static void adjust_mtime_to_clock(int *mtime)
2200 {
2201 time_t now;
2202
2203 /* Adjust 'mtime' if the system clock has been set back in time. */
2204 /* Setting the clock ahead of time is harmless in our case. */
2205
2206 if (*mtime > (now = time(NULL)))
2207 {
2208 int old_mtime;
2209
2210 old_mtime = *mtime;
2211 *mtime = (int)now;
2212
2213 zabbix_log(LOG_LEVEL_WARNING, "System clock has been set back in time. Setting agent mtime %d "
2214 "seconds back.", (int)(old_mtime - now));
2215 }
2216 }
2217
is_swap_required(const struct st_logfile * old_files,struct st_logfile * new_files,int use_ino,int idx)2218 static int is_swap_required(const struct st_logfile *old_files, struct st_logfile *new_files, int use_ino, int idx)
2219 {
2220 int is_same_place;
2221
2222 /* if the 1st file is not processed at all while the 2nd file was processed (at least partially) */
2223 /* then swap them */
2224 if (0 == new_files[idx].seq && 0 < new_files[idx + 1].seq)
2225 return SUCCEED;
2226
2227 /* if the 2nd file is not a copy of some other file then no need to swap */
2228 if (-1 == new_files[idx + 1].copy_of)
2229 return FAIL;
2230
2231 /* The 2nd file is a copy. But is it a copy of the 1st file ? */
2232
2233 /* On file systems with inodes or file indices if a file is copied and truncated, we assume that */
2234 /* there is a high possibility that the truncated file has the same inode (index) as before. */
2235
2236 if (NULL == old_files) /* cannot consult the old file list */
2237 return FAIL;
2238
2239 is_same_place = compare_file_places(old_files + new_files[idx + 1].copy_of, new_files + idx, use_ino);
2240
2241 if (ZBX_FILE_PLACE_SAME == is_same_place && new_files[idx].seq >= new_files[idx + 1].seq)
2242 return SUCCEED;
2243
2244 /* The last attempt - compare file names. It is less reliable as file rotation can change file names. */
2245 if (ZBX_FILE_PLACE_OTHER == is_same_place || ZBX_FILE_PLACE_UNKNOWN == is_same_place)
2246 {
2247 if (0 == strcmp((old_files + new_files[idx + 1].copy_of)->filename, (new_files + idx)->filename))
2248 return SUCCEED;
2249 }
2250
2251 return FAIL;
2252 }
2253
swap_logfile_array_elements(struct st_logfile * array,int idx1,int idx2)2254 static void swap_logfile_array_elements(struct st_logfile *array, int idx1, int idx2)
2255 {
2256 struct st_logfile *p1 = array + idx1;
2257 struct st_logfile *p2 = array + idx2;
2258 struct st_logfile tmp;
2259
2260 memcpy(&tmp, p1, sizeof(struct st_logfile));
2261 memcpy(p1, p2, sizeof(struct st_logfile));
2262 memcpy(p2, &tmp, sizeof(struct st_logfile));
2263 }
2264
ensure_order_if_mtimes_equal(const struct st_logfile * logfiles_old,struct st_logfile * logfiles,int logfiles_num,int use_ino,int * start_idx)2265 static void ensure_order_if_mtimes_equal(const struct st_logfile *logfiles_old, struct st_logfile *logfiles,
2266 int logfiles_num, int use_ino, int *start_idx)
2267 {
2268 int i;
2269
2270 /* There is a special case when within 1 second of time: */
2271 /* 1. a log file ORG.log is copied to other file COPY.log, */
2272 /* 2. the original file ORG.log is truncated, */
2273 /* 3. new records are appended to the original file ORG.log, */
2274 /* 4. both files ORG.log and COPY.log have the same 'mtime'. */
2275 /* Now in the list 'logfiles' the file ORG.log precedes the COPY.log because if 'mtime' is the same */
2276 /* then add_logfile() function sorts files by name in descending order. This would lead to an error - */
2277 /* processing ORG.log before COPY.log. We need to correct the order by swapping ORG.log and COPY.log */
2278 /* elements in the 'logfiles' list. */
2279
2280 for (i = 0; i < logfiles_num - 1; i++)
2281 {
2282 if (logfiles[i].mtime == logfiles[i + 1].mtime &&
2283 SUCCEED == is_swap_required(logfiles_old, logfiles, use_ino, i))
2284 {
2285 zabbix_log(LOG_LEVEL_DEBUG, "ensure_order_if_mtimes_equal() swapping files '%s' and '%s'",
2286 logfiles[i].filename, logfiles[i + 1].filename);
2287
2288 swap_logfile_array_elements(logfiles, i, i + 1);
2289
2290 if (*start_idx == i + 1)
2291 *start_idx = i;
2292 }
2293 }
2294 }
2295
files_start_with_same_md5(const struct st_logfile * log1,const struct st_logfile * log2)2296 static int files_start_with_same_md5(const struct st_logfile *log1, const struct st_logfile *log2)
2297 {
2298 if (-1 == log1->md5size || -1 == log2->md5size)
2299 return FAIL;
2300
2301 if (log1->md5size == log2->md5size) /* this works for empty files, too */
2302 {
2303 if (0 == memcmp(log1->md5buf, log2->md5buf, sizeof(log1->md5buf)))
2304 return SUCCEED;
2305 else
2306 return FAIL;
2307 }
2308
2309 /* we have MD5 sums, but they are calculated from blocks of different sizes */
2310
2311 if (0 < log1->md5size && 0 < log2->md5size)
2312 {
2313 const struct st_logfile *file_smaller, *file_larger;
2314 int fd, ret = FAIL;
2315 char *err_msg = NULL; /* required, but not used */
2316 md5_byte_t md5tmp[MD5_DIGEST_SIZE];
2317
2318 if (log1->md5size < log2->md5size)
2319 {
2320 file_smaller = log1;
2321 file_larger = log2;
2322 }
2323 else
2324 {
2325 file_smaller = log2;
2326 file_larger = log1;
2327 }
2328
2329 if (-1 == (fd = zbx_open(file_larger->filename, O_RDONLY)))
2330 return FAIL;
2331
2332 if (SUCCEED == file_start_md5(fd, file_smaller->md5size, md5tmp, "", &err_msg))
2333 {
2334 if (0 == memcmp(file_smaller->md5buf, md5tmp, sizeof(md5tmp)))
2335 ret = SUCCEED;
2336 }
2337
2338 zbx_free(err_msg);
2339 close(fd);
2340
2341 return ret;
2342 }
2343
2344 return FAIL;
2345 }
2346
handle_multiple_copies(struct st_logfile * logfiles,int logfiles_num,int i)2347 static void handle_multiple_copies(struct st_logfile *logfiles, int logfiles_num, int i)
2348 {
2349 /* There is a special case when the latest log file is copied to other file but not yet truncated. */
2350 /* So there are two files and we don't know which one will stay as the copy and which one will be */
2351 /* truncated. Similar cases: the latest log file is copied but never truncated or is copied multiple */
2352 /* times. */
2353
2354 int j;
2355
2356 for (j = i + 1; j < logfiles_num; j++)
2357 {
2358 if (SUCCEED == files_start_with_same_md5(logfiles + i, logfiles + j))
2359 {
2360 /* logfiles[i] and logfiles[j] are original and copy (or vice versa). */
2361 /* If logfiles[i] has been at least partially processed then transfer its */
2362 /* processed size to logfiles[j], too. */
2363
2364 if (logfiles[j].processed_size < logfiles[i].processed_size)
2365 {
2366 logfiles[j].processed_size = MIN(logfiles[i].processed_size, logfiles[j].size);
2367
2368 zabbix_log(LOG_LEVEL_DEBUG, "handle_multiple_copies() file '%s' processed_size:"
2369 ZBX_FS_UI64 " transferred to" " file '%s' processed_size:" ZBX_FS_UI64,
2370 logfiles[i].filename, logfiles[i].processed_size,
2371 logfiles[j].filename, logfiles[j].processed_size);
2372 }
2373 else if (logfiles[i].processed_size < logfiles[j].processed_size)
2374 {
2375 logfiles[i].processed_size = MIN(logfiles[j].processed_size, logfiles[i].size);
2376
2377 zabbix_log(LOG_LEVEL_DEBUG, "handle_multiple_copies() file '%s' processed_size:"
2378 ZBX_FS_UI64 " transferred to" " file '%s' processed_size:" ZBX_FS_UI64,
2379 logfiles[j].filename, logfiles[j].processed_size,
2380 logfiles[i].filename, logfiles[i].processed_size);
2381 }
2382 }
2383 }
2384 }
2385
/******************************************************************************
 *
 * Function: delay_update_if_copies
 *
 * Purpose: if the log file list contains copies then set 'mtime' and
 *          'lastlogsize' back so that the information about copies is
 *          preserved
 *
 * Parameters:
 *     logfiles     - [IN/OUT] log file list
 *     logfiles_num - [IN] number of elements in 'logfiles'
 *     mtime        - [IN/OUT] last modification time
 *     lastlogsize  - [IN/OUT] offset from the beginning of the file
 *
 ******************************************************************************/
static void	delay_update_if_copies(struct st_logfile *logfiles, int logfiles_num, int *mtime,
		zbx_uint64_t *lastlogsize)
{
	const int	last = logfiles_num - 1;
	int		first, second, keep = last;

	/* If there are copies in 'logfiles' list then find the element with the smallest index which must be */
	/* preserved in the list to keep information about copies. */

	for (first = 0; first < last; first++)
	{
		int	largest = -1;	/* -1 means that no copy of 'first' has been found */

		if (0 == logfiles[first].size)
			continue;

		for (second = first + 1; second < logfiles_num; second++)
		{
			int	candidate;

			if (0 == logfiles[second].size)
				continue;

			if (SUCCEED != files_start_with_same_md5(logfiles + first, logfiles + second))
				continue;

			/* 'first' and 'second' are original and copy (or vice versa), */
			/* take the index of the file which has been processed further */

			if (logfiles[first].processed_size > logfiles[second].processed_size)
				candidate = first;
			else
				candidate = second;

			if (candidate > largest)
				largest = candidate;
		}

		if (-1 != largest && largest < keep)
			keep = largest;
	}

	if (logfiles[keep].mtime >= *mtime)
		return;

	zabbix_log(LOG_LEVEL_DEBUG, "delay_update_if_copies(): setting mtime back from %d to %d,"
			" lastlogsize from " ZBX_FS_UI64 " to " ZBX_FS_UI64, *mtime,
			logfiles[keep].mtime, *lastlogsize, logfiles[keep].processed_size);

	/* ensure that next time element 'keep' is included in file list with the right 'lastlogsize' */
	*mtime = logfiles[keep].mtime;
	*lastlogsize = logfiles[keep].processed_size;

	/* ensure that next time processing starts from element 'keep' */
	for (first = keep + 1; first < logfiles_num; first++)
		logfiles[first].seq = 0;
}
2441
max_processed_size_in_copies(const struct st_logfile * logfiles,int logfiles_num,int i)2442 static zbx_uint64_t max_processed_size_in_copies(const struct st_logfile *logfiles, int logfiles_num, int i)
2443 {
2444 zbx_uint64_t max_processed = 0;
2445 int j;
2446
2447 for (j = 0; j < logfiles_num; j++)
2448 {
2449 if (i != j && SUCCEED == files_start_with_same_md5(logfiles + i, logfiles + j))
2450 {
2451 /* logfiles[i] and logfiles[j] are original and copy (or vice versa). */
2452 if (max_processed < logfiles[j].processed_size)
2453 max_processed = logfiles[j].processed_size;
2454 }
2455 }
2456
2457 return max_processed;
2458 }
2459
2460 /******************************************************************************
2461 * *
2462 * Function: calculate_delay *
2463 * *
2464 * Purpose: calculate delay based on number of processed and remaining bytes, *
2465 * and processing time *
2466 * *
2467 * Parameters: *
2468 * processed_bytes - [IN] number of processed bytes in logfile *
2469 * remaining_bytes - [IN] number of remaining bytes in all logfiles *
2470 * t_proc - [IN] processing time, s *
2471 * *
2472 * Return value: *
2473 * delay in seconds or 0 (if cannot be calculated) *
2474 * *
2475 ******************************************************************************/
calculate_delay(zbx_uint64_t processed_bytes,zbx_uint64_t remaining_bytes,double t_proc)2476 static double calculate_delay(zbx_uint64_t processed_bytes, zbx_uint64_t remaining_bytes, double t_proc)
2477 {
2478 double delay = 0.0;
2479
2480 /* Processing time could be negative or 0 if the system clock has been set back in time. */
2481 /* In this case return 0, then a jump over log lines will not take place. */
2482
2483 if (0 != processed_bytes && 0.0 < t_proc)
2484 {
2485 delay = (double)remaining_bytes * t_proc / (double)processed_bytes;
2486
2487 zabbix_log(LOG_LEVEL_DEBUG, "calculate_delay(): processed bytes:" ZBX_FS_UI64
2488 " remaining bytes:" ZBX_FS_UI64 " t_proc:%e s speed:%e B/s remaining full checks:"
2489 ZBX_FS_UI64 " delay:%e s", processed_bytes, remaining_bytes, t_proc,
2490 (double)processed_bytes / t_proc, remaining_bytes / processed_bytes, delay);
2491 }
2492
2493 return delay;
2494 }
2495
/******************************************************************************
 *
 * Function: jump_remaining_bytes_logrt
 *
 * Purpose: skip up to 'bytes_to_jump' unprocessed bytes in the log file
 *          list, beginning with file 'start_from' and then continuing from
 *          the start of the list
 *
 * Parameters:
 *     logfiles      - [IN/OUT] list of log files
 *     logfiles_num  - [IN] number of elements in 'logfiles'
 *     key           - [IN] item key for logging
 *     start_from    - [IN] index of the file to start the jump from
 *     bytes_to_jump - [IN] number of bytes to jump over
 *     seq           - [IN/OUT] sequence number, assigned to every file
 *                     jumped in and then incremented
 *     lastlogsize   - [OUT] new processed size of the last file jumped in
 *     mtime         - [OUT] modification time of the last file jumped in
 *     jumped_to     - [OUT] index of the last file jumped in; not modified
 *                     if no file was jumped in
 *
 ******************************************************************************/
static void	jump_remaining_bytes_logrt(struct st_logfile *logfiles, int logfiles_num, const char *key,
		int start_from, zbx_uint64_t bytes_to_jump, int *seq, zbx_uint64_t *lastlogsize, int *mtime,
		int *jumped_to)
{
	int	idx = start_from;	/* enter the loop with index of the last file processed, */
					/* later continue the loop from the start */
	int	wrapped = 0;		/* becomes 1 when the loop has restarted from the list beginning */

	while (idx < logfiles_num)
	{
		struct st_logfile	*file = logfiles + idx;

		if (file->size != file->processed_size)
		{
			zbx_uint64_t	skipped, target;

			/* do not jump beyond the end of the file */
			skipped = MIN(bytes_to_jump, file->size - file->processed_size);
			target = file->processed_size + skipped;

			zabbix_log(LOG_LEVEL_WARNING, "item:\"%s\" logfile:\"%s\" skipping " ZBX_FS_UI64 " bytes (from"
					" byte " ZBX_FS_UI64 " to byte " ZBX_FS_UI64 ") to meet maxdelay", key,
					file->filename, skipped, file->processed_size, target);

			file->processed_size = target;
			*lastlogsize = target;
			*mtime = file->mtime;

			file->seq = (*seq)++;

			bytes_to_jump -= skipped;

			*jumped_to = idx;
		}

		if (0 == bytes_to_jump)
			break;

		if (0 == wrapped)
		{
			/* 'start_from' element was processed, now proceed from the beginning of file list */
			wrapped = 1;
			idx = 0;
		}
		else
			idx++;
	}
}
2543
2544 /******************************************************************************
2545 * *
2546 * Function: adjust_position_after_jump *
2547 * *
2548 * Purpose: *
2549 * After jumping over a number of bytes we "land" most likely somewhere in *
2550 * the middle of log file line. This function tries to adjust position to *
2551 * the beginning of the log line. *
2552 * *
2553 * Parameters: *
2554 * logfile - [IN/OUT] log file data *
2555 * lastlogsize - [IN/OUT] offset from the beginning of the file *
2556 * min_size - [IN] minimum offset to search from *
2557 * encoding - [IN] text string describing encoding *
2558 * err_msg - [IN/OUT] error message *
2559 * *
2560 * Return value: SUCCEED or FAIL (with error message allocated in 'err_msg') *
2561 * *
2562 ******************************************************************************/
adjust_position_after_jump(struct st_logfile * logfile,zbx_uint64_t * lastlogsize,zbx_uint64_t min_size,const char * encoding,char ** err_msg)2563 static int adjust_position_after_jump(struct st_logfile *logfile, zbx_uint64_t *lastlogsize, zbx_uint64_t min_size,
2564 const char *encoding, char **err_msg)
2565 {
2566 int fd, ret = FAIL;
2567 size_t szbyte;
2568 ssize_t nbytes;
2569 const char *cr, *lf, *p_end;
2570 char *p, *p_nl, *p_next;
2571 zbx_uint64_t lastlogsize_tmp, lastlogsize_aligned, lastlogsize_org, seek_pos, remainder;
2572 char buf[32 * ZBX_KIBIBYTE]; /* buffer must be of size multiple of 4 as some character */
2573 /* encodings use 4 bytes for every character */
2574
2575 if (-1 == (fd = open_file_helper(logfile->filename, err_msg)))
2576 return FAIL;
2577
2578 find_cr_lf_szbyte(encoding, &cr, &lf, &szbyte);
2579
2580 /* For multibyte character encodings 'lastlogsize' needs to be aligned to character border. */
2581 /* Align it towards smaller offset. We assume that log file contains no corrupted data stream. */
2582
2583 lastlogsize_org = *lastlogsize;
2584 lastlogsize_aligned = *lastlogsize;
2585
2586 if (1 < szbyte && 0 != (remainder = lastlogsize_aligned % szbyte)) /* remainder can be 0, 1, 2 or 3 */
2587 {
2588 if (min_size <= lastlogsize_aligned - remainder)
2589 lastlogsize_aligned -= remainder;
2590 else
2591 lastlogsize_aligned = min_size;
2592 }
2593
2594 if ((zbx_offset_t)-1 == zbx_lseek(fd, lastlogsize_aligned, SEEK_SET))
2595 {
2596 *err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
2597 lastlogsize_aligned, logfile->filename, zbx_strerror(errno));
2598 goto out;
2599 }
2600
2601 /* search forward for the first newline until EOF */
2602
2603 lastlogsize_tmp = lastlogsize_aligned;
2604
2605 for (;;)
2606 {
2607 if (-1 == (nbytes = read(fd, buf, sizeof(buf))))
2608 {
2609 *err_msg = zbx_dsprintf(*err_msg, "Cannot read from file \"%s\": %s", logfile->filename,
2610 zbx_strerror(errno));
2611 goto out;
2612 }
2613
2614 if (0 == nbytes) /* end of file reached */
2615 break;
2616
2617 p = buf;
2618 p_end = buf + nbytes; /* no data from this position */
2619
2620 if (NULL != (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
2621 {
2622 /* found the beginning of line */
2623
2624 *lastlogsize = lastlogsize_tmp + (zbx_uint64_t)(p_next - buf);
2625 logfile->processed_size = *lastlogsize;
2626 ret = SUCCEED;
2627 goto out;
2628 }
2629
2630 lastlogsize_tmp += (zbx_uint64_t)nbytes;
2631 }
2632
2633 /* Searching forward did not find a newline. Now search backwards until 'min_size'. */
2634
2635 seek_pos = lastlogsize_aligned;
2636
2637 for (;;)
2638 {
2639 if (sizeof(buf) <= seek_pos)
2640 seek_pos -= MIN(sizeof(buf), seek_pos - min_size);
2641 else
2642 seek_pos = min_size;
2643
2644 if ((zbx_offset_t)-1 == zbx_lseek(fd, seek_pos, SEEK_SET))
2645 {
2646 *err_msg = zbx_dsprintf(*err_msg, "Cannot set position to " ZBX_FS_UI64 " in file \"%s\": %s",
2647 lastlogsize_aligned, logfile->filename, zbx_strerror(errno));
2648 goto out;
2649 }
2650
2651 if (-1 == (nbytes = read(fd, buf, sizeof(buf))))
2652 {
2653 *err_msg = zbx_dsprintf(*err_msg, "Cannot read from file \"%s\": %s", logfile->filename,
2654 zbx_strerror(errno));
2655 goto out;
2656 }
2657
2658 if (0 == nbytes) /* end of file reached */
2659 {
2660 *err_msg = zbx_dsprintf(*err_msg, "Unexpected end of file while reading file \"%s\"",
2661 logfile->filename);
2662 goto out;
2663 }
2664
2665 p = buf;
2666 p_end = buf + nbytes; /* no data from this position */
2667
2668 if (NULL != (p_nl = buf_find_newline(p, &p_next, p_end, cr, lf, szbyte)))
2669 {
2670 /* Found the beginning of line. It may not be the one closest to place we jumped to */
2671 /* (it could be about sizeof(buf) bytes away) but it is ok for our purposes. */
2672
2673 *lastlogsize = seek_pos + (zbx_uint64_t)(p_next - buf);
2674 logfile->processed_size = *lastlogsize;
2675 ret = SUCCEED;
2676 goto out;
2677 }
2678
2679 if (min_size == seek_pos)
2680 {
2681 /* We have searched backwards until 'min_size' and did not find a 'newline'. */
2682 /* Effectively it turned out to be a jump with zero-length. */
2683
2684 *lastlogsize = min_size;
2685 logfile->processed_size = *lastlogsize;
2686 ret = SUCCEED;
2687 goto out;
2688 }
2689 }
2690 out:
2691 if (SUCCEED != close_file_helper(fd, logfile->filename, err_msg))
2692 ret = FAIL;
2693
2694 if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
2695 {
2696 const char *dbg_msg;
2697
2698 if (SUCCEED == ret)
2699 dbg_msg = "NEWLINE FOUND";
2700 else
2701 dbg_msg = "NEWLINE NOT FOUND";
2702
2703 zabbix_log(LOG_LEVEL_DEBUG, "adjust_position_after_jump(): szbyte:" ZBX_FS_SIZE_T " lastlogsize_org:"
2704 ZBX_FS_UI64 " lastlogsize_aligned:" ZBX_FS_UI64 " (change " ZBX_FS_I64 " bytes)"
2705 " lastlogsize_after:" ZBX_FS_UI64 " (change " ZBX_FS_I64 " bytes) %s %s",
2706 (zbx_fs_size_t)szbyte, lastlogsize_org, lastlogsize_aligned,
2707 (zbx_int64_t)lastlogsize_aligned - (zbx_int64_t)lastlogsize_org, *lastlogsize,
2708 (zbx_int64_t)*lastlogsize - (zbx_int64_t)lastlogsize_aligned,
2709 dbg_msg, ZBX_NULL2EMPTY_STR(*err_msg));
2710 }
2711
2712 return ret;
2713 }
2714
2715 /******************************************************************************
2716 * *
2717 * Function: jump_ahead *
2718 * *
2719 * Purpose: move forward to a new position in the log file list *
2720 * *
2721 * Parameters: *
2722 * key - [IN] item key for logging *
2723 * logfiles - [IN/OUT] list of log files *
2724 * logfiles_num - [IN] number of elements in 'logfiles' *
2725 * jump_from_to - [IN/OUT] on input - number of element where to start *
2726 * jump, on output - number of element we jumped into *
2727 * seq - [IN/OUT] sequence number of last processed file *
2728 * lastlogsize - [IN/OUT] offset from the beginning of the file *
2729 * mtime - [IN/OUT] last modification time of the file *
2730 * encoding - [IN] text string describing encoding *
2731 * bytes_to_jump - [IN] number of bytes to jump ahead *
2732 * err_msg - [IN/OUT] error message *
2733 * *
2734 * Return value: SUCCEED or FAIL (with error message allocated in 'err_msg') *
2735 * *
2736 ******************************************************************************/
static int	jump_ahead(const char *key, struct st_logfile *logfiles, int logfiles_num,
		int *jump_from_to, int *seq, zbx_uint64_t *lastlogsize, int *mtime, const char *encoding,
		zbx_uint64_t bytes_to_jump, char **err_msg)
{
	const zbx_uint64_t	pos_before_jump = *lastlogsize;	/* "pre-jump" offset */
	zbx_uint64_t		lower_bound = 0;	/* do not look for a line start before this offset */
	int			target_idx = -1;	/* index in 'logfiles' of the file we landed in */

	jump_remaining_bytes_logrt(logfiles, logfiles_num, key, *jump_from_to, bytes_to_jump, seq, lastlogsize,
			mtime, &target_idx);

	/* nothing was skipped - 'jump_from_to' stays as it is */
	if (-1 == target_idx)
		return SUCCEED;

	/* The landing position is most likely in the middle of a log line, so it has to be aligned to the */
	/* beginning of a line, otherwise a line fragment would be pattern-matched. */

	if (*jump_from_to == target_idx)
	{
		/* still in the same file - the search for a line start must not go before the old position */
		lower_bound = pos_before_jump;
	}
	else
	{
		/* landed in another file - the search may go back to the very beginning of that file */
		*jump_from_to = target_idx;
	}

	return adjust_position_after_jump(&logfiles[target_idx], lastlogsize, lower_bound, encoding, err_msg);
}
2770
calculate_remaining_bytes(struct st_logfile * logfiles,int logfiles_num)2771 static zbx_uint64_t calculate_remaining_bytes(struct st_logfile *logfiles, int logfiles_num)
2772 {
2773 zbx_uint64_t remaining_bytes = 0;
2774 int i;
2775
2776 for (i = 0; i < logfiles_num; i++)
2777 remaining_bytes += logfiles[i].size - logfiles[i].processed_size;
2778
2779 return remaining_bytes;
2780 }
2781
transfer_for_rotate(const struct st_logfile * logfiles_old,int idx,struct st_logfile * logfiles,int logfiles_num,const char * old2new,int * seq)2782 static void transfer_for_rotate(const struct st_logfile *logfiles_old, int idx, struct st_logfile *logfiles,
2783 int logfiles_num, const char *old2new, int *seq)
2784 {
2785 int j;
2786
2787 if (0 < logfiles_old[idx].processed_size && 0 == logfiles_old[idx].incomplete &&
2788 -1 != (j = find_old2new(old2new, logfiles_num, idx)))
2789 {
2790 if (logfiles_old[idx].size == logfiles_old[idx].processed_size &&
2791 logfiles_old[idx].size == logfiles[j].size)
2792 {
2793 /* the file was fully processed during the previous check and must be ignored during this */
2794 /* check */
2795 logfiles[j].processed_size = logfiles[j].size;
2796 logfiles[j].seq = (*seq)++;
2797 }
2798 else
2799 {
2800 /* the file was not fully processed during the previous check or has grown */
2801 if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2802 logfiles[j].processed_size = MIN(logfiles[j].size, logfiles_old[idx].processed_size);
2803 }
2804 }
2805 else if (1 == logfiles_old[idx].incomplete && -1 != (j = find_old2new(old2new, logfiles_num, idx)))
2806 {
2807 if (logfiles_old[idx].size < logfiles[j].size)
2808 {
2809 /* The file was not fully processed because of incomplete last record but it has grown. */
2810 /* Try to process it further. */
2811 logfiles[j].incomplete = 0;
2812 }
2813 else
2814 logfiles[j].incomplete = 1;
2815
2816 if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2817 logfiles[j].processed_size = MIN(logfiles[j].size, logfiles_old[idx].processed_size);
2818 }
2819 }
2820
transfer_for_copytruncate(const struct st_logfile * logfiles_old,int idx,struct st_logfile * logfiles,int logfiles_num,const char * old2new,int * seq)2821 static void transfer_for_copytruncate(const struct st_logfile *logfiles_old, int idx, struct st_logfile *logfiles,
2822 int logfiles_num, const char *old2new, int *seq)
2823 {
2824 const char *p = old2new + idx * logfiles_num; /* start of idx-th row in 'old2new' array */
2825 int j;
2826
2827 if (0 < logfiles_old[idx].processed_size && 0 == logfiles_old[idx].incomplete)
2828 {
2829 for (j = 0; j < logfiles_num; j++, p++) /* loop over columns (new files) on idx-th row */
2830 {
2831 if ('1' == *p || '2' == *p)
2832 {
2833 if (logfiles_old[idx].size == logfiles_old[idx].processed_size &&
2834 logfiles_old[idx].size == logfiles[j].size)
2835 {
2836 /* the file was fully processed during the previous check and must be ignored */
2837 /* during this check */
2838 logfiles[j].processed_size = logfiles[j].size;
2839 logfiles[j].seq = (*seq)++;
2840 }
2841 else
2842 {
2843 /* the file was not fully processed during the previous check or has grown */
2844 if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2845 {
2846 logfiles[j].processed_size = MIN(logfiles[j].size,
2847 logfiles_old[idx].processed_size);
2848 }
2849 }
2850 }
2851 }
2852 }
2853 else if (1 == logfiles_old[idx].incomplete)
2854 {
2855 for (j = 0; j < logfiles_num; j++, p++) /* loop over columns (new files) on idx-th row */
2856 {
2857 if ('1' == *p || '2' == *p)
2858 {
2859 if (logfiles_old[idx].size < logfiles[j].size)
2860 {
2861 /* The file was not fully processed because of incomplete last record but it */
2862 /* has grown. Try to process it further. */
2863 logfiles[j].incomplete = 0;
2864 }
2865 else
2866 logfiles[j].incomplete = 1;
2867
2868 if (logfiles[j].processed_size < logfiles_old[idx].processed_size)
2869 {
2870 logfiles[j].processed_size = MIN(logfiles[j].size,
2871 logfiles_old[idx].processed_size);
2872 }
2873 }
2874 }
2875 }
2876 }
2877
/******************************************************************************
 *                                                                            *
 * Function: update_new_list_from_old                                         *
 *                                                                            *
 * Purpose: transfer processing state from the old file list to the new one   *
 *          and find the file to continue from                                *
 *                                                                            *
 * Parameters:                                                                *
 *     rotation_type    - [IN] simple rotation or copy/truncate rotation      *
 *     logfiles_old     - [IN/OUT] old file list                              *
 *     logfiles_num_old - [IN] number of elements in 'logfiles_old'           *
 *     logfiles         - [IN/OUT] new file list                              *
 *     logfiles_num     - [IN] number of elements in 'logfiles'               *
 *     use_ino          - [IN] how to use inode numbers                       *
 *     seq              - [IN/OUT] next sequence number to assign             *
 *     start_idx        - [IN/OUT] index of the file to start processing from *
 *     lastlogsize      - [IN/OUT] offset from the beginning of the file      *
 *     err_msg          - [IN/OUT] error message                              *
 *                                                                            *
 * Return value: SUCCEED or FAIL (if the old-to-new mapping was not created)  *
 *                                                                            *
 ******************************************************************************/
static int	update_new_list_from_old(zbx_log_rotation_options_t rotation_type, struct st_logfile *logfiles_old,
		int logfiles_num_old, struct st_logfile *logfiles, int logfiles_num, int use_ino, int *seq,
		int *start_idx, zbx_uint64_t *lastlogsize, char **err_msg)
{
	char	*map;
	int	old_idx, highest_seq = 0, last_idx = -1;	/* 'last_idx' stays -1 while no old file has seq > 0 */

	map = create_old2new_and_copy_of(rotation_type, logfiles_old, logfiles_num_old, logfiles, logfiles_num,
			use_ino, err_msg);

	if (NULL == map)
		return FAIL;

	for (old_idx = 0; old_idx < logfiles_num_old; old_idx++)
	{
		/* hand over the state of fully and partially processed files to the new list */
		if (ZBX_LOG_ROTATION_LOGCPT != rotation_type)
			transfer_for_rotate(logfiles_old, old_idx, logfiles, logfiles_num, map, seq);
		else
			transfer_for_copytruncate(logfiles_old, old_idx, logfiles, logfiles_num, map, seq);

		/* the old file with the highest sequence number was processed last in the previous check */
		if (highest_seq < logfiles_old[old_idx].seq)
		{
			highest_seq = logfiles_old[old_idx].seq;
			last_idx = old_idx;
		}
	}

	if (-1 != last_idx)
	{
		/* continue from the successor of the last processed old file */
		*start_idx = find_old2new(map, logfiles_num, last_idx);

		if (-1 == *start_idx)
		{
			/* no successor found - start from the first file and take 'lastlogsize' from it */
			*start_idx = 0;
			*lastlogsize = logfiles[*start_idx].processed_size;
		}
	}

	zbx_free(map);

	return SUCCEED;
}
2920
2921 /******************************************************************************
2922 * *
2923 * Function: process_logrt *
2924 * *
2925 * Purpose: Find new records in logfiles *
2926 * *
2927 * Parameters: *
2928 * flags - [IN] bit flags with item type: log, logrt, *
2929 * log.count or logrt.count *
2930 * filename - [IN] logfile name (regular expression with a path) *
2931 * lastlogsize - [IN/OUT] offset from the beginning of the file *
2932 * mtime - [IN/OUT] last modification time of the file *
2933 * lastlogsize_sent - [OUT] lastlogsize value that was last sent *
2934 * mtime_sent - [OUT] mtime value that was last sent *
2935 * skip_old_data - [IN/OUT] start from the beginning of the file or *
2936 * jump to the end *
2937 * big_rec - [IN/OUT] state variable to remember whether a long *
2938 * record is being processed *
2939 * use_ino - [IN/OUT] how to use inode numbers *
2940 * err_msg - [IN/OUT] error message why an item became *
2941 * NOTSUPPORTED *
2942 * logfiles_old - [IN/OUT] array of logfiles from the last check *
2943 * logfiles_num_old - [IN] number of elements in "logfiles_old" *
2944 * logfiles_new - [OUT] new array of logfiles *
2945 * logfiles_num_new - [OUT] number of elements in "logfiles_new" *
2946 * encoding - [IN] text string describing encoding. *
2947 * See function find_cr_lf_szbyte() for supported *
2948 * encodings. *
2949 * "" (empty string) means a single-byte character set *
2950 * (e.g. ASCII). *
2951 * regexps - [IN] array of regexps *
2952 * pattern - [IN] pattern to match *
2953 * output_template - [IN] output formatting template *
2954 * p_count - [IN/OUT] limit of records to be processed *
2955 * s_count - [IN/OUT] limit of records to be sent to server *
2956 * process_value - [IN] pointer to function process_value() *
2957 * server - [IN] server to send data to *
2958 * port - [IN] port to send data to *
2959 * hostname - [IN] hostname the data comes from *
2960 * key - [IN] item key the data belongs to *
2961 * jumped - [OUT] flag to indicate that a jump took place *
2962 * max_delay - [IN] maximum allowed delay, s *
2963 * start_time - [IN/OUT] start time of check *
2964 * processed_bytes - [IN/OUT] number of bytes processed *
2965 * rotation_type - [IN] simple rotation or copy/truncate rotation *
2966 * *
2967 * Return value: returns SUCCEED on successful reading, *
2968 * FAIL on other cases *
2969 * *
2970 * Author: Dmitry Borovikov (logrotation) *
2971 * *
2972 ******************************************************************************/
int	process_logrt(unsigned char flags, const char *filename, zbx_uint64_t *lastlogsize, int *mtime,
		zbx_uint64_t *lastlogsize_sent, int *mtime_sent, unsigned char *skip_old_data, int *big_rec,
		int *use_ino, char **err_msg, struct st_logfile **logfiles_old, const int *logfiles_num_old,
		struct st_logfile **logfiles_new, int *logfiles_num_new, const char *encoding,
		zbx_vector_ptr_t *regexps, const char *pattern, const char *output_template, int *p_count, int *s_count,
		zbx_process_value_func_t process_value, const char *server, unsigned short port, const char *hostname,
		const char *key, int *jumped, float max_delay, double *start_time, zbx_uint64_t *processed_bytes,
		zbx_log_rotation_options_t rotation_type)
{
	const char		*function_name = "process_logrt";
	struct st_logfile	*logfiles = NULL;
	zbx_uint64_t		bytes_total = 0;	/* bytes processed during this check */
	int			idx, start_idx, last_processed, list_res, ret = FAIL;
	int			logfiles_num = 0, logfiles_alloc = 0, seq = 1;
	int			resume_pass = 1;	/* 1 while the file we left off at is being handled */
	int			limit_reached = 0;

	zabbix_log(LOG_LEVEL_DEBUG, "In %s() flags:0x%02x filename:'%s' lastlogsize:" ZBX_FS_UI64 " mtime:%d",
			function_name, (unsigned int)flags, filename, *lastlogsize, *mtime);

	adjust_mtime_to_clock(mtime);

	list_res = make_logfile_list(flags, filename, *mtime, &logfiles, &logfiles_alloc, &logfiles_num, use_ino,
			err_msg);

	if (SUCCEED != list_res)
	{
		if (ZBX_NO_FILE_ERROR == list_res)
		{
			if (1 == *skip_old_data)
			{
				*skip_old_data = 0;

				zabbix_log(LOG_LEVEL_DEBUG, "%s(): no files, setting skip_old_data to 0",
						function_name);
			}

			if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags) && 0 == *logfiles_num_old)
			{
				/* Neither the previous nor the current check has found any log file, i.e. the */
				/* agent has not seen log files for this logrt[] item since it was started. */
				/* Files appearing later must be analyzed from their start, so the */
				/* 'lastlogsize' received from server is dropped. */

				*lastlogsize = 0;
			}
		}

		/* log[] and log.count[] items: file was not accessible or an error occurred */
		if (0 != (ZBX_METRIC_FLAG_LOG_LOG & flags))
			goto out;

		/* logrt[] and logrt.count[] items give up only on a real error */
		if (0 != (ZBX_METRIC_FLAG_LOG_LOGRT & flags) && FAIL == list_res)
			goto out;
	}

	if (0 == logfiles_num)
	{
		/* a logrt[] or logrt.count[] item without files to analyze is not an error */
		ret = SUCCEED;
		goto out;
	}

	/* when old data is skipped only the last file of the list remains to be looked at */
	start_idx = (1 == *skip_old_data ? logfiles_num - 1 : 0);

	/* all files before 'start_idx' are marked as processed */
	for (idx = 0; idx < start_idx; idx++)
	{
		logfiles[idx].processed_size = logfiles[idx].size;
		logfiles[idx].seq = seq++;
	}

	if (0 < *logfiles_num_old && 0 < logfiles_num)
	{
		if (SUCCEED != update_new_list_from_old(rotation_type, *logfiles_old, *logfiles_num_old, logfiles,
				logfiles_num, *use_ino, &seq, &start_idx, lastlogsize, err_msg))
		{
			destroy_logfile_list(&logfiles, &logfiles_alloc, &logfiles_num);
			goto out;
		}
	}

	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
		ensure_order_if_mtimes_equal(*logfiles_old, logfiles, logfiles_num, *use_ino, &start_idx);

	if (SUCCEED == ZBX_CHECK_LOG_LEVEL(LOG_LEVEL_DEBUG))
	{
		zabbix_log(LOG_LEVEL_DEBUG, "%s() old file list:", function_name);

		if (NULL == *logfiles_old)
			zabbix_log(LOG_LEVEL_DEBUG, " file list empty");
		else
			print_logfile_list(*logfiles_old, *logfiles_num_old);

		zabbix_log(LOG_LEVEL_DEBUG, "%s() new file list: (mtime:%d lastlogsize:" ZBX_FS_UI64
				" start_idx:%d)", function_name, *mtime, *lastlogsize, start_idx);

		if (NULL == logfiles)
			zabbix_log(LOG_LEVEL_DEBUG, " file list empty");
		else
			print_logfile_list(logfiles, logfiles_num);
	}

	/* processing starts from this file (a jump below may move it forward) */
	last_processed = start_idx;

	/* there might be nothing to do at all, therefore success is assumed from here on */
	ret = SUCCEED;

	if (0.0f != max_delay)
	{
		zbx_uint64_t	remaining_bytes;

		if (0.0 != *start_time && 0 != (remaining_bytes = calculate_remaining_bytes(logfiles, logfiles_num)))
		{
			/* estimate the delay and skip a part of the unprocessed data if the delay is too large */

			double	delay;

			delay = calculate_delay(*processed_bytes, remaining_bytes, zbx_time() - *start_time);

			if ((double)max_delay < delay)
			{
				zbx_uint64_t	bytes_to_jump;

				bytes_to_jump = (zbx_uint64_t)((double)remaining_bytes *
						(delay - (double)max_delay) / delay);

				ret = jump_ahead(key, logfiles, logfiles_num, &last_processed, &seq, lastlogsize,
						mtime, encoding, bytes_to_jump, err_msg);

				if (SUCCEED == ret)
					*jumped = 1;
			}
		}

		*start_time = zbx_time();	/* the next check measures its delay from this moment */
	}

	/* the first iteration handles the file we left off at, then the list is walked from its beginning */
	idx = last_processed;

	while (NULL != logfiles && idx < logfiles_num)
	{
		struct st_logfile	*file = &logfiles[idx];

		if (0 == file->incomplete && (0 == file->seq || file->size != file->processed_size))
		{
			zbx_uint64_t	bytes_done = 0, seek_offset;
			int		skip_file = 0;

			*mtime = file->mtime;

			/* for the starting file 'lastlogsize' is already in place */
			if (start_idx != idx)
				*lastlogsize = file->processed_size;

			if (0 != *skip_old_data)
			{
				seek_offset = file->size;

				zabbix_log(LOG_LEVEL_DEBUG, "skipping old data in filename:'%s' to seek_offset:"
						ZBX_FS_UI64, file->filename, seek_offset);
			}
			else
				seek_offset = *lastlogsize;

			if (ZBX_LOG_ROTATION_LOGCPT == rotation_type)
			{
				zbx_uint64_t	max_processed;

				max_processed = max_processed_size_in_copies(logfiles, logfiles_num, idx);

				if (seek_offset < max_processed)
				{
					/* a copy of this file has been processed further than this file */
					file->processed_size = MIN(file->size, max_processed);

					if (file->size == file->processed_size)
						skip_file = 1;

					*lastlogsize = max_processed;
				}
			}

			if (0 == skip_file)
			{
				ret = process_log(flags, file->filename, lastlogsize, mtime, lastlogsize_sent,
						mtime_sent, skip_old_data, big_rec, &file->incomplete, err_msg,
						encoding, regexps, pattern, output_template, p_count, s_count,
						process_value, server, port, hostname, key, &bytes_done,
						seek_offset);

				/* 'lastlogsize' is advanced by process_log() only on success, */
				/* so it can be stored without looking at the result */
				file->processed_size = *lastlogsize;

				/* the file may have grown while it was being read */
				if (file->size < *lastlogsize)
					file->size = *lastlogsize;
			}

			/* The file counts as processed (at least partially) even if process_log() failed: */
			/* the current check stops then, and the next check finds the file marked in the */
			/* old list and knows where to continue. */
			file->seq = seq++;

			if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
			{
				int	k;

				for (k = 0; k < logfiles_num - 1; k++)
					handle_multiple_copies(logfiles, logfiles_num, k);
			}

			if (SUCCEED != ret)
				break;

			if (0.0f != max_delay)
				bytes_total += bytes_done;

			/* limit of processed or sent records is used up */
			if (0 >= *p_count || 0 >= *s_count)
			{
				limit_reached = 1;
				break;
			}
		}

		if (0 == resume_pass)
		{
			idx++;
			continue;
		}

		/* The file where the previous check left off has been handled. */
		/* Walk the new list from its beginning to pick up the remaining files. */
		resume_pass = 0;
		idx = 0;
	}

	if (ZBX_LOG_ROTATION_LOGCPT == rotation_type && 1 < logfiles_num)
	{
		/* With frequent checks of a logrt[] or logrt.count[] item and a slow rotation by copying, */
		/* the original file can be completely processed while its copy with a newer timestamp is */
		/* still being written. The original then leaves the file list, the copy is analyzed as a */
		/* new file and matching lines are reported twice. To avoid that the stored 'mtime' and */
		/* 'lastlogsize' are adjusted so that information about copies is kept in the list for as */
		/* long as necessary. */

		delay_update_if_copies(logfiles, logfiles_num, mtime, lastlogsize);
	}

	/* hand the new file list over to the caller, it becomes the old list of the next check */
	*logfiles_num_new = logfiles_num;

	if (0 < logfiles_num)
		*logfiles_new = logfiles;
out:
	if (0.0f != max_delay)
	{
		if (SUCCEED == ret)
			*processed_bytes = bytes_total;

		/* On failure, or when the record limits were not reached, 'start_time' is invalidated */
		/* to prevent a jump in the next check. */
		if (SUCCEED != ret || 0 == limit_reached)
			*start_time = 0.0;
	}

	zabbix_log(LOG_LEVEL_DEBUG, "End of %s():%s", function_name, zbx_result_string(ret));

	return ret;
}
3244