1 /**
2  * parser.c -- web log parsing
3  *    ______      ___
4  *   / ____/___  /   | _____________  __________
5  *  / / __/ __ \/ /| |/ ___/ ___/ _ \/ ___/ ___/
6  * / /_/ / /_/ / ___ / /__/ /__/  __(__  |__  )
7  * \____/\____/_/  |_\___/\___/\___/____/____/
8  *
9  * The MIT License (MIT)
10  * Copyright (c) 2009-2020 Gerardo Orellana <hello @ goaccess.io>
11  *
12  * Permission is hereby granted, free of charge, to any person obtaining a copy
13  * of this software and associated documentation files (the "Software"), to deal
14  * in the Software without restriction, including without limitation the rights
15  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16  * copies of the Software, and to permit persons to whom the Software is
17  * furnished to do so, subject to the following conditions:
18  *
19  * The above copyright notice and this permission notice shall be included in all
20  * copies or substantial portions of the Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28  * SOFTWARE.
29  */
30 
31 /*
32  * "_XOPEN_SOURCE" is required for the GNU libc to export "strptime(3)"
33  * correctly.
34  */
35 #define _LARGEFILE_SOURCE
36 #define _LARGEFILE64_SOURCE
37 #define _FILE_OFFSET_BITS 64
38 
39 #define _XOPEN_SOURCE 700
40 #define _DEFAULT_SOURCE
41 
42 #include <ctype.h>
43 #include <errno.h>
44 
45 #if HAVE_CONFIG_H
46 #include <config.h>
47 #endif
48 
49 #include <arpa/inet.h>
50 #include <stddef.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <strings.h>
55 #include <sys/stat.h>
56 #include <sys/types.h>
57 #include <unistd.h>
58 #include <sys/stat.h>
59 #include <fcntl.h>
60 
61 #include "gkhash.h"
62 
63 #ifdef HAVE_GEOLOCATION
64 #include "geoip1.h"
65 #endif
66 
67 #include "parser.h"
68 
69 #include "browsers.h"
70 #include "error.h"
71 #include "goaccess.h"
72 #include "opesys.h"
73 #include "pdjson.h"
74 #include "util.h"
75 #include "websocket.h"
76 #include "xmalloc.h"
77 
78 /* private prototypes */
79 /* key/data generators for each module */
80 
81 static int gen_visitor_key (GKeyData * kdata, GLogItem * logitem);
82 static int gen_404_key (GKeyData * kdata, GLogItem * logitem);
83 static int gen_browser_key (GKeyData * kdata, GLogItem * logitem);
84 static int gen_host_key (GKeyData * kdata, GLogItem * logitem);
85 static int gen_keyphrase_key (GKeyData * kdata, GLogItem * logitem);
86 static int gen_os_key (GKeyData * kdata, GLogItem * logitem);
87 static int gen_vhost_key (GKeyData * kdata, GLogItem * logitem);
88 static int gen_remote_user_key (GKeyData * kdata, GLogItem * logitem);
89 static int gen_cache_status_key (GKeyData * kdata, GLogItem * logitem);
90 static int gen_referer_key (GKeyData * kdata, GLogItem * logitem);
91 static int gen_ref_site_key (GKeyData * kdata, GLogItem * logitem);
92 static int gen_request_key (GKeyData * kdata, GLogItem * logitem);
93 static int gen_static_request_key (GKeyData * kdata, GLogItem * logitem);
94 static int gen_status_code_key (GKeyData * kdata, GLogItem * logitem);
95 static int gen_visit_time_key (GKeyData * kdata, GLogItem * logitem);
96 #ifdef HAVE_GEOLOCATION
97 static int gen_geolocation_key (GKeyData * kdata, GLogItem * logitem);
98 #endif
99 /* UMS */
100 static int gen_mime_type_key (GKeyData * kdata, GLogItem * logitem);
101 static int gen_tls_type_key (GKeyData * kdata, GLogItem * logitem);
102 
103 /* insertion metric routines */
104 static void insert_data (GModule module, GKeyData * kdata);
105 static void insert_rootmap (GModule module, GKeyData * kdata);
106 static void insert_root (GModule module, GKeyData * kdata);
107 static void insert_hit (GModule module, GKeyData * kdata);
108 static void insert_visitor (GModule module, GKeyData * kdata);
109 static void insert_bw (GModule module, GKeyData * kdata, uint64_t size);
110 static void insert_cumts (GModule module, GKeyData * kdata, uint64_t ts);
111 static void insert_maxts (GModule module, GKeyData * kdata, uint64_t ts);
112 static void insert_method (GModule module, GKeyData * kdata, const char *data);
113 static void insert_protocol (GModule module, GKeyData * kdata, const char *data);
114 static void insert_agent (GModule module, GKeyData * kdata, uint32_t agent_nkey);
115 
/* *INDENT-OFF* */
/* Per-panel dispatch table.
 *
 * Every entry is initialized positionally, so the order of the values must
 * follow the member order of GParse (declared outside this file section).
 * As used below, each entry lists: the module id (the value panel_lookup()
 * compares against), the key generator for that module, and then the slots
 * filled by insert_data, insert_rootmap, insert_hit, insert_visitor,
 * insert_bw, insert_cumts, insert_maxts, insert_method, insert_protocol and
 * insert_agent, in that order.
 *
 * A NULL slot means no routine is registered for that metric on the panel.
 * NOTE(review): presumably the consumer skips NULL slots -- it is not
 * visible here, confirm against the code that walks this table. */
static GParse paneling[] = {
  {
    VISITORS,
    gen_visitor_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    REQUESTS,
    gen_request_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    insert_method,
    insert_protocol,
    NULL,
  }, {
    REQUESTS_STATIC,
    gen_static_request_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    insert_method,
    insert_protocol,
    NULL,
  }, {
    NOT_FOUND,
    gen_404_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    insert_method,
    insert_protocol,
    NULL,
  }, {
    /* the only panel in this table that registers insert_agent */
    HOSTS,
    gen_host_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    insert_agent,
  }, {
    /* NOTE(review): OS registers insert_method/insert_protocol while the
     * otherwise parallel BROWSERS entry does not -- confirm this is
     * intended. */
    OS,
    gen_os_key,
    insert_data,
    insert_rootmap,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    insert_method,
    insert_protocol,
    NULL,
  }, {
    BROWSERS,
    gen_browser_key,
    insert_data,
    insert_rootmap,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    REFERRERS,
    gen_referer_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    REFERRING_SITES,
    gen_ref_site_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    KEYPHRASES,
    gen_keyphrase_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  },
#ifdef HAVE_GEOLOCATION
  /* only present when built with geolocation support */
  {
    GEO_LOCATION,
    gen_geolocation_key,
    insert_data,
    insert_rootmap,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  },
#endif
  {
    STATUS_CODES,
    gen_status_code_key,
    insert_data,
    insert_rootmap,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    VISIT_TIMES,
    gen_visit_time_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    VIRTUAL_HOSTS,
    gen_vhost_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    REMOTE_USER,
    gen_remote_user_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    CACHE_STATUS,
    gen_cache_status_key,
    insert_data,
    NULL,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  }, {
    MIME_TYPE,
    gen_mime_type_key,
    insert_data,
    insert_rootmap,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL, /*method*/
    NULL, /*protocol*/
    NULL, /*agent*/
  }, {
    TLS_TYPE,
    gen_tls_type_key,
    insert_data,
    insert_rootmap,
    insert_hit,
    insert_visitor,
    insert_bw,
    insert_cumts,
    insert_maxts,
    NULL,
    NULL,
    NULL,
  },
};
/* *INDENT-ON* */
359 
360 /* Initialize a new GKeyData instance */
361 static void
new_modulekey(GKeyData * kdata)362 new_modulekey (GKeyData * kdata) {
363   GKeyData key = {
364     .data = NULL,
365     .data_key = NULL,
366     .data_nkey = 0,
367     .root = NULL,
368     .root_key = NULL,
369     .root_nkey = 0,
370     .uniq_key = NULL,
371     .uniq_nkey = 0,
372   };
373   *kdata = key;
374 }
375 
376 /* Get a panel from the GParse structure given a module.
377  *
378  * On error, or if not found, NULL is returned.
379  * On success, the panel value is returned. */
380 static GParse *
panel_lookup(GModule module)381 panel_lookup (GModule module) {
382   int i, num_panels = ARRAY_SIZE (paneling);
383 
384   for (i = 0; i < num_panels; i++) {
385     if (paneling[i].module == module)
386       return &paneling[i];
387   }
388   return NULL;
389 }
390 
391 /* Allocate memory for a new GRawData instance.
392  *
393  * On success, the newly allocated GRawData is returned . */
394 GRawData *
new_grawdata(void)395 new_grawdata (void) {
396   GRawData *raw_data = xmalloc (sizeof (*raw_data));
397   memset (raw_data, 0, sizeof *raw_data);
398 
399   return raw_data;
400 }
401 
402 /* Allocate memory for a new GRawDataItem instance.
403  *
404  * On success, the newly allocated GRawDataItem is returned . */
405 GRawDataItem *
new_grawdata_item(unsigned int size)406 new_grawdata_item (unsigned int size) {
407   GRawDataItem *item = xcalloc (size, sizeof (*item));
408   return item;
409 }
410 
411 /* Free memory allocated for a GRawData and GRawDataItem instance. */
412 void
free_raw_data(GRawData * raw_data)413 free_raw_data (GRawData * raw_data) {
414   free (raw_data->items);
415   free (raw_data);
416 }
417 
418 /* Reset an instance of GLog structure. */
419 void
reset_struct(Logs * logs)420 reset_struct (Logs * logs) {
421   int i = 0;
422 
423   for (i = 0; i < logs->size; ++i)
424     logs->glog[i].invalid = logs->glog[i].processed = 0;
425 }
426 
427 /* Allocate memory for a new set of Logs including a GLog instance.
428  *
429  * On success, the newly allocated Logs is returned . */
430 Logs *
init_logs(int size)431 init_logs (int size) {
432   Logs *logs = NULL;
433   GLog *glog = NULL;
434   int i = 0;
435 
436   /* if no logs no a pipe nor restoring, nothing to do then */
437   if (!size && !conf.restore)
438     return NULL;
439 
440   /* If no logs nor a pipe but restoring, we still need an minimal instance of
441    * logs and a glog */
442   logs = xcalloc (1, sizeof (*logs));
443   if (!size) {
444     logs->glog = xcalloc (1, sizeof (*glog));
445     logs->processed = &(logs->glog[0].processed);
446     return logs;
447   }
448 
449   glog = xcalloc (size, sizeof (*glog));
450   for (i = 0; i < size; ++i) {
451     glog[i].errors = xcalloc (MAX_LOG_ERRORS, sizeof (char *));
452     glog[i].filename = xstrdup (conf.filenames[i]);
453 
454     logs->processed = &(glog[i].processed);
455     logs->filename = glog[i].filename;
456   }
457 
458   logs->glog = glog;
459   logs->size = size;
460 
461   return logs;
462 }
463 
464 /* Free all log errors stored during parsing. */
465 void
free_logerrors(GLog * glog)466 free_logerrors (GLog * glog) {
467   int i;
468 
469   if (!glog->log_erridx)
470     return;
471 
472   for (i = 0; i < glog->log_erridx; ++i)
473     free (glog->errors[i]);
474   glog->log_erridx = 0;
475 }
476 
477 /* Free all log containers. */
478 void
free_logs(Logs * logs)479 free_logs (Logs * logs) {
480   GLog *glog = NULL;
481   int i;
482 
483   for (i = 0; i < logs->size; ++i) {
484     glog = &logs->glog[i];
485 
486     free (glog->filename);
487     free_logerrors (glog);
488     free (glog->errors);
489     if (glog->pipe) {
490       fclose (glog->pipe);
491     }
492   }
493 
494   free (logs->glog);
495   free (logs);
496 }
497 
498 /* Initialize a new GLogItem instance.
499  *
500  * On success, the new GLogItem instance is returned. */
501 GLogItem *
init_log_item(GLog * glog)502 init_log_item (GLog * glog) {
503   time_t now = time (0);
504   GLogItem *logitem;
505   glog->items = xmalloc (sizeof (GLogItem));
506   logitem = glog->items;
507   memset (logitem, 0, sizeof *logitem);
508 
509   logitem->agent = NULL;
510   logitem->browser = NULL;
511   logitem->browser_type = NULL;
512   logitem->continent = NULL;
513   logitem->country = NULL;
514   logitem->date = NULL;
515   logitem->errstr = NULL;
516   logitem->host = NULL;
517   logitem->keyphrase = NULL;
518   logitem->method = NULL;
519   logitem->os = NULL;
520   logitem->os_type = NULL;
521   logitem->protocol = NULL;
522   logitem->qstr = NULL;
523   logitem->ref = NULL;
524   logitem->req_key = NULL;
525   logitem->req = NULL;
526   logitem->resp_size = 0LL;
527   logitem->serve_time = 0;
528   logitem->status = NULL;
529   logitem->time = NULL;
530   logitem->uniq_key = NULL;
531   logitem->vhost = NULL;
532   logitem->userid = NULL;
533   logitem->cache_status = NULL;
534 
535   /* UMS */
536   logitem->mime_type = NULL;
537   logitem->tls_type = NULL;
538   logitem->tls_cypher = NULL;
539   logitem->tls_type_cypher = NULL;
540 
541   memset (logitem->site, 0, sizeof (logitem->site));
542   localtime_r (&now, &logitem->dt);
543 
544   return logitem;
545 }
546 
547 /* Free all members of a GLogItem */
548 static void
free_glog(GLogItem * logitem)549 free_glog (GLogItem * logitem) {
550   if (logitem->agent != NULL)
551     free (logitem->agent);
552   if (logitem->browser != NULL)
553     free (logitem->browser);
554   if (logitem->browser_type != NULL)
555     free (logitem->browser_type);
556   if (logitem->continent != NULL)
557     free (logitem->continent);
558   if (logitem->country != NULL)
559     free (logitem->country);
560   if (logitem->date != NULL)
561     free (logitem->date);
562   if (logitem->errstr != NULL)
563     free (logitem->errstr);
564   if (logitem->host != NULL)
565     free (logitem->host);
566   if (logitem->keyphrase != NULL)
567     free (logitem->keyphrase);
568   if (logitem->method != NULL)
569     free (logitem->method);
570   if (logitem->os != NULL)
571     free (logitem->os);
572   if (logitem->os_type != NULL)
573     free (logitem->os_type);
574   if (logitem->protocol != NULL)
575     free (logitem->protocol);
576   if (logitem->qstr != NULL)
577     free (logitem->qstr);
578   if (logitem->ref != NULL)
579     free (logitem->ref);
580   if (logitem->req_key != NULL)
581     free (logitem->req_key);
582   if (logitem->req != NULL)
583     free (logitem->req);
584   if (logitem->status != NULL)
585     free (logitem->status);
586   if (logitem->time != NULL)
587     free (logitem->time);
588   if (logitem->uniq_key != NULL)
589     free (logitem->uniq_key);
590   if (logitem->userid != NULL)
591     free (logitem->userid);
592   if (logitem->cache_status != NULL)
593     free (logitem->cache_status);
594   if (logitem->vhost != NULL)
595     free (logitem->vhost);
596 
597   if (logitem->mime_type != NULL)
598     free (logitem->mime_type);
599   if (logitem->tls_type != NULL)
600     free (logitem->tls_type);
601   if (logitem->tls_cypher != NULL)
602     free (logitem->tls_cypher);
603   if (logitem->tls_type_cypher != NULL)
604     free (logitem->tls_type_cypher);
605 
606   free (logitem);
607 }
608 
/* Convert one hexadecimal digit character to its numeric value (0-15).
 *
 * The argument must already be known to be a hex digit. It is converted to
 * unsigned char before reaching toupper(), as <ctype.h> requires. */
#define B16210(x) (((x) >= '0' && (x) <= '9') ? ((x) - '0') : (toupper ((unsigned char) (x)) - 'A' + 10))

/* Decode the %XX escapes of a URL-encoded string.
 *
 * `url` is the NUL-terminated input and `out` receives the decoded,
 * NUL-terminated result. `out` needs room for strlen(url) + 1 bytes, since
 * the output is never longer than the input. `out` may be the same buffer as
 * `url` because the write position never overtakes the read position.
 *
 * A '%' that is not followed by two hex digits is copied through unchanged.
 * Bytes are read as unsigned char so that input bytes >= 0x80 are never
 * handed to the <ctype.h> functions as negative values. */
static void
decode_hex (char *url, char *out) {
  const unsigned char *src = (const unsigned char *) url;
  char *dst = out;

  while (*src != '\0') {
    /* short-circuiting keeps src[2] from being read past the terminator */
    if (src[0] == '%' && isxdigit (src[1]) && isxdigit (src[2])) {
      *dst++ = (char) ((B16210 (src[1]) * 16) + B16210 (src[2]));
      src += 3;
    } else {
      *dst++ = (char) *src++;
    }
  }
  *dst = '\0';
}
628 
629 /* Entry point to decode the given URL-encoded string.
630  *
631  * On success, the decoded trimmed string is assigned to the output
632  * buffer. */
633 static char *
decode_url(char * url)634 decode_url (char *url) {
635   char *out, *decoded;
636 
637   if ((url == NULL) || (*url == '\0'))
638     return NULL;
639 
640   out = decoded = xstrdup (url);
641   decode_hex (url, out);
642   /* double encoded URL? */
643   if (conf.double_decode)
644     decode_hex (decoded, out);
645   strip_newlines (out);
646 
647   return trim_str (char_replace (out, '+', ' '));
648 }
649 
/* Extract the search keyphrase from a Google search, cache or translate
 * referer.
 *
 * The referer must not have been URL-decoded by the caller, since the search
 * query itself may contain '&'. Note that `ref` is modified in place: it is
 * cut at the separator that follows the query.
 *
 * On error, or if the referer holds no keyphrase, 1 is returned.
 * On success, the decoded keyphrase is assigned to *keyphrase and 0 is
 * returned. */
static int
extract_keyphrase (char *ref, char **keyphrase) {
  static const char *const origins[] = {
    "http://www.google.",
    "http://webcache.googleusercontent.com/",
    "http://translate.googleusercontent.com/",
    "https://www.google.",
    "https://webcache.googleusercontent.com/",
    "https://translate.googleusercontent.com/",
  };
  char *query = NULL, *cut = NULL, *plus = NULL, *phrase = NULL;
  int is_encoded = 0, known = 0;
  size_t i;

  for (i = 0; i < sizeof (origins) / sizeof (origins[0]) && !known; i++)
    known = strstr (ref, origins[i]) != NULL;
  if (!known)
    return 1;

  /* webcache.googleusercontent: nothing between "/+" and the next param */
  if (strstr (ref, "/+&") != NULL)
    return 1;

  if ((query = strstr (ref, "/+")) != NULL) {
    /* webcache.googleusercontent */
    query += 2;
  } else if ((query = strstr (ref, "q=cache:")) != NULL) {
    /* webcache.googleusercontent: the phrase follows the first '+' */
    if ((plus = strchr (query, '+')) != NULL)
      query = plus + 1;
  } else if ((query = strstr (ref, "&q=")) != NULL || (query = strstr (ref, "?q=")) != NULL) {
    /* www.google.* or translate.googleusercontent */
    query += 3;
  } else if ((query = strstr (ref, "%26q%3D")) != NULL || (query = strstr (ref, "%3Fq%3D")) != NULL) {
    /* same as above, but the separators themselves are percent-encoded */
    is_encoded = 1;
    query += 7;
  } else {
    return 1;
  }

  /* cut the query at the next parameter separator, if any */
  cut = is_encoded ? strstr (query, "%26") : strchr (query, '&');
  if (cut != NULL)
    *cut = '\0';

  phrase = decode_url (query);
  if (phrase == NULL || *phrase == '\0') {
    free (phrase);
    return 1;
  }

  phrase = char_replace (phrase, '+', ' ');
  *keyphrase = trim_str (phrase);

  return 0;
}
705 
#ifdef HAVE_GEOLOCATION
/* Look up the country and continent of logitem->host.
 *
 * If no GeoIP resource is available, 1 is returned and the output buffers
 * are left untouched.
 * Otherwise both lookups are performed into `country` and `continent` and 0
 * is returned. */
static int
extract_geolocation (GLogItem * logitem, char *continent, char *country) {
  if (is_geoip_resource ()) {
    geoip_get_country (logitem->host, country, logitem->type_ip);
    geoip_get_continent (logitem->host, continent, logitem->type_ip);
    return 0;
  }

  return 1;
}
#endif
723 
724 
725 /* Parse a URI and extracts the *host* part from it
726  * i.e., //www.example.com/path?googleguy > www.example.com
727  *
728  * On error, 1 is returned.
729  * On success, the extracted referer is set and 0 is returned. */
730 static int
extract_referer_site(const char * referer,char * host)731 extract_referer_site (const char *referer, char *host) {
732   char *url, *begin, *end;
733   int len = 0;
734 
735   if ((referer == NULL) || (*referer == '\0'))
736     return 1;
737 
738   url = strdup (referer);
739   if ((begin = strstr (url, "//")) == NULL)
740     goto clean;
741 
742   begin += 2;
743   if ((len = strlen (begin)) == 0)
744     goto clean;
745 
746   if ((end = strchr (begin, '/')) != NULL)
747     len = end - begin;
748 
749   if (len == 0)
750     goto clean;
751 
752   if (len >= REF_SITE_LEN)
753     len = REF_SITE_LEN;
754 
755   memcpy (host, begin, len);
756   host[len] = '\0';
757   free (url);
758   return 0;
759 clean:
760   free (url);
761 
762   return 1;
763 }
764 
765 /* Determine if the given request is static (e.g., jpg, css, js, etc).
766  *
767  * On error, or if not static, 0 is returned.
768  * On success, the 1 is returned. */
769 static int
verify_static_content(const char * req)770 verify_static_content (const char *req) {
771   const char *nul = req + strlen (req);
772   const char *ext = NULL, *pch = NULL;
773   int elen = 0, i;
774 
775   if (strlen (req) < conf.static_file_max_len)
776     return 0;
777 
778   for (i = 0; i < conf.static_file_idx; ++i) {
779     ext = conf.static_files[i];
780     if (ext == NULL || *ext == '\0')
781       continue;
782 
783     elen = strlen (ext);
784     if (conf.all_static_files && (pch = strchr (req, '?')) != NULL && pch - req > elen) {
785       pch -= elen;
786       if (0 == strncasecmp (ext, pch, elen))
787         return 1;
788       continue;
789     }
790 
791     if (!strncasecmp (nul - elen, ext, elen))
792       return 1;
793   }
794 
795   return 0;
796 }
797 
/* Extract the HTTP method a request line starts with.
 *
 * The token only has to *begin* with one of the known method names (all
 * upper-case or all lower-case spelling); whatever follows is ignored.
 * Names are tried in table order and the first match wins.
 *
 * If no known method is found, NULL is returned.
 * On success, a pointer to the matching method name is returned. It refers
 * to static storage and must not be freed or modified. */
static const char *
extract_method (const char *token) {
  /* pair each name with its length, computed at compile time */
#define HTTP_METHOD(name) { name, sizeof (name) - 1 }
  static const struct {
    const char *name;
    size_t len;
  } methods[] = {
    HTTP_METHOD ("OPTIONS"), HTTP_METHOD ("GET"), HTTP_METHOD ("HEAD"),
    HTTP_METHOD ("POST"), HTTP_METHOD ("PUT"), HTTP_METHOD ("DELETE"),
    HTTP_METHOD ("TRACE"), HTTP_METHOD ("CONNECT"), HTTP_METHOD ("PATCH"),
    HTTP_METHOD ("options"), HTTP_METHOD ("get"), HTTP_METHOD ("head"),
    HTTP_METHOD ("post"), HTTP_METHOD ("put"), HTTP_METHOD ("delete"),
    HTTP_METHOD ("trace"), HTTP_METHOD ("connect"), HTTP_METHOD ("patch"),
    /* WebDAV */
    HTTP_METHOD ("PROPFIND"), HTTP_METHOD ("PROPPATCH"), HTTP_METHOD ("MKCOL"),
    HTTP_METHOD ("COPY"), HTTP_METHOD ("MOVE"), HTTP_METHOD ("LOCK"),
    HTTP_METHOD ("UNLOCK"), HTTP_METHOD ("VERSION-CONTROL"),
    HTTP_METHOD ("REPORT"), HTTP_METHOD ("CHECKOUT"), HTTP_METHOD ("CHECKIN"),
    HTTP_METHOD ("UNCHECKOUT"), HTTP_METHOD ("MKWORKSPACE"),
    HTTP_METHOD ("UPDATE"), HTTP_METHOD ("LABEL"), HTTP_METHOD ("MERGE"),
    HTTP_METHOD ("BASELINE-CONTROL"), HTTP_METHOD ("MKACTIVITY"),
    HTTP_METHOD ("ORDERPATCH"),
    /* lower-case counterpart of PROPPATCH (was misspelled "propwatch") */
    HTTP_METHOD ("propfind"), HTTP_METHOD ("proppatch"), HTTP_METHOD ("mkcol"),
    HTTP_METHOD ("copy"), HTTP_METHOD ("move"), HTTP_METHOD ("lock"),
    HTTP_METHOD ("unlock"), HTTP_METHOD ("version-control"),
    HTTP_METHOD ("report"), HTTP_METHOD ("checkout"), HTTP_METHOD ("checkin"),
    HTTP_METHOD ("uncheckout"), HTTP_METHOD ("mkworkspace"),
    HTTP_METHOD ("update"), HTTP_METHOD ("label"), HTTP_METHOD ("merge"),
    HTTP_METHOD ("baseline-control"), HTTP_METHOD ("mkactivity"),
    HTTP_METHOD ("orderpatch"),
  };
#undef HTTP_METHOD
  size_t i;

  for (i = 0; i < sizeof (methods) / sizeof (methods[0]); i++) {
    if (strncmp (token, methods[i].name, methods[i].len) == 0)
      return methods[i].name;
  }

  return NULL;
}
839 
840 /* Determine if time-served data was stored on-disk. */
841 static void
contains_usecs(void)842 contains_usecs (void) {
843   if (conf.serve_usecs)
844     return;
845   conf.serve_usecs = 1; /* flag */
846 }
847 
/* Determine if the given token is one of the recognized cache status values
 * (MISS, BYPASS, EXPIRED, STALE, UPDATING, REVALIDATED or HIT), compared
 * case-insensitively. Despite the name, any of them is accepted, not just
 * HIT.
 *
 * If the token is not a recognized status, 0 is returned.
 * If it is, 1 is returned. */
static int
is_cache_hit (const char *tkn) {
  static const char *const statuses[] = {
    "MISS", "BYPASS", "EXPIRED", "STALE", "UPDATING", "REVALIDATED", "HIT",
  };
  size_t i;

  for (i = 0; i < sizeof (statuses) / sizeof (statuses[0]); i++) {
    if (strcasecmp (statuses[i], tkn) == 0)
      return 1;
  }

  return 0;
}
866 
/* Determine which supported HTTP protocol version the given token starts
 * with. Only the leading characters are compared, so trailing text (e.g.
 * the ".0" in "HTTP/2.0") is ignored.
 *
 * If the token does not start with a supported version, NULL is returned.
 * Otherwise a pointer to the matching version string ("HTTP/1.0",
 * "HTTP/1.1" or "HTTP/2") is returned; it refers to static storage and must
 * not be freed. */
static const char *
extract_protocol (const char *token) {
  static const char *const versions[] = { "HTTP/1.0", "HTTP/1.1", "HTTP/2" };
  size_t i;

  for (i = 0; i < sizeof (versions) / sizeof (versions[0]); i++) {
    if (strncmp (token, versions[i], strlen (versions[i])) == 0)
      return versions[i];
  }

  return NULL;
}
881 
882 /* Parse a request containing the method and protocol.
883  *
884  * On error, or unable to parse, NULL is returned.
885  * On success, the HTTP request is returned and the method and
886  * protocol are assigned to the corresponding buffers. */
887 static char *
parse_req(char * line,char ** method,char ** protocol)888 parse_req (char *line, char **method, char **protocol) {
889   char *req = NULL, *request = NULL, *dreq = NULL, *ptr = NULL;
890   const char *meth, *proto;
891   ptrdiff_t rlen;
892 
893   meth = extract_method (line);
894 
895   /* couldn't find a method, so use the whole request line */
896   if (meth == NULL) {
897     request = xstrdup (line);
898   }
899   /* method found, attempt to parse request */
900   else {
901     req = line + strlen (meth);
902     if (!(ptr = strrchr (req, ' ')) || !(proto = extract_protocol (++ptr)))
903       return alloc_string ("-");
904 
905     req++;
906     if ((rlen = ptr - req) <= 0)
907       return alloc_string ("-");
908 
909     request = xmalloc (rlen + 1);
910     strncpy (request, req, rlen);
911     request[rlen] = 0;
912 
913     if (conf.append_method)
914       (*method) = strtoupper (xstrdup (meth));
915 
916     if (conf.append_protocol)
917       (*protocol) = strtoupper (xstrdup (proto));
918   }
919 
920   if (!(dreq = decode_url (request)))
921     return request;
922   else if (*dreq == '\0') {
923     free (dreq);
924     return request;
925   }
926 
927   free (request);
928   return dreq;
929 }
930 
#if defined(HAVE_LIBSSL) && defined(HAVE_CIPHER_STD_NAME)
/* Translate a numeric cipher code into its standard cipher name and the TLS
 * version it belongs to.
 *
 * `tkn` holds the cipher code as a decimal string. This function takes
 * ownership of it: it is freed on every path, success or failure.
 *
 * On error, 1 is returned and the output pointers are left untouched.
 * On success, newly allocated copies of the cipher name and of the TLS
 * version are assigned to *cipher and *tls_version and 0 is returned. */
static int
extract_tls_version_cipher (char *tkn, char **cipher, char **tls_version) {
  SSL_CTX *ctx = NULL;
  SSL *ssl = NULL;
  const SSL_CIPHER *c = NULL;
  const char *sn = NULL;
  char *bEnd = NULL;
  unsigned char cipherid[3];
  unsigned short code_be;
  int code = 0, ret = 1;

  /* strtoull() only sets errno on failure, so clear any stale value first */
  errno = 0;
  code = strtoull (tkn, &bEnd, 10);
  if (tkn == bEnd || *bEnd != '\0' || errno == ERANGE) {
    LOG_DEBUG (("unable to convert cipher code to a valid decimal."));
    goto out;
  }

  /* ssl context */
  if (!(ctx = SSL_CTX_new (SSLv23_server_method ()))) {
    LOG_DEBUG (("Unable to create a new SSL_CTX_new to extact TLS."));
    goto out;
  }
  if (!(ssl = SSL_new (ctx))) {
    LOG_DEBUG (("Unable to create a new instace of SSL_new to extact TLS."));
    goto out;
  }

  /* the lookup takes the two-byte cipher id in network byte order */
  code_be = htobe16 (code);
  memcpy (cipherid, &code_be, 2);
  cipherid[2] = 0;

  if (!(c = SSL_CIPHER_find (ssl, cipherid))) {
    LOG_DEBUG (("Unable to find cipher to extact TLS."));
    goto out;
  }

  if (!(sn = SSL_CIPHER_standard_name (c))) {
    LOG_DEBUG (("Unable to get cipher standard name to extact TLS."));
    goto out;
  }

  *cipher = xstrdup (sn);
  *tls_version = xstrdup (SSL_CIPHER_get_version (c));
  ret = 0;

out:
  /* single exit: release whatever was created; both calls accept NULL */
  SSL_free (ssl);
  SSL_CTX_free (ctx);
  free (tkn);

  return ret;
}
#endif
987 
/* Extract the delimiter that follows the current position in a log format
 * and store it in the destination buffer.
 *
 * `p` points into the log format; the delimiter is the character at p[1].
 * Only dest[0] is ever written.
 *
 * If `p` or p[1] is the end of the format, dest[0] is set to '\0'.
 * Otherwise dest[0] is set to the delimiter. */
static void
get_delim (char *dest, const char *p) {
  const int exhausted = (p[0] == '\0' || p[1] == '\0');

  dest[0] = exhausted ? '\0' : p[1];
}
1003 
/* Copy the text between *str and pch (exclusive) into a newly allocated,
 * trimmed token.
 *
 * If move_ptr is non-zero, *str is advanced to pch afterwards.
 *
 * On success, the malloc'd token is returned. */
static char *
parsed_string (const char *pch, char **str, int move_ptr) {
  const size_t span = (size_t) (pch - *str);
  char *token = xmalloc (span + 1);

  memcpy (token, *str, span);
  token[span] = '\0';

  if (move_ptr)
    *str += span;

  return trim_str (token);
}
1020 
/* Find and extract a token given a log format rule.
 *
 * The first character of *str that appears in `delims` becomes the
 * terminator; the token ends at the cnt-th occurrence of that terminator,
 * or at the end of the string. With an empty `delims` the token runs to the
 * end of the string. A backslash makes the scan skip the character that
 * follows it. On success *str is advanced to the end of the token.
 *
 * If none of the delimiters occurs in *str, or the string ends right after
 * a backslash, NULL is returned.
 * On success, the malloc'd token is returned. */
static char *
parse_string (char **str, const char *delims, int cnt) {
  const char *cursor = *str;
  char stop = '\0';
  int seen = 0;

  if (*delims != '\0') {
    const char *first = strpbrk (*str, delims);

    if (first == NULL)
      return NULL;
    stop = *first;
  }

  for (;;) {
    /* match number of delims */
    if (*cursor == stop)
      seen++;

    /* delim found, parse string then */
    if ((*cursor == stop && seen == cnt) || *cursor == '\0')
      return parsed_string (cursor, str, 1);

    /* step onto the escaped char so the advance below moves past it */
    if (*cursor == '\\')
      cursor++;

    if (*cursor == '\0')
      break;
    cursor++;
  }

  return NULL;
}
1049 
/* Advance *str past any leading whitespace (as defined by isspace).
 *
 * On return *str points at the first non-space character, or at the
 * terminating NUL if the rest of the string is all whitespace.
 *
 * Each byte is converted to unsigned char before it is classified, because
 * passing a negative plain char (bytes >= 0x80) to isspace() is undefined
 * behavior. */
static void
find_alpha (char **str) {
  char *s = *str;

  while (*s != '\0' && isspace ((unsigned char) *s))
    s++;

  *str = s;
}
1063 
/* Count the leading whitespace characters (as defined by isspace) of the
 * given string. The string itself is not modified.
 *
 * Each byte is converted to unsigned char before it is classified, because
 * passing a negative plain char (bytes >= 0x80) to isspace() is undefined
 * behavior.
 *
 * The number of leading whitespace characters is returned. */
static int
find_alpha_count (char *str) {
  int cnt = 0;

  while (str[cnt] != '\0' && isspace ((unsigned char) str[cnt]))
    cnt++;

  return cnt;
}
1078 
1079 /* Format the broken-down time tm to a numeric date format.
1080  *
1081  * On error, or unable to format the given tm, 1 is returned.
1082  * On success, a malloc'd format is returned. */
1083 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
1084 static int
set_date(char ** fdate,struct tm tm)1085 set_date (char **fdate, struct tm tm) {
1086   char buf[DATE_LEN] = "";      /* Ymd */
1087 
1088   memset (buf, 0, sizeof (buf));
1089   if (strftime (buf, DATE_LEN, conf.date_num_format, &tm) <= 0)
1090     return 1;
1091   *fdate = xstrdup (buf);
1092 
1093   return 0;
1094 }
1095 
1096 /* Format the broken-down time tm to a numeric time format.
1097  *
1098  * On error, or unable to format the given tm, 1 is returned.
1099  * On success, a malloc'd format is returned. */
1100 static int
set_time(char ** ftime,struct tm tm)1101 set_time (char **ftime, struct tm tm) {
1102   char buf[TIME_LEN] = "";
1103 
1104   memset (buf, 0, sizeof (buf));
1105   if (strftime (buf, TIME_LEN, "%H:%M:%S", &tm) <= 0)
1106     return 1;
1107   *ftime = xstrdup (buf);
1108 
1109   return 0;
1110 }
1111 
1112 /* Determine the parsing specifier error and construct a message out
1113  * of it.
1114  *
1115  * On success, a malloc'd error message is assigned to the log
1116  * structure and 1 is returned. */
1117 static int
spec_err(GLogItem * logitem,int code,const char spec,const char * tkn)1118 spec_err (GLogItem * logitem, int code, const char spec, const char *tkn) {
1119   char *err = NULL;
1120   const char *fmt = NULL;
1121 
1122   switch (code) {
1123   case SPEC_TOKN_NUL:
1124     fmt = "Token for '%%%c' specifier is NULL.";
1125     err = xmalloc (snprintf (NULL, 0, fmt, spec) + 1);
1126     sprintf (err, fmt, spec);
1127     break;
1128   case SPEC_TOKN_INV:
1129     fmt = "Token '%s' doesn't match specifier '%%%c'";
1130     err = xmalloc (snprintf (NULL, 0, fmt, (tkn ? tkn : "-"), spec) + 1);
1131     sprintf (err, fmt, (tkn ? tkn : "-"), spec);
1132     break;
1133   case SPEC_SFMT_MIS:
1134     fmt = "Missing braces '%s' and ignore chars for specifier '%%%c'";
1135     err = xmalloc (snprintf (NULL, 0, fmt, (tkn ? tkn : "-"), spec) + 1);
1136     sprintf (err, fmt, (tkn ? tkn : "-"), spec);
1137     break;
1138   }
1139   logitem->errstr = err;
1140 
1141   return code;
1142 }
1143 
1144 static void
set_tm_dt_logitem(GLogItem * logitem,struct tm tm)1145 set_tm_dt_logitem (GLogItem * logitem, struct tm tm) {
1146   logitem->dt.tm_year = tm.tm_year;
1147   logitem->dt.tm_mon = tm.tm_mon;
1148   logitem->dt.tm_mday = tm.tm_mday;
1149 }
1150 
1151 static void
set_tm_tm_logitem(GLogItem * logitem,struct tm tm)1152 set_tm_tm_logitem (GLogItem * logitem, struct tm tm) {
1153   logitem->dt.tm_hour = tm.tm_hour;
1154   logitem->dt.tm_min = tm.tm_min;
1155   logitem->dt.tm_sec = tm.tm_sec;
1156 }
1157 
/* Convert the numeric date string to an integer via str2int() and store
 * it in *numdate.
 *
 * A str2int() result of -1 is reported through FATAL(). */
static void
set_numeric_date (uint32_t * numdate, const char *date) {
  int parsed = str2int (date);

  if (parsed == -1)
    FATAL ("Unable to parse date to integer %s", date);

  *numdate = parsed;
}
1165 
1166 #pragma GCC diagnostic warning "-Wformat-nonliteral"
1167 
/* Parse a single format specifier (the character at *p) out of the log
 * string *str and store the result in the matching GLogItem member.
 *
 * logitem: log item being populated; on failure its errstr is set through
 *          spec_err().
 * str:     address of the current position within the log line; it is
 *          handed to parse_string()/find_alpha(), or moved directly for
 *          unknown specifiers.
 * p:       points at the specifier character within the log format.
 * end:     delimiter string handed to parse_string() to bound the token.
 *
 * For the field-backed specifiers, if the target field is already set the
 * function returns 0 right away without calling parse_string().
 *
 * On error, or unable to parse it, a non-zero value is returned: the
 * SPEC_* code handed back by spec_err() when no token could be extracted,
 * or 1 when the token fails validation.
 * On success, 0 is returned and the token (or a value derived from it) is
 * assigned to a GLogItem member. */
static int
parse_specifier (GLogItem * logitem, char **str, const char *p, const char *end) {
  struct tm tm;
  time_t now = time (0);
  const char *dfmt = conf.date_format;
  const char *tfmt = conf.time_format;

  char *pch, *sEnd, *bEnd, *tkn = NULL;
  double serve_secs = 0.0;
  uint64_t bandw = 0, serve_time = 0;
  long status = 0L;
  int dspc = 0, fmtspcs = 0;

  /* cleared up front because the numeric cases below test errno == ERANGE
   * after their strto*() calls */
  errno = 0;
  /* tm starts out as the current local time before str_to_time() is asked
   * to fill it from the token -- presumably so that fields absent from the
   * format keep today's values (TODO confirm against str_to_time()) */
  memset (&tm, 0, sizeof (tm));
  localtime_r (&now, &tm);

  switch (*p) {
    /* date */
  case 'd':
    if (logitem->date)
      return 0;

    /* Attempt to parse date format containing spaces,
     * i.e., syslog date format (Jul\s15, Nov\s\s2).
     * Note that it's possible a date could contain some padding, e.g.,
     * Dec\s\s2 vs Nov\s22, so we attempt to take that into consideration by looking
     * ahead the log string and counting the # of spaces until we find an alphanum char. */
    if ((fmtspcs = count_matches (dfmt, ' ')) && (pch = strchr (*str, ' ')))
      dspc = find_alpha_count (pch);

    /* the third argument is the number of delimiters to match: the larger
     * of the space counts found above, plus one */
    if (!(tkn = parse_string (&(*str), end, MAX (dspc, fmtspcs) + 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    if (str_to_time (tkn, dfmt, &tm) != 0 || set_date (&logitem->date, tm) != 0) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }

    set_numeric_date (&logitem->numdate, logitem->date);
    set_tm_dt_logitem (logitem, tm);
    free (tkn);
    break;
    /* time */
  case 't':
    if (logitem->time)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    if (str_to_time (tkn, tfmt, &tm) != 0 || set_time (&logitem->time, tm) != 0) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }

    set_tm_tm_logitem (logitem, tm);
    free (tkn);
    break;
    /* date/time as decimal, i.e., timestamps, ms/us  */
  case 'x':
    if (logitem->time && logitem->date)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* a single token yields both the date and the time; it is parsed with
     * the time format (tfmt) */
    if (str_to_time (tkn, tfmt, &tm) != 0 || set_date (&logitem->date, tm) != 0 ||
        set_time (&logitem->time, tm) != 0) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    set_numeric_date (&logitem->numdate, logitem->date);
    set_tm_dt_logitem (logitem, tm);
    set_tm_tm_logitem (logitem, tm);
    free (tkn);
    break;
    /* Virtual Host */
  case 'v':
    if (logitem->vhost)
      return 0;
    tkn = parse_string (&(*str), end, 1);
    if (tkn == NULL)
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);
    /* the log item takes ownership of the token */
    logitem->vhost = tkn;
    break;
    /* remote user */
  case 'e':
    if (logitem->userid)
      return 0;
    tkn = parse_string (&(*str), end, 1);
    if (tkn == NULL)
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);
    logitem->userid = tkn;
    break;
    /* cache status */
  case 'C':
    if (logitem->cache_status)
      return 0;
    tkn = parse_string (&(*str), end, 1);
    if (tkn == NULL)
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);
    /* the token is kept only when is_cache_hit() accepts it; otherwise it
     * is released and cache_status stays unset (not an error) */
    if (is_cache_hit (tkn))
      logitem->cache_status = tkn;
    else
      free (tkn);
    break;
    /* remote hostname (IP only) */
  case 'h':
    if (logitem->host)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* validation is skipped altogether when conf.no_ip_validation is set,
     * in which case logitem->type_ip is not written here */
    if (!conf.no_ip_validation && invalid_ipaddr (tkn, &logitem->type_ip)) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    logitem->host = tkn;
    break;
    /* request method */
  case 'm':
    if (logitem->method)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    if (!extract_method (tkn)) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    logitem->method = tkn;
    break;
    /* request not including method or protocol */
  case 'U':
    if (logitem->req)
      return 0;
    tkn = parse_string (&(*str), end, 1);
    /* an empty request is treated the same as a missing one */
    if (tkn == NULL || *tkn == '\0') {
      free (tkn);
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);
    }

    /* the decoded copy is stored; the raw token is released below */
    if ((logitem->req = decode_url (tkn)) == NULL) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    free (tkn);
    break;
    /* query string alone, e.g., ?param=goaccess&tbm=shop */
  case 'q':
    if (logitem->qstr)
      return 0;
    tkn = parse_string (&(*str), end, 1);
    /* unlike %U, a missing or empty query string is not an error */
    if (tkn == NULL || *tkn == '\0') {
      free (tkn);
      return 0;
    }

    if ((logitem->qstr = decode_url (tkn)) == NULL) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    free (tkn);
    break;
    /* request protocol */
  case 'H':
    if (logitem->protocol)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    if (!extract_protocol (tkn)) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    logitem->protocol = tkn;
    break;
    /* request, including method + protocol */
  case 'r':
    if (logitem->req)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* parse_req() receives the addresses of method and protocol; its result
     * is stored without a NULL check at this point */
    logitem->req = parse_req (tkn, &logitem->method, &logitem->protocol);
    free (tkn);
    break;
    /* Status Code */
  case 's':
    if (logitem->status)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* do not validate HTTP status code */
    if (conf.no_strict_status) {
      logitem->status = tkn;
      break;
    }

    /* the whole token must be a base-10 number within 100..599 */
    status = strtol (tkn, &sEnd, 10);
    if (tkn == sEnd || *sEnd != '\0' || errno == ERANGE || status < 100 || status > 599) {
      spec_err (logitem, SPEC_TOKN_INV, *p, tkn);
      free (tkn);
      return 1;
    }
    logitem->status = tkn;
    break;
    /* size of response in bytes - excluding HTTP headers */
  case 'b':
    if (logitem->resp_size)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* a token that is not entirely numeric counts as 0 bytes rather than
     * as an error */
    bandw = strtoull (tkn, &bEnd, 10);
    if (tkn == bEnd || *bEnd != '\0' || errno == ERANGE)
      bandw = 0;
    logitem->resp_size = bandw;
    conf.bandwidth = 1;
    free (tkn);
    break;
    /* referrer */
  case 'R':
    if (logitem->ref)
      return 0;

    /* a missing or empty referrer is normalized to "-" */
    if (!(tkn = parse_string (&(*str), end, 1)))
      tkn = alloc_string ("-");
    if (*tkn == '\0') {
      free (tkn);
      tkn = alloc_string ("-");
    }
    if (strcmp (tkn, "-") != 0) {
      extract_keyphrase (tkn, &logitem->keyphrase);
      extract_referer_site (tkn, logitem->site);

      /* hide referrers from report */
      if (hide_referer (logitem->site)) {
        /* hidden: the site is blanked and logitem->ref stays unset */
        logitem->site[0] = '\0';
        free (tkn);
      } else
        logitem->ref = tkn;
      break;
    }
    logitem->ref = tkn;

    break;
    /* user agent */
  case 'u':
    if (logitem->agent)
      return 0;

    tkn = parse_string (&(*str), end, 1);
    if (tkn != NULL && *tkn != '\0') {
      /* Make sure the user agent is decoded (i.e.: CloudFront)
       * and replace all '+' with ' ' (i.e.: w3c) */
      logitem->agent = decode_url (tkn);
      free (tkn);
      break;
    } else if (tkn != NULL && *tkn == '\0') {
      /* empty agent: replaced by "-" */
      free (tkn);
      tkn = alloc_string ("-");
    }
    /* must be null */
    else {
      tkn = alloc_string ("-");
    }
    logitem->agent = tkn;
    break;
    /* time taken to serve the request, in milliseconds as a decimal number */
  case 'L':
    /* ignore it if we already have served time */
    if (logitem->serve_time)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* only an integral value is accepted here; anything else counts as 0 */
    serve_secs = strtoull (tkn, &bEnd, 10);
    if (tkn == bEnd || *bEnd != '\0' || errno == ERANGE)
      serve_secs = 0;
    /* convert it to microseconds */
    logitem->serve_time = (serve_secs > 0) ? serve_secs * MILS : 0;

    contains_usecs ();  /* set flag */
    free (tkn);
    break;
    /* time taken to serve the request, in seconds with a milliseconds
     * resolution */
  case 'T':
    /* ignore it if we already have served time */
    if (logitem->serve_time)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    /* tokens containing a '.' are parsed as floating point, all others as
     * unsigned integers */
    if (strchr (tkn, '.') != NULL)
      serve_secs = strtod (tkn, &bEnd);
    else
      serve_secs = strtoull (tkn, &bEnd, 10);

    if (tkn == bEnd || *bEnd != '\0' || errno == ERANGE)
      serve_secs = 0;
    /* convert it to microseconds */
    logitem->serve_time = (serve_secs > 0) ? serve_secs * SECS : 0;

    contains_usecs ();  /* set flag */
    free (tkn);
    break;
    /* time taken to serve the request, in microseconds */
  case 'D':
    /* ignore it if we already have served time */
    if (logitem->serve_time)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    serve_time = strtoull (tkn, &bEnd, 10);
    if (tkn == bEnd || *bEnd != '\0' || errno == ERANGE)
      serve_time = 0;
    logitem->serve_time = serve_time;

    contains_usecs ();  /* set flag */
    free (tkn);
    break;

    /* UMS: Krypto (TLS) "ECDHE-RSA-AES128-GCM-SHA256" */
  case 'k':
    /* error to set this twice */
    if (logitem->tls_cypher)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

#if defined(HAVE_LIBSSL) && defined(HAVE_CIPHER_STD_NAME)
    {
      /* an all-digit token is handed to extract_tls_version_cipher();
       * any other token is stored as the cipher as-is.
       * NOTE(review): tkn is neither stored nor freed on the all-digit
       * path in this function -- confirm who owns it afterwards. */
      char *tmp = NULL;
      for (tmp = tkn; isdigit (*tmp); tmp++);
      if (!strlen (tmp))
        extract_tls_version_cipher (tkn, &logitem->tls_cypher, &logitem->tls_type);
      else
        logitem->tls_cypher = tkn;
    }
#else
    logitem->tls_cypher = tkn;
#endif

    break;

    /* UMS: Krypto (TLS) parameters like "TLSv1.2" */
  case 'K':
    /* error to set this twice */
    if (logitem->tls_type)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    logitem->tls_type = tkn;
    break;

    /* UMS: Mime-Type like "text/html" */
  case 'M':
    /* error to set this twice */
    if (logitem->mime_type)
      return 0;
    if (!(tkn = parse_string (&(*str), end, 1)))
      return spec_err (logitem, SPEC_TOKN_NUL, *p, NULL);

    logitem->mime_type = tkn;

    break;
    /* move forward through str until not a space */
  case '~':
    find_alpha (&(*str));
    break;
    /* everything else skip it */
  default:
    /* jump to the next occurrence, in the log line, of the character that
     * follows the specifier in the format; stay put when there is none */
    if ((pch = strchr (*str, p[1])) != NULL)
      *str += pch - *str;
  }

  return 0;
}
1562 
/* Parse the special host specifier and extract the set of characters
 * enclosed in curly braces, i.e., the characters to reject when
 * attempting to parse the XFF field.
 *
 * A brace that directly follows a backslash is not treated as a boundary.
 * The scan stops at the first unescaped '}'; the opening boundary is the
 * last unescaped '{' seen before it.
 *
 * If either brace cannot be found, or nothing lies between them, NULL is
 * returned and *p is left untouched.
 * On success, the reject set allocated with xmalloc() is returned and *p
 * is moved to the character right after the closing brace. */
static char *
extract_braces (char **p) {
  char *open = NULL, *close = NULL, *set = NULL, *cur = NULL;
  int escaped = 0;
  ptrdiff_t n = 0;

  /* walk the log format looking for the boundaries */
  for (cur = *p; *cur != '\0'; cur++) {
    if (*cur == '\\') {
      escaped = 1;
      continue;
    }
    if (!escaped && *cur == '{') {
      open = cur;
      continue;
    }
    if (!escaped && *cur == '}') {
      close = cur;
      break;
    }
    escaped = 0;
  }

  if (open == NULL || close == NULL)
    return NULL;

  n = close - (open + 1);
  if (n <= 0)
    return NULL;

  /* copy what lies strictly between the braces */
  set = xmalloc (n + 1);
  memcpy (set, open + 1, n);
  set[n] = '\0';

  *p = close + 1;
  return set;
}
1601 
/* Attempt to extract the client IP from an X-Forwarded-For (XFF) field.
 *
 * The characters between the braces that follow the specifier in the log
 * format (see extract_braces()) act as separators: the log string is
 * walked one run of non-separator characters at a time and each run is
 * tested with invalid_ipaddr(). The first run that passes becomes the
 * host.
 *
 * logitem: log item whose host/type_ip are set on success.
 * str:     address of the current position in the log line; it is moved
 *          forward as separators and candidates are consumed.
 * p:       address of the current position in the log format; moved past
 *          the closing brace by extract_braces().
 *
 * If the braces are missing or empty, the code returned by spec_err()
 * (called with SPEC_SFMT_MIS) is returned and logitem->errstr is set.
 * If no IP is found, 1 is returned.
 * On success, a copy of the token made with xstrdup() is assigned to
 * GLogItem->host and 0 is returned. */
static int
find_xff_host (GLogItem * logitem, char **str, char **p) {
  char *ptr = NULL, *tkn = NULL, *skips = NULL;
  int invalid_ip = 1, len = 0, type_ip = TYPE_IPINV;
  int idx = 0, skips_len = 0;

  if (!(skips = extract_braces (p)))
    return spec_err (logitem, SPEC_SFMT_MIS, **p, "{}");

  skips_len = strlen (skips);
  ptr = *str;
  while (*ptr != '\0') {
    /* sitting on a separator: step over this single character and count it */
    if ((len = strcspn (ptr, skips)) == 0) {
      len++, ptr++, idx++;
      goto move;
    }
    /* If our index does not match the number of delimiters and we have already a
     * valid client IP, then we assume we have reached the length of the XFF */
    if (idx < skips_len && logitem->host)
      break;

    ptr += len;
    /* extract possible IP */
    /* NOTE(review): presumably this copies the text between *str and ptr;
     * parsed_string() is not visible here -- confirm */
    if (!(tkn = parsed_string (ptr, str, 0)))
      break;

    invalid_ip = invalid_ipaddr (tkn, &type_ip);
    /* done, already have IP and current token is not a host */
    if (logitem->host && invalid_ip) {
      free (tkn);
      break;
    }
    /* only the first valid address is kept as the host */
    if (!logitem->host && !invalid_ip) {
      logitem->host = xstrdup (tkn);
      logitem->type_ip = type_ip;
    }
    free (tkn);
    /* the separator count starts over after every candidate */
    idx = 0;

  move:
    /* advance the caller's position by what was consumed in this pass */
    *str += len;
  }

  free (skips);

  return logitem->host == NULL;
}
1654 
1655 /* Handle special specifiers.
1656  *
1657  * On error, or unable to parse it, 1 is returned.
1658  * On success, the malloc'd token is assigned to a GLogItem member and
1659  * 0 is returned. */
1660 static int
special_specifier(GLogItem * logitem,char ** str,char ** p)1661 special_specifier (GLogItem * logitem, char **str, char **p) {
1662   switch (**p) {
1663     /* XFF remote hostname (IP only) */
1664   case 'h':
1665     if (logitem->host)
1666       return 0;
1667     if (find_xff_host (logitem, str, p))
1668       return spec_err (logitem, SPEC_TOKN_NUL, 'h', NULL);
1669     break;
1670   }
1671 
1672   return 0;
1673 }
1674 
1675 /* Iterate over the given log format.
1676  *
1677  * On error, or unable to parse it, 1 is returned.
1678  * On success, the malloc'd token is assigned to a GLogItem member and
1679  * 0 is returned. */
1680 static int
parse_format(GLogItem * logitem,char * str,char * lfmt)1681 parse_format (GLogItem * logitem, char *str, char *lfmt) {
1682   char end[2 + 1] = { 0 };
1683   char *p = NULL;
1684   int perc = 0, tilde = 0, ret = 0;
1685 
1686   if (str == NULL || *str == '\0')
1687     return 1;
1688 
1689   /* iterate over the log format */
1690   for (p = lfmt; *p; p++) {
1691     if (*p == '%') {
1692       perc++;
1693       continue;
1694     }
1695     if (*p == '~' && perc == 0) {
1696       tilde++;
1697       continue;
1698     }
1699     if (*str == '\n')
1700       return 0;
1701 
1702     if (tilde && *p != '\0') {
1703       if ((str == NULL) || (*str == '\0'))
1704         return 0;
1705       if (special_specifier (logitem, &str, &p) == 1)
1706         return 1;
1707       tilde = 0;
1708     }
1709     /* %h */
1710     else if (perc && *p != '\0') {
1711       if ((str == NULL) || (*str == '\0'))
1712         return 0;
1713 
1714       memset (end, 0, sizeof end);
1715       get_delim (end, p);
1716       /* attempt to parse format specifiers */
1717       if ((ret = parse_specifier (logitem, &str, p, end)))
1718         return ret;
1719       perc = 0;
1720     } else if (perc && isspace (p[0])) {
1721       return 1;
1722     } else {
1723       str++;
1724     }
1725   }
1726 
1727   return 0;
1728 }
1729 
/* Decide whether a log line is worth parsing.
 *
 * A line is rejected when it is NULL or empty, when it starts with '#'
 * (a comment), or when it starts with a newline.
 *
 * On error, or invalid, 1 is returned.
 * On success, or valid line, 0 is returned. */
static int
valid_line (char *line) {
  if (line == NULL)
    return 1;

  switch (*line) {
  case '\0':                   /* empty line */
  case '#':                    /* comment */
  case '\n':                   /* blank line */
    return 1;
  default:
    return 0;
  }
}
1745 
1746 /* Determine if we need to lock the mutex. */
1747 static void
lock_spinner(void)1748 lock_spinner (void) {
1749   if (parsing_spinner != NULL && parsing_spinner->state == SPN_RUN)
1750     pthread_mutex_lock (&parsing_spinner->mutex);
1751 }
1752 
1753 /* Determine if we need to unlock the mutex. */
1754 static void
unlock_spinner(void)1755 unlock_spinner (void) {
1756   if (parsing_spinner != NULL && parsing_spinner->state == SPN_RUN)
1757     pthread_mutex_unlock (&parsing_spinner->mutex);
1758 }
1759 
/* Drop the query string from a request, in place, e.g.,
 * /index.php?timestamp=1454385289 becomes /index.php
 *
 * The request is cut at its first '?'. A request that starts with '?' is
 * left as it is, as is one without any '?'. */
static void
strip_qstring (char *req) {
  char *qmark = strchr (req, '?');

  if (qmark == NULL || qmark == req)
    return;

  *qmark = '\0';
}
1770 
/* Increment the overall bandwidth.
 *
 * Thin wrapper: hands the numeric date and the response size straight to
 * ht_inc_cnt_bw(). */
static void
count_bw (int numdate, uint64_t resp_size) {
  ht_inc_cnt_bw (numdate, resp_size);
}
1776 
1777 /* Output all log errors stored during parsing. */
1778 void
output_logerrors(Logs * logs)1779 output_logerrors (Logs * logs) {
1780   GLog *glog = NULL;
1781   int pid = getpid (), i;
1782 
1783   for (i = 0; i < logs->size; ++i) {
1784     glog = &logs->glog[i];
1785     if (!glog->log_erridx)
1786       continue;
1787 
1788     fprintf (stderr, "==%d== GoAccess - Copyright (C) 2009-2020 by Gerardo Orellana\n", pid);
1789     fprintf (stderr, "==%d== https://goaccess.io - <hello@goaccess.io>\n", pid);
1790     fprintf (stderr, "==%d== Released under the MIT License.\n", pid);
1791     fprintf (stderr, "==%d==\n", pid);
1792     fprintf (stderr, "==%d== FILE: %s\n", pid, glog->filename);
1793     fprintf (stderr, "==%d== ", pid);
1794     fprintf (stderr, ERR_PARSED_NLINES, glog->log_erridx);
1795     fprintf (stderr, " %s:\n", ERR_PARSED_NLINES_DESC);
1796     fprintf (stderr, "==%d==\n", pid);
1797     for (i = 0; i < glog->log_erridx; ++i)
1798       fprintf (stderr, "==%d== %s\n", pid, glog->errors[i]);
1799   }
1800   fprintf (stderr, "==%d==\n", pid);
1801   fprintf (stderr, "==%d== %s\n", pid, ERR_FORMAT_HEADER);
1802 }
1803 
1804 /* Ensure we have the following fields. */
1805 static int
verify_missing_fields(GLogItem * logitem)1806 verify_missing_fields (GLogItem * logitem) {
1807   /* must have the following fields */
1808   if (logitem->host == NULL)
1809     logitem->errstr = xstrdup ("IPv4/6 is required.");
1810   else if (logitem->date == NULL)
1811     logitem->errstr = xstrdup ("A valid date is required.");
1812   else if (logitem->req == NULL)
1813     logitem->errstr = xstrdup ("A request is required.");
1814 
1815   return logitem->errstr != NULL;
1816 }
1817 
1818 /* Keep track of all invalid log strings. */
1819 static void
count_invalid(GLog * glog,const char * line)1820 count_invalid (GLog * glog, const char *line) {
1821   glog->invalid++;
1822   ht_inc_cnt_overall ("failed_requests", 1);
1823 
1824   if (conf.invalid_requests_log) {
1825     LOG_INVALID (("%s", line));
1826   }
1827 
1828   if (glog->items->errstr && glog->invalid < MAX_LOG_ERRORS) {
1829     glog->errors[glog->log_erridx++] = xstrdup (glog->items->errstr);
1830   }
1831 }
1832 
1833 /* Count down the number of invalids hits.
1834  * Note: Upon performing a log test, invalid hits are counted, since
1835  * no valid records were found, then we count down by the number of
1836  * tests ran.
1837 */
1838 static void
uncount_invalid(GLog * glog)1839 uncount_invalid (GLog * glog) {
1840   if (glog->invalid > conf.num_tests)
1841     glog->invalid -= conf.num_tests;
1842   else
1843     glog->invalid = 0;
1844 }
1845 
1846 /* Count down the number of processed hits.
1847  * Note: Upon performing a log test, processed hits are counted, since
1848  * no valid records were found, then we count down by the number of
1849  * tests ran.
1850 */
1851 static void
uncount_processed(GLog * glog)1852 uncount_processed (GLog * glog) {
1853   lock_spinner ();
1854   if (glog->processed > conf.num_tests)
1855     glog->processed -= conf.num_tests;
1856   else
1857     glog->processed = 0;
1858   unlock_spinner ();
1859 }
1860 
/* Keep track of all valid log strings.
 *
 * Adds 1 for the given numeric date through ht_inc_cnt_valid(); the call
 * is wrapped in lock_spinner()/unlock_spinner(). */
static void
count_valid (int numdate) {
  lock_spinner ();
  ht_inc_cnt_valid (numdate, 1);
  unlock_spinner ();
}
1868 
/* Keep track of all valid and processed log strings.
 *
 * Increments the log's processed counter and the overall
 * "total_requests" counter; both updates are wrapped in
 * lock_spinner()/unlock_spinner(). */
static void
count_process (GLog * glog) {
  lock_spinner ();
  glog->processed++;
  ht_inc_cnt_overall ("total_requests", 1);
  unlock_spinner ();
}
1877 
/* Count a log line as processed and as invalid, in that order.
 *
 * line is the raw log line, handed on to count_invalid(). */
static void
count_process_and_invalid (GLog * glog, const char *line) {
  count_process (glog);
  count_invalid (glog, line);
}
1883 
1884 /* Keep track of all excluded log strings (IPs).
1885  *
1886  * If IP not range, 1 is returned.
1887  * If IP is excluded, 0 is returned. */
1888 static int
excluded_ip(GLogItem * logitem)1889 excluded_ip (GLogItem * logitem) {
1890   if (conf.ignore_ip_idx && ip_in_range (logitem->host)) {
1891     ht_inc_cnt_overall ("excluded_ip", 1);
1892     return 0;
1893   }
1894   return 1;
1895 }
1896 
1897 /* Determine if the request is from a robot or spider and check if we
1898  * need to ignore or show crawlers only.
1899  *
1900  * If the request line is not ignored, 0 is returned.
1901  * If the request line is ignored, 1 is returned. */
1902 static int
handle_crawler(const char * agent)1903 handle_crawler (const char *agent) {
1904   int bot = 0;
1905 
1906   if (!conf.ignore_crawlers && !conf.crawlers_only)
1907     return 1;
1908 
1909   bot = is_crawler (agent);
1910   return (conf.ignore_crawlers && bot) || (conf.crawlers_only && !bot) ? 0 : 1;
1911 }
1912 
/* A wrapper function to determine if the request is static.
 *
 * The decision is delegated entirely to verify_static_content(), whose
 * result is returned as is.
 *
 * If the request is not static, 0 is returned.
 * If the request is static, 1 is returned. */
static int
is_static (const char *req) {
  return verify_static_content (req);
}
1921 
1922 /* Determine if the request of the given status code needs to be
1923  * ignored.
1924  *
1925  * If the status code is not within the ignore-array, 0 is returned.
1926  * If the status code is within the ignore-array, 1 is returned. */
1927 static int
ignore_status_code(const char * status)1928 ignore_status_code (const char *status) {
1929   if (conf.ignore_status_idx == 0)
1930     return 0;
1931 
1932   if (str_inarray (status, conf.ignore_status, conf.ignore_status_idx) != -1)
1933     return 1;
1934   return 0;
1935 }
1936 
1937 /* Determine if static file request should be ignored
1938     *
1939     * If the request line is not ignored, 0 is returned.
1940     * If the request line is ignored, 1 is returned. */
1941 static int
ignore_static(const char * req)1942 ignore_static (const char *req) {
1943   if (conf.ignore_statics && is_static (req))
1944     return 1;
1945   return 0;
1946 }
1947 
1948 /* Determine if the request status code is a 404.
1949  *
1950  * If the request is not a 404, 0 is returned.
1951  * If the request is a 404, 1 is returned. */
1952 static int
is_404(GLogItem * logitem)1953 is_404 (GLogItem * logitem) {
1954   /* is this a 404? */
1955   if (logitem->status && !memcmp (logitem->status, "404", 3))
1956     return 1;
1957   /* treat 444 as 404? */
1958   else if (logitem->status && !memcmp (logitem->status, "444", 3) && conf.code444_as_404)
1959     return 1;
1960   return 0;
1961 }
1962 
1963 /* A wrapper function to determine if a log line needs to be ignored.
1964  *
1965  * If the request line is not ignored, 0 is returned.
1966  * If the request line is ignored, IGNORE_LEVEL_PANEL is returned.
1967  * If the request line is only not counted as valid, IGNORE_LEVEL_REQ is returned. */
1968 static int
ignore_line(GLogItem * logitem)1969 ignore_line (GLogItem * logitem) {
1970   if (excluded_ip (logitem) == 0)
1971     return IGNORE_LEVEL_PANEL;
1972   if (handle_crawler (logitem->agent) == 0)
1973     return IGNORE_LEVEL_PANEL;
1974   if (ignore_referer (logitem->ref))
1975     return IGNORE_LEVEL_PANEL;
1976   if (ignore_status_code (logitem->status))
1977     return IGNORE_LEVEL_PANEL;
1978   if (ignore_static (logitem->req))
1979     return conf.ignore_statics; // IGNORE_LEVEL_PANEL or IGNORE_LEVEL_REQ
1980 
1981   /* check if we need to remove the request's query string */
1982   if (conf.ignore_qstr)
1983     strip_qstring (logitem->req);
1984 
1985   return 0;
1986 }
1987 
/* A wrapper function to insert a data keymap string key.
 *
 * Hands kdata->data_key, together with the numeric date and the address
 * of kdata->cdnkey, to ht_insert_keymap() and returns its result.
 *
 * Per the contract of ht_insert_keymap() (not visible here):
 * If the given key exists, its value is returned.
 * On error, -1 is returned.
 * On success the value of the key inserted is returned */
static int
insert_dkeymap (GModule module, GKeyData * kdata) {
  return ht_insert_keymap (module, kdata->numdate, kdata->data_key, &kdata->cdnkey);
}
1997 
/* A wrapper function to insert a root keymap string key.
 *
 * Hands kdata->root_key, together with the numeric date and the address
 * of kdata->crnkey, to ht_insert_keymap() and returns its result.
 *
 * Per the contract of ht_insert_keymap() (not visible here):
 * If the given key exists, its value is returned.
 * On error, -1 is returned.
 * On success the value of the key inserted is returned */
static int
insert_rkeymap (GModule module, GKeyData * kdata) {
  return ht_insert_keymap (module, kdata->numdate, kdata->root_key, &kdata->crnkey);
}
2007 
/* A wrapper function to insert a datamap uint32_t key and string value.
 *
 * Hands kdata->data_nkey (key) and kdata->data (value), together with the
 * numeric date and kdata->cdnkey, to ht_insert_datamap(). */
static void
insert_data (GModule module, GKeyData * kdata) {
  ht_insert_datamap (module, kdata->numdate, kdata->data_nkey, kdata->data, kdata->cdnkey);
}
2013 
/* A wrapper function to insert a uniqmap string key.
 *
 * Hands kdata->data_nkey and uniq_nkey, together with the numeric date,
 * to ht_insert_uniqmap() and returns its result.
 *
 * Per the contract of ht_insert_uniqmap() (not visible here):
 * If the given key exists, 0 is returned.
 * On error, -1 is returned.
 * On success the value of the key inserted is returned */
static int
insert_uniqmap (GModule module, GKeyData * kdata, uint32_t uniq_nkey) {
  return ht_insert_uniqmap (module, kdata->numdate, kdata->data_nkey, uniq_nkey);
}
2023 
/* A wrapper function to insert a rootmap uint32_t key from the keymap
 * store mapped to its string value.
 *
 * Hands kdata->root_nkey (key) and kdata->root (value), together with the
 * numeric date and kdata->crnkey, to ht_insert_rootmap(). */
static void
insert_rootmap (GModule module, GKeyData * kdata) {
  ht_insert_rootmap (module, kdata->numdate, kdata->root_nkey, kdata->root, kdata->crnkey);
}
2030 
/* A wrapper function to insert a data uint32_t key mapped to the
 * corresponding uint32_t root key.
 *
 * Hands kdata->data_nkey and kdata->root_nkey, together with the numeric
 * date, kdata->cdnkey and kdata->crnkey, to ht_insert_root(). */
static void
insert_root (GModule module, GKeyData * kdata) {
  ht_insert_root (module, kdata->numdate, kdata->data_nkey, kdata->root_nkey, kdata->cdnkey,
                  kdata->crnkey);
}
2038 
2039 /* A wrapper function to increase hits counter from an uint32_t key. */
2040 static void
insert_hit(GModule module,GKeyData * kdata)2041 insert_hit (GModule module, GKeyData * kdata) {
2042   ht_insert_hits (module, kdata->numdate, kdata->data_nkey, 1, kdata->cdnkey);
2043   ht_insert_meta_data (module, kdata->numdate, "hits", 1);
2044 }
2045 
2046 /* A wrapper function to increase visitors counter from an uint32_t
2047  * key. */
2048 static void
insert_visitor(GModule module,GKeyData * kdata)2049 insert_visitor (GModule module, GKeyData * kdata) {
2050   ht_insert_visitor (module, kdata->numdate, kdata->data_nkey, 1, kdata->cdnkey);
2051   ht_insert_meta_data (module, kdata->numdate, "visitors", 1);
2052 }
2053 
2054 /* A wrapper function to increases bandwidth counter from an uint32_t
2055  * key. */
2056 static void
insert_bw(GModule module,GKeyData * kdata,uint64_t size)2057 insert_bw (GModule module, GKeyData * kdata, uint64_t size) {
2058   ht_insert_bw (module, kdata->numdate, kdata->data_nkey, size, kdata->cdnkey);
2059   ht_insert_meta_data (module, kdata->numdate, "bytes", size);
2060 }
2061 
2062 /* A wrapper call to increases cumulative time served counter
2063  * from an uint32_t key. */
2064 static void
insert_cumts(GModule module,GKeyData * kdata,uint64_t ts)2065 insert_cumts (GModule module, GKeyData * kdata, uint64_t ts) {
2066   ht_insert_cumts (module, kdata->numdate, kdata->data_nkey, ts, kdata->cdnkey);
2067   ht_insert_meta_data (module, kdata->numdate, "cumts", ts);
2068 }
2069 
2070 /* A wrapper call to insert the maximum time served counter from
2071  * an uint32_t key. */
2072 static void
insert_maxts(GModule module,GKeyData * kdata,uint64_t ts)2073 insert_maxts (GModule module, GKeyData * kdata, uint64_t ts) {
2074   ht_insert_maxts (module, kdata->numdate, kdata->data_nkey, ts, kdata->cdnkey);
2075   ht_insert_meta_data (module, kdata->numdate, "maxts", ts);
2076 }
2077 
2078 static void
insert_method(GModule module,GKeyData * kdata,const char * data)2079 insert_method (GModule module, GKeyData * kdata, const char *data) {
2080   ht_insert_method (module, kdata->numdate, kdata->data_nkey, data ? data : "---",
2081                     kdata->cdnkey);
2082 }
2083 
2084 /* A wrapper call to insert a method given an uint32_t key and string
2085  * value. */
2086 static void
insert_protocol(GModule module,GKeyData * kdata,const char * data)2087 insert_protocol (GModule module, GKeyData * kdata, const char *data) {
2088   ht_insert_protocol (module, kdata->numdate, kdata->data_nkey, data ? data : "---",
2089                       kdata->cdnkey);
2090 }
2091 
2092 /* A wrapper call to insert an agent for a hostname given an uint32_t
2093  * key and uint32_t value.  */
2094 static void
insert_agent(GModule module,GKeyData * kdata,uint32_t agent_nkey)2095 insert_agent (GModule module, GKeyData * kdata, uint32_t agent_nkey) {
2096   ht_insert_agent (module, kdata->numdate, kdata->data_nkey, agent_nkey);
2097 }
2098 
2099 /* The following generates a unique key to identity unique visitors.
2100  * The key is made out of the IP, date, and user agent.
2101  * Note that for readability, doing a simple snprintf/sprintf should
2102  * suffice, however, memcpy is the fastest solution
2103  *
2104  * On success the new unique visitor key is returned */
2105 static char *
get_uniq_visitor_key(GLogItem * logitem)2106 get_uniq_visitor_key (GLogItem * logitem) {
2107   char *ua = NULL, *key = NULL;
2108   size_t s1, s2, s3;
2109 
2110   ua = deblank (xstrdup (logitem->agent));
2111 
2112   s1 = strlen (logitem->date);
2113   s2 = strlen (logitem->host);
2114   s3 = strlen (ua);
2115 
2116   /* includes terminating null */
2117   key = xcalloc (s1 + s2 + s3 + 3, sizeof (char));
2118 
2119   memcpy (key, logitem->date, s1);
2120 
2121   key[s1] = '|';
2122   memcpy (key + s1 + 1, logitem->host, s2 + 1);
2123 
2124   key[s1 + s2 + 1] = '|';
2125   memcpy (key + s1 + s2 + 2, ua, s3 + 1);
2126 
2127   free (ua);
2128   return key;
2129 }
2130 
2131 /* The following generates a unique key to identity unique requests.
2132  * The key is made out of the actual request, and if available, the
2133  * method and the protocol.  Note that for readability, doing a simple
2134  * snprintf/sprintf should suffice, however, memcpy is the fastest
2135  * solution
2136  *
2137  * On success the new unique request key is returned */
2138 static char *
gen_unique_req_key(GLogItem * logitem)2139 gen_unique_req_key (GLogItem * logitem) {
2140   char *key = NULL;
2141   size_t s1 = 0, s2 = 0, s3 = 0, nul = 1, sep = 0;
2142 
2143   /* nothing to do */
2144   if (!conf.append_method && !conf.append_protocol)
2145     return xstrdup (logitem->req);
2146   /* still nothing to do */
2147   if (!logitem->method && !logitem->protocol)
2148     return xstrdup (logitem->req);
2149 
2150   s1 = strlen (logitem->req);
2151   if (logitem->method && conf.append_method) {
2152     s2 = strlen (logitem->method);
2153     nul++;
2154   }
2155   if (logitem->protocol && conf.append_protocol) {
2156     s3 = strlen (logitem->protocol);
2157     nul++;
2158   }
2159 
2160   /* includes terminating null */
2161   key = xcalloc (s1 + s2 + s3 + nul, sizeof (char));
2162   /* append request */
2163   memcpy (key, logitem->req, s1);
2164 
2165   if (logitem->method && conf.append_method) {
2166     key[s1] = '|';
2167     sep++;
2168     memcpy (key + s1 + sep, logitem->method, s2 + 1);
2169   }
2170   if (logitem->protocol && conf.append_protocol) {
2171     key[s1 + s2 + sep] = '|';
2172     sep++;
2173     memcpy (key + s1 + s2 + sep, logitem->protocol, s3 + 1);
2174   }
2175 
2176   return key;
2177 }
2178 
/* Append the query string to the request.
 *
 * A '?' is inserted between the two unless qstr already starts with one.
 * The original *req is freed and replaced by the newly allocated,
 * concatenated string, i.e., logitem->req is modified. */
static void
append_query_string (char **req, const char *qstr) {
  const size_t url_len = strlen (*req);
  const size_t qstr_len = strlen (qstr);
  /* room for the '?' separator, only when qstr does not bring its own */
  const size_t sep_len = (*qstr == '?') ? 0 : 1;
  char *joined = NULL, *out = NULL;

  joined = xmalloc (url_len + qstr_len + sep_len + 1);
  out = joined;

  memcpy (out, *req, url_len);
  out += url_len;
  if (sep_len)
    *out++ = '?';
  /* includes terminating null */
  memcpy (out, qstr, qstr_len + 1);

  free (*req);
  *req = joined;
}
2202 
2203 /* A wrapper to assign the given data key and the data item to the key
2204  * data structure */
2205 static void
get_kdata(GKeyData * kdata,char * data_key,char * data)2206 get_kdata (GKeyData * kdata, char *data_key, char *data) {
2207   /* inserted in keymap */
2208   kdata->data_key = data_key;
2209   /* inserted in datamap */
2210   kdata->data = data;
2211 }
2212 
2213 /* Generate a visitor's key given the date specificity. For instance,
2214  * if the specificity if set to hours, then a generated key would
2215  * look like: 03/Jan/2016:09 */
2216 static void
set_spec_visitor_key(char ** fdate,const char * ftime)2217 set_spec_visitor_key (char **fdate, const char *ftime) {
2218   size_t dlen = 0, tlen = 0;
2219   char *key = NULL, *tkey = NULL, *pch = NULL;
2220 
2221   tkey = xstrdup (ftime);
2222   if (conf.date_spec_hr && (pch = strchr (tkey, ':')) && (pch - tkey) > 0)
2223     *pch = '\0';
2224 
2225   dlen = strlen (*fdate);
2226   tlen = strlen (tkey);
2227 
2228   key = xmalloc (dlen + tlen + 1);
2229   memcpy (key, *fdate, dlen);
2230   memcpy (key + dlen, tkey, tlen + 1);
2231 
2232   free (*fdate);
2233   free (tkey);
2234   *fdate = key;
2235 }
2236 
2237 /* Generate a unique key for the visitors panel from the given logitem
2238  * structure and assign it to the output key data structure.
2239  *
2240  * On error, or if no date is found, 1 is returned.
2241  * On success, the date key is assigned to our key data structure.
2242  */
2243 static int
gen_visitor_key(GKeyData * kdata,GLogItem * logitem)2244 gen_visitor_key (GKeyData * kdata, GLogItem * logitem) {
2245   if (!logitem->date || !logitem->time)
2246     return 1;
2247 
2248   /* Append time specificity to date */
2249   if (conf.date_spec_hr)
2250     set_spec_visitor_key (&logitem->date, logitem->time);
2251 
2252   get_kdata (kdata, logitem->date, logitem->date);
2253   kdata->numdate = logitem->numdate;
2254 
2255   return 0;
2256 }
2257 
2258 /* Generate a unique key for the requests panel from the given logitem
2259  * structure and assign it to out key data structure.
2260  *
2261  * On success, the generated request key is assigned to our key data
2262  * structure.
2263  */
2264 static int
gen_req_key(GKeyData * kdata,GLogItem * logitem)2265 gen_req_key (GKeyData * kdata, GLogItem * logitem) {
2266   if (!logitem->req)
2267     return 1;
2268 
2269   if (logitem->qstr)
2270     append_query_string (&logitem->req, logitem->qstr);
2271   logitem->req_key = gen_unique_req_key (logitem);
2272 
2273   get_kdata (kdata, logitem->req_key, logitem->req);
2274   kdata->numdate = logitem->numdate;
2275 
2276   return 0;
2277 }
2278 
2279 /* A wrapper to generate a unique key for the request panel.
2280  *
2281  * On error, or if the request is static or a 404, 1 is returned.
2282  * On success, the generated request key is assigned to our key data
2283  * structure.
2284  */
2285 static int
gen_request_key(GKeyData * kdata,GLogItem * logitem)2286 gen_request_key (GKeyData * kdata, GLogItem * logitem) {
2287   if (!logitem->req || logitem->is_404 || logitem->is_static)
2288     return 1;
2289 
2290   return gen_req_key (kdata, logitem);
2291 }
2292 
2293 /* A wrapper to generate a unique key for the request panel.
2294  *
2295  * On error, or if the request is not a 404, 1 is returned.
2296  * On success, the generated request key is assigned to our key data
2297  * structure. */
2298 static int
gen_404_key(GKeyData * kdata,GLogItem * logitem)2299 gen_404_key (GKeyData * kdata, GLogItem * logitem) {
2300   if (logitem->req && logitem->is_404)
2301     return gen_req_key (kdata, logitem);
2302   return 1;
2303 }
2304 
2305 /* A wrapper to generate a unique key for the request panel.
2306  *
2307  * On error, or if the request is not a static request, 1 is returned.
2308  * On success, the generated request key is assigned to our key data
2309  * structure. */
2310 static int
gen_static_request_key(GKeyData * kdata,GLogItem * logitem)2311 gen_static_request_key (GKeyData * kdata, GLogItem * logitem) {
2312   if (logitem->req && logitem->is_static)
2313     return gen_req_key (kdata, logitem);
2314   return 1;
2315 }
2316 
2317 /* A wrapper to generate a unique key for the virtual host panel.
2318  *
2319  * On error, 1 is returned.
2320  * On success, the generated vhost key is assigned to our key data
2321  * structure. */
2322 static int
gen_vhost_key(GKeyData * kdata,GLogItem * logitem)2323 gen_vhost_key (GKeyData * kdata, GLogItem * logitem) {
2324   if (!logitem->vhost)
2325     return 1;
2326 
2327   get_kdata (kdata, logitem->vhost, logitem->vhost);
2328   kdata->numdate = logitem->numdate;
2329 
2330   return 0;
2331 }
2332 
2333 /* A wrapper to generate a unique key for the virtual host panel.
2334  *
2335  * On error, 1 is returned.
2336  * On success, the generated userid key is assigned to our key data
2337  * structure. */
2338 static int
gen_remote_user_key(GKeyData * kdata,GLogItem * logitem)2339 gen_remote_user_key (GKeyData * kdata, GLogItem * logitem) {
2340   if (!logitem->userid)
2341     return 1;
2342 
2343   get_kdata (kdata, logitem->userid, logitem->userid);
2344   kdata->numdate = logitem->numdate;
2345 
2346   return 0;
2347 }
2348 
2349 /* A wrapper to generate a unique key for the cache status panel.
2350  *
2351  * On error, 1 is returned.
2352  * On success, the generated cache status key is assigned to our key data
2353  * structure. */
2354 static int
gen_cache_status_key(GKeyData * kdata,GLogItem * logitem)2355 gen_cache_status_key (GKeyData * kdata, GLogItem * logitem) {
2356   if (!logitem->cache_status)
2357     return 1;
2358 
2359   get_kdata (kdata, logitem->cache_status, logitem->cache_status);
2360   kdata->numdate = logitem->numdate;
2361 
2362   return 0;
2363 }
2364 
2365 /* A wrapper to generate a unique key for the hosts panel.
2366  *
2367  * On error, 1 is returned.
2368  * On success, the generated host key is assigned to our key data
2369  * structure. */
2370 static int
gen_host_key(GKeyData * kdata,GLogItem * logitem)2371 gen_host_key (GKeyData * kdata, GLogItem * logitem) {
2372   if (!logitem->host)
2373     return 1;
2374 
2375   get_kdata (kdata, logitem->host, logitem->host);
2376   kdata->numdate = logitem->numdate;
2377 
2378   return 0;
2379 }
2380 
2381 /* Generate a browser unique key for the browser's panel given a user
2382  * agent and assign the browser type/category as a root element.
2383  *
2384  * On error, 1 is returned.
2385  * On success, the generated browser key is assigned to our key data
2386  * structure. */
2387 static int
gen_browser_key(GKeyData * kdata,GLogItem * logitem)2388 gen_browser_key (GKeyData * kdata, GLogItem * logitem) {
2389   char *agent = NULL;
2390   char browser_type[BROWSER_TYPE_LEN] = "";
2391 
2392   if (logitem->agent == NULL || *logitem->agent == '\0')
2393     return 1;
2394 
2395   agent = xstrdup (logitem->agent);
2396   logitem->browser = verify_browser (agent, browser_type);
2397   logitem->browser_type = xstrdup (browser_type);
2398 
2399   /* e.g., Firefox 11.12 */
2400   kdata->data = logitem->browser;
2401   kdata->data_key = logitem->browser;
2402 
2403   /* Firefox */
2404   kdata->root = logitem->browser_type;
2405   kdata->root_key = logitem->browser_type;
2406   kdata->numdate = logitem->numdate;
2407 
2408   free (agent);
2409 
2410   return 0;
2411 }
2412 
2413 /* Generate an operating system unique key for the OS' panel given a
2414  * user agent and assign the OS type/category as a root element.
2415  *
2416  * On error, 1 is returned.
2417  * On success, the generated OS key is assigned to our key data
2418  * structure. */
2419 static int
gen_os_key(GKeyData * kdata,GLogItem * logitem)2420 gen_os_key (GKeyData * kdata, GLogItem * logitem) {
2421   char *agent = NULL;
2422   char os_type[OPESYS_TYPE_LEN] = "";
2423 
2424   if (logitem->agent == NULL || *logitem->agent == '\0')
2425     return 1;
2426 
2427   agent = xstrdup (logitem->agent);
2428   logitem->os = verify_os (agent, os_type);
2429   logitem->os_type = xstrdup (os_type);
2430 
2431   /* e.g., Linux,Ubuntu 10.12 */
2432   kdata->data = logitem->os;
2433   kdata->data_key = logitem->os;
2434 
2435   /* Linux */
2436   kdata->root = logitem->os_type;
2437   kdata->root_key = logitem->os_type;
2438   kdata->numdate = logitem->numdate;
2439 
2440   free (agent);
2441 
2442   return 0;
2443 }
2444 
/* Determine if the given token starts with a valid MIME major type.
 *
 * The comparison is a case-sensitive prefix match, so anything may follow
 * the major type in the token.
 *
 * If not valid, NULL is returned.
 * If valid, the matching constant string is returned. */
static const char *
extract_mimemajor (const char *token) {
  /* official IANA registries as per https://www.iana.org/assignments/media-types/ */
  static const char *const majors[] = {
    "application",
    "audio",
    "font",
    "example",  /* unlikely */
    "image",
    "message",  /* unlikely */
    "model",
    "multipart",
    "text",
    "video",
  };
  size_t i;

  for (i = 0; i < sizeof majors / sizeof majors[0]; i++) {
    if (strncmp (token, majors[i], strlen (majors[i])) == 0)
      return majors[i];
  }

  return NULL;
}
2471 
2472 /* UMS: generate an Mime-Type unique key
2473  *
2474  * On error, 1 is returned.
2475  * On success, the generated key is assigned to our key data structure.
2476  */
2477 static int
gen_mime_type_key(GKeyData * kdata,GLogItem * logitem)2478 gen_mime_type_key (GKeyData * kdata, GLogItem * logitem) {
2479   const char *major = NULL;
2480 
2481   if (!logitem->mime_type)
2482     return 1;
2483 
2484   /* redirects and the like only register as "-", ignore those */
2485   major = extract_mimemajor (logitem->mime_type);
2486   if (!major)
2487     return 1;
2488 
2489   kdata->data = logitem->mime_type;
2490   kdata->data_key = logitem->mime_type;
2491   kdata->numdate = logitem->numdate;
2492 
2493   kdata->root = major;
2494   kdata->root_key = major;
2495 
2496   return 0;
2497 }
2498 
/* Determine if the given token starts with the usual TLS/SSL result string.
 *
 * The comparison is a case-sensitive prefix match.
 *
 * If not valid, NULL is returned.
 * If valid, the matching constant string is returned. */
static const char *
extract_tlsmajor (const char *token) {
  /* Order matters: the bare "TLSv1" (nope, it's not logged as 1.0) is a
   * prefix of the dotted versions, so it has to be tried last. */
  static const char *const versions[] = {
    "SSLv3",
    "TLSv1.1",
    "TLSv1.2",
    "TLSv1.3",
    "TLSv1",
  };
  size_t i;

  for (i = 0; i < sizeof versions / sizeof versions[0]; i++) {
    if (strncmp (token, versions[i], strlen (versions[i])) == 0)
      return versions[i];
  }

  return NULL;
}
2516 
2517 /* UMS: generate a TLS settings unique key
2518  *
2519  * On error, 1 is returned.
2520  * On success, the generated key is assigned to our key data structure.
2521  */
2522 static int
gen_tls_type_key(GKeyData * kdata,GLogItem * logitem)2523 gen_tls_type_key (GKeyData * kdata, GLogItem * logitem) {
2524   const char *tls;
2525   size_t tlen = 0, clen = 0;
2526 
2527   if (!logitem->tls_type)
2528     return 1;
2529 
2530   /* '-' means no TLS at all, just ignore for the panel? */
2531   tls = extract_tlsmajor (logitem->tls_type);
2532 
2533   if (!tls)
2534     return 1;
2535 
2536   kdata->numdate = logitem->numdate;
2537   if (!logitem->tls_cypher) {
2538     kdata->data_key = kdata->data = kdata->root = kdata->root_key = tls;
2539     return 0;
2540   }
2541 
2542   clen = strlen (logitem->tls_cypher);
2543   tlen = strlen (tls);
2544 
2545   logitem->tls_type_cypher = xmalloc (tlen + clen + 2);
2546   memcpy (logitem->tls_type_cypher, tls, tlen);
2547   logitem->tls_type_cypher[tlen] = '/';
2548   /* includes terminating null */
2549   memcpy (logitem->tls_type_cypher + tlen + 1, logitem->tls_cypher, clen + 1);
2550 
2551   kdata->data = logitem->tls_type_cypher;
2552   kdata->data_key = logitem->tls_type_cypher;
2553 
2554   kdata->root = tls;
2555   kdata->root_key = tls;
2556 
2557   return 0;
2558 }
2559 
2560 
2561 /* A wrapper to generate a unique key for the referrers panel.
2562  *
2563  * On error, 1 is returned.
2564  * On success, the generated referrer key is assigned to our key data
2565  * structure. */
2566 static int
gen_referer_key(GKeyData * kdata,GLogItem * logitem)2567 gen_referer_key (GKeyData * kdata, GLogItem * logitem) {
2568   if (!logitem->ref)
2569     return 1;
2570 
2571   get_kdata (kdata, logitem->ref, logitem->ref);
2572   kdata->numdate = logitem->numdate;
2573 
2574   return 0;
2575 }
2576 
2577 /* A wrapper to generate a unique key for the referring sites panel.
2578  *
2579  * On error, 1 is returned.
2580  * On success, the generated referring site key is assigned to our key data
2581  * structure. */
2582 static int
gen_ref_site_key(GKeyData * kdata,GLogItem * logitem)2583 gen_ref_site_key (GKeyData * kdata, GLogItem * logitem) {
2584   if (logitem->site[0] == '\0')
2585     return 1;
2586 
2587   get_kdata (kdata, logitem->site, logitem->site);
2588   kdata->numdate = logitem->numdate;
2589 
2590   return 0;
2591 }
2592 
2593 /* A wrapper to generate a unique key for the keyphrases panel.
2594  *
2595  * On error, 1 is returned.
2596  * On success, the generated keyphrase key is assigned to our key data
2597  * structure. */
2598 static int
gen_keyphrase_key(GKeyData * kdata,GLogItem * logitem)2599 gen_keyphrase_key (GKeyData * kdata, GLogItem * logitem) {
2600   if (!logitem->keyphrase)
2601     return 1;
2602 
2603   get_kdata (kdata, logitem->keyphrase, logitem->keyphrase);
2604   kdata->numdate = logitem->numdate;
2605 
2606   return 0;
2607 }
2608 
/* A wrapper to generate a unique key for the geolocation panel, with the
 * country as the data and the continent as its root element.
 *
 * Non-empty country/continent strings reported by extract_geolocation()
 * are duplicated into logitem->country and logitem->continent.
 *
 * If extract_geolocation() returns 1, 1 is returned.
 * Otherwise, 0 is returned and the geolocation key is assigned to our key
 * data structure. */
#ifdef HAVE_GEOLOCATION
static int
gen_geolocation_key (GKeyData * kdata, GLogItem * logitem) {
  char country[COUNTRY_LEN] = "";
  char continent[CONTINENT_LEN] = "";

  if (extract_geolocation (logitem, continent, country) == 1)
    return 1;

  /* only keep what was actually resolved */
  if (*country != '\0')
    logitem->country = xstrdup (country);
  if (*continent != '\0')
    logitem->continent = xstrdup (continent);

  kdata->numdate = logitem->numdate;

  kdata->root_key = logitem->continent;
  kdata->root = logitem->continent;

  kdata->data_key = logitem->country;
  kdata->data = logitem->country;

  return 0;
}
#endif
2639 
2640 /* A wrapper to generate a unique key for the status code panel.
2641  *
2642  * On error, 1 is returned.
2643  * On success, the generated status code key is assigned to our key
2644  * data structure. */
2645 static int
gen_status_code_key(GKeyData * kdata,GLogItem * logitem)2646 gen_status_code_key (GKeyData * kdata, GLogItem * logitem) {
2647   const char *status = NULL, *type = NULL;
2648 
2649   if (!logitem->status)
2650     return 1;
2651 
2652   type = verify_status_code_type (logitem->status);
2653   status = verify_status_code (logitem->status);
2654 
2655   kdata->data = (char *) status;
2656   kdata->data_key = (char *) status;
2657 
2658   kdata->root = (char *) type;
2659   kdata->root_key = (char *) type;
2660   kdata->numdate = logitem->numdate;
2661 
2662   return 0;
2663 }
2664 
2665 /* Given a time string containing at least %H:%M, extract either the
2666  * tenth of a minute or an hour.
2667  *
2668  * On error, the given string is not modified.
2669  * On success, the conf specificity is extracted. */
2670 static void
parse_time_specificity_string(char * hmark,char * ftime)2671 parse_time_specificity_string (char *hmark, char *ftime) {
2672   /* tenth of a minute specificity - e.g., 18:2 */
2673   if (conf.hour_spec_min && hmark[1] != '\0') {
2674     hmark[2] = '\0';
2675     return;
2676   }
2677 
2678   /* hour specificity (default) */
2679   if ((hmark - ftime) > 0)
2680     *hmark = '\0';
2681 }
2682 
2683 /* A wrapper to generate a unique key for the time distribution panel.
2684  *
2685  * On error, 1 is returned.
2686  * On success, the generated time key is assigned to our key data
2687  * structure. */
2688 static int
gen_visit_time_key(GKeyData * kdata,GLogItem * logitem)2689 gen_visit_time_key (GKeyData * kdata, GLogItem * logitem) {
2690   char *hmark = NULL;
2691   char hour[HRMI_LEN] = "";     /* %H:%M */
2692   if (!logitem->time)
2693     return 1;
2694 
2695   /* if not a timestamp, then it must be a string containing the hour.
2696    * this is faster than actual date conversion */
2697   if (!has_timestamp (conf.time_format) && (hmark = strchr (logitem->time, ':'))) {
2698     parse_time_specificity_string (hmark, logitem->time);
2699 
2700     kdata->numdate = logitem->numdate;
2701     get_kdata (kdata, logitem->time, logitem->time);
2702     return 0;
2703   }
2704 
2705   /* otherwise it attempts to convert the date given a time format,
2706    * though this is slower */
2707   memset (hour, 0, sizeof *hour);
2708   if (convert_date (hour, logitem->time, "%T", "%H:%M", HRMI_LEN) != 0)
2709     return 1;
2710 
2711   if (*hour == '\0')
2712     return 1;
2713 
2714   if ((hmark = strchr (hour, ':')))
2715     parse_time_specificity_string (hmark, hour);
2716 
2717   free (logitem->time);
2718   logitem->time = xstrdup (hour);
2719 
2720   get_kdata (kdata, logitem->time, logitem->time);
2721   kdata->numdate = logitem->numdate;
2722 
2723   return 0;
2724 }
2725 
2726 /* Determine if 404s need to be added to the unique visitors count.
2727  *
2728  * If it needs to be added, 0 is returned else 1 is returned. */
2729 static int
include_uniq(GLogItem * logitem)2730 include_uniq (GLogItem * logitem) {
2731   int u = conf.client_err_to_unique_count;
2732 
2733   if (!logitem->status || logitem->status[0] != '4' || (u && logitem->status[0] == '4'))
2734     return 1;
2735   return 0;
2736 }
2737 
2738 /* Determine which data metrics need to be set and set them. */
2739 static void
set_datamap(GLogItem * logitem,GKeyData * kdata,const GParse * parse)2740 set_datamap (GLogItem * logitem, GKeyData * kdata, const GParse * parse) {
2741   GModule module;
2742   module = parse->module;
2743 
2744   /* insert data */
2745   parse->datamap (module, kdata);
2746 
2747   /* insert rootmap and root-data map */
2748   if (parse->rootmap && kdata->root) {
2749     parse->rootmap (module, kdata);
2750     insert_root (module, kdata);
2751   }
2752   /* insert hits */
2753   if (parse->hits)
2754     parse->hits (module, kdata);
2755   /* insert visitors */
2756   if (parse->visitor && kdata->uniq_nkey == 1)
2757     parse->visitor (module, kdata);
2758   /* insert bandwidth */
2759   if (parse->bw)
2760     parse->bw (module, kdata, logitem->resp_size);
2761   /* insert averages time served */
2762   if (parse->cumts)
2763     parse->cumts (module, kdata, logitem->serve_time);
2764   /* insert averages time served */
2765   if (parse->maxts)
2766     parse->maxts (module, kdata, logitem->serve_time);
2767   /* insert method */
2768   if (parse->method && conf.append_method)
2769     parse->method (module, kdata, logitem->method);
2770   /* insert protocol */
2771   if (parse->protocol && conf.append_protocol)
2772     parse->protocol (module, kdata, logitem->protocol);
2773   /* insert agent */
2774   if (parse->agent && conf.list_agents)
2775     parse->agent (module, kdata, logitem->agent_nkey);
2776 }
2777 
2778 /* Set data mapping and metrics. */
2779 static void
map_log(GLogItem * logitem,const GParse * parse,GModule module)2780 map_log (GLogItem * logitem, const GParse * parse, GModule module) {
2781   GKeyData kdata;
2782 
2783   new_modulekey (&kdata);
2784   /* set key data into out structure */
2785   if (parse->key_data (&kdata, logitem) == 1)
2786     return;
2787 
2788   /* each module requires a data key/value */
2789   if (parse->datamap && kdata.data_key)
2790     kdata.data_nkey = insert_dkeymap (module, &kdata);
2791 
2792   /* each module contains a uniq visitor key/value */
2793   if (parse->visitor && logitem->uniq_key && include_uniq (logitem))
2794     kdata.uniq_nkey = insert_uniqmap (module, &kdata, logitem->uniq_nkey);
2795 
2796   /* root keys are optional */
2797   if (parse->rootmap && kdata.root_key)
2798     kdata.root_nkey = insert_rkeymap (module, &kdata);
2799 
2800   /* each module requires a root key/value */
2801   if (parse->datamap && kdata.data_key)
2802     set_datamap (logitem, &kdata, parse);
2803 }
2804 
2805 static void
ins_agent_key_val(GLogItem * logitem,uint32_t numdate)2806 ins_agent_key_val (GLogItem * logitem, uint32_t numdate) {
2807   logitem->agent_nkey = ht_insert_agent_key (numdate, logitem->agent);
2808   /* insert UA key and get a numeric value */
2809   if (logitem->agent_nkey != 0) {
2810     /* insert a numeric key and map it to a UA string */
2811     ht_insert_agent_value (numdate, logitem->agent_nkey, logitem->agent);
2812   }
2813 }
2814 
2815 static int
clean_old_data_by_date(uint32_t numdate)2816 clean_old_data_by_date (uint32_t numdate) {
2817   uint32_t *dates = NULL;
2818   uint32_t idx, len = 0;
2819 
2820   if (ht_get_size_dates () < conf.keep_last)
2821     return 1;
2822 
2823   dates = get_sorted_dates (&len);
2824 
2825   /* If currently parsed date is in the set of dates, keep inserting it.
2826    * We count down since more likely the currently parsed date is at the last pos */
2827   for (idx = len; idx-- > 0;) {
2828     if (dates[idx] == numdate) {
2829       free (dates);
2830       return 1;
2831     }
2832   }
2833 
2834   /* ignore older dates */
2835   if (dates[0] > numdate) {
2836     free (dates);
2837     return -1;
2838   }
2839 
2840   /* invalidate the first date we inserted then */
2841   invalidate_date (dates[0]);
2842   /* rebuild all existing dates and let new data
2843    * be added upon existing cache */
2844   rebuild_rawdata_cache ();
2845   free (dates);
2846 
2847   return 0;
2848 }
2849 
2850 /* Process a log line and set the data into the corresponding data
2851  * structure. */
2852 static void
process_log(GLogItem * logitem)2853 process_log (GLogItem * logitem) {
2854   GModule module;
2855   const GParse *parse = NULL;
2856   size_t idx = 0;
2857   uint32_t numdate = logitem->numdate;
2858 
2859   if (conf.keep_last > 0 && clean_old_data_by_date (numdate) == -1)
2860     return;
2861 
2862   /* insert date and start partitioning tables */
2863   if (ht_insert_date (numdate) == -1)
2864     return;
2865 
2866   /* Insert one unique visitor key per request to avoid the
2867    * overhead of storing one key per module */
2868   if ((logitem->uniq_nkey = ht_insert_unique_key (numdate, logitem->uniq_key)) == 0)
2869     return;
2870 
2871   /* If we need to store user agents per IP, then we store them and retrieve
2872    * its numeric key.
2873    * It maintains two maps, one for key -> value, and another
2874    * map for value -> key*/
2875   if (conf.list_agents)
2876     ins_agent_key_val (logitem, numdate);
2877 
2878   FOREACH_MODULE (idx, module_list) {
2879     module = module_list[idx];
2880     if (!(parse = panel_lookup (module)))
2881       continue;
2882     map_log (logitem, parse, module);
2883   }
2884 
2885   count_bw (numdate, logitem->resp_size);
2886   /* don't ignore line but neither count as valid */
2887   if (logitem->ignorelevel != IGNORE_LEVEL_REQ)
2888     count_valid (numdate);
2889 }
2890 
2891 /* Determine if the current log has the content from the last time it was
2892  * parsed. It does this by comparing READ_BYTES against the beginning of the
2893  * log.
2894  *
2895  * Returns 1 if the content is likely the same or no data to compare
2896  * Returns 0 if it has different content */
2897 static int
is_likely_same_log(GLog * glog,const GLastParse * lp)2898 is_likely_same_log (GLog * glog, const GLastParse * lp) {
2899   size_t size = 0;
2900 
2901   if (!lp->size)
2902     return 1;
2903 
2904   /* Must be a LOG */
2905   size = MIN (glog->snippetlen, lp->snippetlen);
2906   if (glog->snippet[0] != '\0' && lp->snippet[0] != '\0' &&
2907       memcmp (glog->snippet, lp->snippet, size) == 0)
2908     return 1;
2909 
2910   return 0;
2911 }
2912 
2913 /* Determine if we should insert new record or if it's a duplicate record from
2914  * a previoulsy persisted dataset
2915  *
2916  * Returns 1 if it thinks the record it's being restored from disk
2917  * Returns 0 if we need to parse the record */
2918 static int
should_restore_from_disk(GLog * glog)2919 should_restore_from_disk (GLog * glog) {
2920   GLastParse lp = { 0 };
2921 
2922   if (!conf.restore)
2923     return 0;
2924 
2925   lp = ht_get_last_parse (glog->inode);
2926 
2927   /* No last parse timestamp, continue parsing as we got nothing to compare
2928    * against */
2929   if (!lp.ts)
2930     return 0;
2931 
2932   /* If our current line is greater or equal (zero indexed) to the last parsed
2933    * line and have equal timestamps, then keep parsing then */
2934   if (glog->inode && is_likely_same_log (glog, &lp)) {
2935     if (glog->size > lp.size && glog->read >= lp.line)
2936       return 0;
2937     return 1;
2938   }
2939 
2940   /* No inode (probably a pipe), prior or equal timestamps means restore from
2941    * disk (exclusive) */
2942   if (!glog->inode && lp.ts >= glog->lp.ts)
2943     return 1;
2944 
2945   /* If not likely the same content, then fallback to the following checks */
2946   /* If timestamp is greater than last parsed, read the line then */
2947   if (glog->lp.ts > lp.ts)
2948     return 0;
2949 
2950   /* Check if current log size is smaller than the one last parsed, if it is,
2951    * it was possibly truncated and thus it may be smaller, so fallback to
2952    * timestamp even if they are equal to the last parsed timestamp */
2953   else if (glog->size < lp.size && glog->lp.ts == lp.ts)
2954     return 0;
2955 
2956   /* Everything else we ignore it. For instance, we if current log size is
2957    * greater than the one last parsed, if the timestamp are equal, we ignore the
2958    * request.
2959    *
2960    * **NOTE* We try to play safe here as we would rather miss a few lines
2961    * than double-count a few. */
2962   return 1;
2963 }
2964 
2965 static void
process_invalid(GLog * glog,GLogItem * logitem,const char * line)2966 process_invalid (GLog * glog, GLogItem * logitem, const char *line) {
2967   GLastParse lp = { 0 };
2968 
2969   /* if not restoring from disk, then count entry as proceeded and invalid */
2970   if (!conf.restore) {
2971     count_process_and_invalid (glog, line);
2972     return;
2973   }
2974 
2975   lp = ht_get_last_parse (glog->inode);
2976 
2977   /* If our current line is greater or equal (zero indexed) to the last parsed
2978    * line then keep parsing then */
2979   if (glog->inode && is_likely_same_log (glog, &lp)) {
2980     /* only count invalids if we're past the last parsed line */
2981     if (glog->size > lp.size && glog->read >= lp.line)
2982       count_process_and_invalid (glog, line);
2983     return;
2984   }
2985 
2986   /* no timestamp to compare against, just count the invalid then */
2987   if (!logitem->numdate) {
2988     count_process_and_invalid (glog, line);
2989     return;
2990   }
2991 
2992   /* if there's a valid timestamp, count only if greater than last parsed ts */
2993   if ((glog->lp.ts = mktime (&logitem->dt)) == -1)
2994     return;
2995 
2996   /* check if we were able to at least parse the date/time, if no date/time
2997    * then we simply don't count the entry as proceed & invalid to attempt over
2998    * counting restored data */
2999   if (should_restore_from_disk (glog) == 0)
3000     count_process_and_invalid (glog, line);
3001 }
3002 
3003 static int
parse_json_specifier(void * ptr_data,char * key,char * str)3004 parse_json_specifier (void *ptr_data, char *key, char *str) {
3005   GLogItem *logitem = (GLogItem *) ptr_data;
3006   char *spec = NULL;
3007   int ret = 0;
3008 
3009   if (!(spec = ht_get_json_logfmt (key)) || 0 == strlen (str))
3010     return 0;
3011 
3012   ret = parse_format (logitem, str, spec);
3013   free (spec);
3014 
3015   return ret;
3016 }
3017 
3018 static int
parse_json_format(GLogItem * logitem,char * str)3019 parse_json_format (GLogItem * logitem, char *str) {
3020   return parse_json_string (logitem, str, parse_json_specifier);
3021 }
3022 
3023 /* Process a line from the log and store it accordingly taking into
3024  * account multiple parsing options prior to setting data into the
3025  * corresponding data structure.
3026  *
3027  * On success, 0 is returned */
3028 int
pre_process_log(GLog * glog,char * line,int dry_run)3029 pre_process_log (GLog * glog, char *line, int dry_run) {
3030   GLogItem *logitem;
3031   int ret = 0;
3032   char *fmt = conf.log_format;
3033 
3034   /* soft ignore these lines */
3035   if (valid_line (line))
3036     return -1;
3037 
3038   logitem = init_log_item (glog);
3039 
3040   /* Parse a line of log, and fill structure with appropriate values */
3041   if (conf.is_json_log_format)
3042     ret = parse_json_format (logitem, line);
3043   else
3044     ret = parse_format (logitem, line, fmt);
3045 
3046   if (ret || (ret = verify_missing_fields (logitem))) {
3047     process_invalid (glog, logitem, line);
3048     goto cleanup;
3049   }
3050 
3051   if ((glog->lp.ts = mktime (&logitem->dt)) == -1)
3052     goto cleanup;
3053 
3054   if (should_restore_from_disk (glog))
3055     goto cleanup;
3056 
3057   count_process (glog);
3058 
3059   /* agent will be null in cases where %u is not specified */
3060   if (logitem->agent == NULL)
3061     logitem->agent = alloc_string ("-");
3062 
3063   /* testing log only */
3064   if (dry_run)
3065     goto cleanup;
3066 
3067   logitem->ignorelevel = ignore_line (logitem);
3068   /* ignore line */
3069   if (logitem->ignorelevel == IGNORE_LEVEL_PANEL)
3070     goto cleanup;
3071 
3072   if (is_404 (logitem))
3073     logitem->is_404 = 1;
3074   else if (is_static (logitem->req))
3075     logitem->is_static = 1;
3076 
3077   logitem->uniq_key = get_uniq_visitor_key (logitem);
3078 
3079   process_log (logitem);
3080 
3081 cleanup:
3082   free_glog (logitem);
3083 
3084   return ret;
3085 }
3086 
3087 /* Entry point to process the given live from the log.
3088  *
3089  * On error, 1 is returned.
3090  * On success or soft ignores, 0 is returned. */
3091 static int
read_line(GLog * glog,char * line,int * test,int * cnt,int dry_run)3092 read_line (GLog * glog, char *line, int *test, int *cnt, int dry_run) {
3093   int ret = 0;
3094 
3095   /* start processing log line */
3096   if ((ret = pre_process_log (glog, line, dry_run)) == 0 && *test)
3097     *test = 0;
3098 
3099   /* soft ignores */
3100   if (ret == -1)
3101     return 0;
3102 
3103   /* reached num of lines to test and no valid records were found, log
3104    * format is likely not matching */
3105   if (conf.num_tests && ++(*cnt) == (int) conf.num_tests && *test) {
3106     uncount_processed (glog);
3107     uncount_invalid (glog);
3108     return 1;
3109   }
3110 
3111   return 0;
3112 }
3113 
3114 /* A replacement for GNU getline() to dynamically expand fgets buffer.
3115  *
3116  * On error, NULL is returned.
3117  * On success, the malloc'd line is returned. */
3118 char *
fgetline(FILE * fp)3119 fgetline (FILE * fp) {
3120   char buf[LINE_BUFFER] = { 0 };
3121   char *line = NULL, *tmp = NULL;
3122   size_t linelen = 0, len = 0;
3123 
3124   while (1) {
3125     if (!fgets (buf, sizeof (buf), fp)) {
3126       if (conf.process_and_exit && errno == EAGAIN) {
3127         nanosleep ((const struct timespec[]) { {0, 100000000L} }, NULL);
3128         continue;
3129       } else
3130         break;
3131     }
3132 
3133     len = strlen (buf);
3134 
3135     /* overflow check */
3136     if (SIZE_MAX - len - 1 < linelen)
3137       break;
3138 
3139     if ((tmp = realloc (line, linelen + len + 1)) == NULL)
3140       break;
3141 
3142     line = tmp;
3143     /* append */
3144     strcpy (line + linelen, buf);
3145     linelen += len;
3146 
3147     if (feof (fp) || buf[len - 1] == '\n')
3148       return line;
3149   }
3150   free (line);
3151 
3152   return NULL;
3153 }
3154 
3155 /* Iterate over the log and read line by line (use GNU get_line to parse the
3156  * whole line).
3157  *
3158  * On error, 1 is returned.
3159  * On success, 0 is returned. */
3160 #ifdef WITH_GETLINE
3161 static int
read_lines(FILE * fp,GLog * glog,int dry_run)3162 read_lines (FILE * fp, GLog * glog, int dry_run) {
3163   char *line = NULL;
3164   int ret = 0, cnt = 0, test = conf.num_tests > 0 ? 1 : 0;
3165 
3166   glog->bytes = 0;
3167   while ((line = fgetline (fp)) != NULL) {
3168     /* handle SIGINT */
3169     if (conf.stop_processing)
3170       goto out;
3171     if ((ret = read_line (glog, line, &test, &cnt, dry_run)))
3172       goto out;
3173     if (dry_run && NUM_TESTS == cnt)
3174       goto out;
3175     glog->bytes += strlen (line);
3176     free (line);
3177     glog->read++;
3178   }
3179 
3180   /* if no data was available to read from (probably from a pipe) and
3181    * still in test mode, we simply return until data becomes available */
3182   if (!line && (errno == EAGAIN || errno == EWOULDBLOCK) && test)
3183     return 0;
3184 
3185   return (line && test) || ret || (!line && test && glog->processed);
3186 
3187 out:
3188   free (line);
3189   /* fails if
3190      - we're still reading the log but the test flag was still set
3191      - ret flag is not 0, read_line failed
3192      - reached the end of file, test flag was still set and we processed lines */
3193   return test || ret || (test && glog->processed);
3194 }
3195 #endif
3196 
3197 /* Iterate over the log and read line by line (uses a buffer of fixed size).
3198  *
3199  * On error, 1 is returned.
3200  * On success, 0 is returned. */
3201 #ifndef WITH_GETLINE
3202 static int
read_lines(FILE * fp,GLog * glog,int dry_run)3203 read_lines (FILE * fp, GLog * glog, int dry_run) {
3204   char *s = NULL;
3205   char line[LINE_BUFFER] = { 0 };
3206   int ret = 0, cnt = 0, test = conf.num_tests > 0 ? 1 : 0;
3207 
3208   glog->bytes = 0;
3209   while ((s = fgets (line, LINE_BUFFER, fp)) != NULL) {
3210     /* handle SIGINT */
3211     if (conf.stop_processing)
3212       break;
3213     if ((ret = read_line (glog, line, &test, &cnt, dry_run)))
3214       break;
3215     if (dry_run && NUM_TESTS == cnt)
3216       break;
3217     glog->bytes += strlen (line);
3218     glog->read++;
3219   }
3220 
3221   /* if no data was available to read from (probably from a pipe) and
3222    * still in test mode, we simply return until data becomes available */
3223   if (!s && (errno == EAGAIN || errno == EWOULDBLOCK) && test)
3224     return 0;
3225 
3226   /* fails if
3227      - we're still reading the log but the test flag was still set
3228      - ret flag is not 0, read_line failed
3229      - reached the end of file, test flag was still set and we processed lines */
3230   return (s && test) || ret || (!s && test && glog->processed);
3231 }
3232 #endif
3233 
3234 /* Read the given log file and attempt to mmap a fixed number of bytes so we
3235  * can compare its content on future runs.
3236  *
3237  * On error, 1 is returned.
3238  * On success, 0 is returned. */
3239 int
set_initial_persisted_data(GLog * glog,FILE * fp,const char * fn)3240 set_initial_persisted_data (GLog * glog, FILE * fp, const char *fn) {
3241   size_t len;
3242 
3243   /* reset the snippet */
3244   memset (glog->snippet, 0, sizeof (glog->snippet));
3245   glog->snippetlen = 0;
3246 
3247   if (glog->size == 0)
3248     return 1;
3249 
3250   len = MIN (glog->size, READ_BYTES);
3251   if ((fread (glog->snippet, len, 1, fp)) != 1 && ferror (fp))
3252     FATAL ("Unable to fread the specified log file '%s'", fn);
3253   glog->snippetlen = len;
3254 
3255   fseek (fp, 0, SEEK_SET);
3256 
3257   return 0;
3258 }
3259 
3260 static void
persist_last_parse(GLog * glog)3261 persist_last_parse (GLog * glog) {
3262   /* insert last parsed data for the recently file parsed */
3263   if (glog->inode && glog->size) {
3264     glog->lp.line = glog->read;
3265     glog->lp.snippetlen = glog->snippetlen;
3266 
3267     memcpy (glog->lp.snippet, glog->snippet, glog->snippetlen);
3268 
3269     ht_insert_last_parse (glog->inode, glog->lp);
3270   }
3271   /* probably from a pipe */
3272   else if (!glog->inode) {
3273     ht_insert_last_parse (0, glog->lp);
3274   }
3275 }
3276 
3277 /* Read the given log line by line and process its data.
3278  *
3279  * On error, 1 is returned.
3280  * On success, 0 is returned. */
3281 static int
read_log(GLog * glog,int dry_run)3282 read_log (GLog * glog, int dry_run) {
3283   FILE *fp = NULL;
3284   int piping = 0;
3285   struct stat fdstat;
3286 
3287   /* Ensure we have a valid pipe to read from stdin. Only checking for
3288    * conf.read_stdin without verifying for a valid FILE pointer would certainly
3289    * lead to issues. */
3290   if (glog->filename[0] == '-' && glog->filename[1] == '\0' && glog->pipe) {
3291     fp = glog->pipe;
3292     glog->piping = piping = 1;
3293   }
3294 
3295   /* make sure we can open the log (if not reading from stdin) */
3296   if (!piping && (fp = fopen (glog->filename, "r")) == NULL)
3297     FATAL ("Unable to open the specified log file '%s'. %s", glog->filename, strerror (errno));
3298 
3299   /* grab the inode of the file being parsed */
3300   if (!piping && stat (glog->filename, &fdstat) == 0) {
3301     glog->inode = fdstat.st_ino;
3302     glog->size = glog->lp.size = fdstat.st_size;
3303     set_initial_persisted_data (glog, fp, glog->filename);
3304   }
3305 
3306   /* read line by line */
3307   if (read_lines (fp, glog, dry_run)) {
3308     if (!piping)
3309       fclose (fp);
3310     return 1;
3311   }
3312 
3313   persist_last_parse (glog);
3314 
3315   /* close log file if not a pipe */
3316   if (!piping)
3317     fclose (fp);
3318 
3319   return 0;
3320 }
3321 
3322 static void
set_log_processing(Logs * logs,GLog * glog)3323 set_log_processing (Logs * logs, GLog * glog) {
3324   lock_spinner ();
3325   logs->processed = &(glog->processed);
3326   logs->filename = glog->filename;
3327   unlock_spinner ();
3328 }
3329 
3330 /* Entry point to parse the log line by line.
3331  *
3332  * On error, 1 is returned.
3333  * On success, 0 is returned. */
3334 int
parse_log(Logs * logs,int dry_run)3335 parse_log (Logs * logs, int dry_run) {
3336   GLog *glog = NULL;
3337   const char *err_log = NULL;
3338   int idx;
3339 
3340   /* verify that we have the required formats */
3341   if ((err_log = verify_formats ()))
3342     FATAL ("%s", err_log);
3343 
3344   /* no data piped, no logs passed, load from disk only then */
3345   if (conf.restore && !logs->restored)
3346     logs->restored = rebuild_rawdata_cache ();
3347 
3348   /* no data piped, no logs passed, load from disk only then */
3349   if (conf.restore && !conf.filenames_idx && !conf.read_stdin) {
3350     logs->load_from_disk_only = 1;
3351     return 0;
3352   }
3353 
3354   for (idx = 0; idx < logs->size; ++idx) {
3355     glog = &logs->glog[idx];
3356     set_log_processing (logs, glog);
3357 
3358     if (read_log (glog, dry_run))
3359       return 1;
3360 
3361     glog->length = glog->bytes;
3362   }
3363 
3364   return 0;
3365 }
3366 
3367 /* Ensure we have valid hits
3368  *
3369  * On error, an array of pointers containing the error strings.
3370  * On success, NULL is returned. */
3371 char **
test_format(Logs * logs,int * len)3372 test_format (Logs * logs, int *len) {
3373   char **errors = NULL;
3374   GLog *glog = NULL;
3375   int i;
3376 
3377   if (parse_log (logs, 1) == 0)
3378     return NULL;
3379 
3380   for (i = 0; i < logs->size; ++i) {
3381     glog = &logs->glog[i];
3382     if (!glog->log_erridx)
3383       continue;
3384     break;
3385   }
3386 
3387   errors = xcalloc (glog->log_erridx, sizeof (char *));
3388   *len = glog->log_erridx;
3389   for (i = 0; i < glog->log_erridx; ++i)
3390     errors[i] = xstrdup (glog->errors[i]);
3391   free_logerrors (glog);
3392 
3393   return errors;
3394 }
3395