1 /* -*-mode: c; indent-tabs-mode: nil; c-basic-offset: 2; -*-
2  */
3 /**
4  * Filter utilities for the quality of service module mod_qos
5  * used to create white list rules for request line filters.
6  *
7  * See http://mod-qos.sourceforge.net/ for further
8  * details.
9  *
10  * Copyright (C) 2020 Pascal Buchbinder
11  *
12  * Licensed to the Apache Software Foundation (ASF) under one or more
13  * contributor license agreements.  See the NOTICE file distributed with
14  * this work for additional information regarding copyright ownership.
15  * The ASF licenses this file to You under the Apache License, Version 2.0
16  * (the "License"); you may not use this file except in compliance with
17  * the License.  You may obtain a copy of the License at
18  *
19  *     http://www.apache.org/licenses/LICENSE-2.0
20  *
21  * Unless required by applicable law or agreed to in writing, software
22  * distributed under the License is distributed on an "AS IS" BASIS,
23  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24  * See the License for the specific language governing permissions and
25  * limitations under the License.
26  *
27  */
28 
29 static const char revision[] = "$Id: qsfilter2.c 2595 2020-01-03 06:19:53Z pbuchbinder $";
30 
31 /* system */
32 #include <stdio.h>
33 #include <errno.h>
34 #include <string.h>
35 
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <time.h>
39 
40 #include <pcre.h>
41 
42 /* apr */
43 #include <apr.h>
44 #include <apr_uri.h>
45 #include <apr_signal.h>
46 #include <apr_strings.h>
47 #include <apr_network_io.h>
48 #include <apr_file_io.h>
49 #include <apr_time.h>
50 #include <apr_getopt.h>
51 #include <apr_general.h>
52 #include <apr_lib.h>
53 #include <apr_portable.h>
54 #include <apr_thread_proc.h>
55 #include <apr_thread_cond.h>
56 #include <apr_thread_mutex.h>
57 #include <apr_support.h>
58 //#include <ap_config.h>
59 
60 /* OpenSSL  */
61 #include <openssl/safestack.h>
62 
63 #include "qs_util.h"
64 
/* buffer size constants.
   NOTE(review): neither constant is referenced in the visible part of
   this file; presumably they size the line and body buffers - confirm */
#define MAX_LINE 32768
/* 2mb (2 * 1024 * 1024 bytes) */
#define MAX_BODY_BUFFER 2097152
/* ASCII codes of carriage return and line feed */
#define CR 13
#define LF 10
70 
/* part of an url.
   NOTE(review): the type is not referenced in the visible part of this
   file; presumably it selects path vs. query handling - confirm */
typedef enum  {
  QS_UT_PATH,
  QS_UT_QUERY
} qs_url_type_e;
75 
/* characters which are escaped by a backslash when they are copied
   into a generated pattern (see qos_escape_pcre() and qos_2pcre()) */
#define QS_PCRE_RESERVED      "{}[]()^$.|*+?\\-"
//#define QS_PCRE_RESERVED      "{}[]()^$.|*+?\"'\\-"

/* reserved (to be escaped): {}[]()^$.|*+?\- */
/* character class fragments.
   NOTE(review): not referenced in the visible part of this file; the
   names resemble the RFC 3986 character sets (unreserved, gen-delims,
   sub-delims) - confirm */
#define QS_UNRESERVED         "a-zA-Z0-9-\\._~% "
#define QS_GEN                ":/\\?#\\[\\]@"
#define QS_SUB                "!$&'\\(\\)\\*\\+,;="
#define QS_SUB_S              "!$&\\(\\)\\*\\+,;="

/* pattern compiled (anchored by ^ and $) into pcre_simple_path */
#define QS_SIMPLE_PATH_PCRE   "(/[a-zA-Z0-9\\-_]+)+[/]?\\.?[a-zA-Z]{0,4}"
/* base64/hex detection; qos_init_pcre() appends the quantifier
   "{<m_base64>,}" to each of these groups before compiling them */
#define QS_B64                "([a-z]+[a-z0-9]*[A-Z]+[A-Z0-9]*)"
#define QS_HX                 "([A-F0-9]*[A-F]+[0-9]+[A-F0-9]*)"

/* number of elements of the offset vector passed to pcre_exec()
   by qos_detect_b64() */
#define QS_OVECCOUNT 3

/* request line detection: "<method> /<uri> HTTP/" (compiled into
   m_req_regex by qos_init_pcre()) */
#define QOSC_REQ          "(OPTIONS|GET|HEAD|POST|PUT|DELETE|TRACE|CONNECT|PROPFIND|PROPPATCH|MKCOL|COPY|MOVE|LOCK|UNLOCK|VERSION-CONTROL|REPORT|CHECKOUT|CHECKIN|UNCHECKOUT|MKWORKSPACE|UPDATE|LABEL|MERGE|BASELINE-CONTROL|MKACTIVITY|ORDERPATCH|ACL|PATCH|SEARCH|BCOPY|BDELETE|BMOVE|BPROPFIND|BPROPPATCH|NOTIFY|POLL|SUBSCRIBE|UNSUBSCRIBE|X-MS-ENUMATTS|RPC_IN_DATA|RPC_OUT_DATA) /[\x20-\x21\x23-\xFF]* HTTP/"
93 
/* patterns compiled once by qos_init_pcre(); pcre_b64 and pcre_hx are
   executed by qos_detect_b64() */
pcre *pcre_b64;
pcre *pcre_hx;
pcre *pcre_simple_path;

/* decoding mode bits stored in m_mode; qos_unescaping() evaluates
   QOS_DEC_MODE_FLAGS_UNI to enable the %uXXXX / \uXXXX decoding */
#define QOS_DEC_MODE_FLAGS_URL        0x00
#define QOS_DEC_MODE_FLAGS_HTML       0x01
#define QOS_DEC_MODE_FLAGS_UNI        0x02
#define QOS_DEC_MODE_FLAGS_ANSI       0x04

/* global variables to store settings */
/* NOTE(review): the option letters below are taken from the usage()
   text; the option parser is not part of the visible code - confirm */
static int m_mode = QOS_DEC_MODE_FLAGS_URL;   /* decoding mode bits (presumably -u) */
static int m_base64 = 5;                      /* min. repetitions used by the base64/hex detection (presumably -b) */
static int m_verbose = 1;                     /* 0=silent, 1=rule source, 2=detailed */
static int m_path_depth = 1;                  /* presumably -d */
static int m_redundant = 1;                   /* presumably cleared by -n */
static int m_query_pcre = 0;                  /* presumably -p */
static int m_query_multi_pcre = 0;            /* presumably -m */
static int m_query_o_pcre = 0;                /* presumably -o */
static int m_query_single_pcre = 0;           /* presumably -s */
static int m_query_len_pcre = 10;             /* default shown by usage() for -l */
static int m_exit_on_error = 0;               /* presumably -e */
static int m_handler = 0;                     /* presumably -h */
static pcre *m_req_regex = NULL;              /* QOSC_REQ, compiled by qos_init_pcre() */
static int m_log_req_regex = 0;
static const char *m_pfx = NULL;              /* presumably -k */
static const char *m_filter = NULL;           /* presumably -f */
120 
/* a single white list rule */
typedef struct {
  pcre *pcre;             /* compiled pattern (executed by qos_get_used()) */
  pcre_extra *extra;      /* pcre_study() data passed to pcre_exec() together with pcre */
  char *rule;             /* string the rules are ordered by (see STACK_qs_cmp()) */
  char *path;             /* rules having the same path are merged by qos_query_optimization() */
  char *query_m_string;   /* "|" separated strings joined when rules are merged (may be NULL) */
  char *query_m_pcre;     /* characters concatenated when rules are merged (may be NULL) */
  int fragment;           /* rules with a non-zero value are skipped by qos_query_optimization() */
} qs_rule_t;
130 
131 
132 /* openssl stack compare function used to sort the rules */
STACK_qs_cmp(const char * const * _pA,const char * const * _pB)133 int STACK_qs_cmp(const char * const *_pA, const char * const *_pB) {
134   qs_rule_t *pA=*(( qs_rule_t **)_pA);
135   qs_rule_t *pB=*(( qs_rule_t **)_pB);
136   return strcmp(pA->rule,pB->rule);
137 }
138 
139 /* compiles a pcre (exit on error) */
qos_pcre_compile(char * pattern,int option)140 static pcre *qos_pcre_compile(char *pattern, int option) {
141   const char *errptr = NULL;
142   int erroffset;
143   pcre *pcre = pcre_compile(pattern, PCRE_DOTALL|option, &errptr, &erroffset, NULL);
144   if(pcre == NULL) {
145     fprintf(stderr, "ERROR, rule <%s> could not compile pcre at position %d,"
146             " reason: %s\n", pattern, erroffset, errptr);
147     exit(1);
148   }
149   return pcre;
150 }
151 
152 /* tries to detect base64/hex patterns (mix of upper and lower case characters) */
qos_detect_b64(char * line,int silent)153 static char *qos_detect_b64(char *line, int silent) {
154   int ovector[QS_OVECCOUNT];
155   int rc_c = pcre_exec(pcre_b64, NULL, line, strlen(line), 0, 0, ovector, QS_OVECCOUNT);
156   if(rc_c >= 0) {
157     if((m_verbose > 1) && !silent) printf("  B64: %.*s\n",
158                                           ovector[1] - ovector[0], &line[ovector[0]]);
159     return &line[ovector[0]];
160   }
161   rc_c = pcre_exec(pcre_hx, NULL, line, strlen(line), 0, 0, ovector, QS_OVECCOUNT);
162   if(rc_c >= 0) {
163     if((m_verbose > 1) && !silent) printf("  HX: %.*s\n",
164                                           ovector[1] - ovector[0], &line[ovector[0]]);
165     return &line[ovector[0]];
166   }
167   return NULL;
168 }
169 
170 /* escape double quotes and backslash (to be used for Apache directive) */
qs_apache_escape(apr_pool_t * pool,const char * line)171 static char *qs_apache_escape(apr_pool_t *pool, const char *line) {
172   char *ret = apr_pcalloc(pool, strlen(line) * 4);
173   int i = 0;
174   const char *in = line;
175   while(in && in[0]) {
176     if(in[0] == '"') {
177       ret[i] = '\\';
178       i++;
179       ret[i] = 'x';
180       i++;
181       ret[i] = '2';
182       i++;
183       ret[i] = '2';
184       i++;
185     } else if(in[0] == '\\' && in[1] == '\\') {
186       ret[i] = '\\';
187       i++;
188       ret[i] = 'x';
189       i++;
190       ret[i] = '5';
191       i++;
192       ret[i] = 'c';
193       i++;
194       in++;
195     } else {
196       ret[i] = (char)in[0];
197       i++;
198     }
199     in++;
200   }
201   return ret;
202 }
203 
204 /* escape a string in order to be used withn a pcre */
qos_escape_pcre(apr_pool_t * pool,char * line)205 static char *qos_escape_pcre(apr_pool_t *pool, char *line) {
206   int i = 0;
207   unsigned char prev = 0;
208   unsigned char *in = (unsigned char *)line;
209   char *ret = apr_pcalloc(pool, strlen(line) * 4);
210   int reti = 0;
211   if(strlen(line) == 0) return "";
212   while(in[i]) {
213     if(strchr(QS_PCRE_RESERVED, in[i]) != NULL) {
214       if(prev && (prev == '\\')) {
215         /* already escaped */
216         ret[reti] = in[i];
217         reti++;
218       } else if(prev && (in[i] == '\\') && (strchr(QS_PCRE_RESERVED, in[i+1]) != NULL)) {
219         /* escape char */
220         ret[reti] = in[i];
221         reti++;
222       } else {
223         ret[reti] = '\\';
224         reti++;
225         ret[reti] = in[i];
226         reti++;
227       }
228     } else if((in[i] < ' ') || (in[i]  > '~')) {
229       sprintf(&ret[reti], "\\x%02x", in[i]);
230       reti = reti + 4;
231     } else {
232       ret[reti] = in[i];
233       reti++;
234     }
235     prev = in[i];
236     i++;
237   }
238   return ret;
239 }
240 
/* value (0..15) of a single hexadecimal digit; the caller has to
   ensure that "ch" is a hex digit (see qos_ishex()) */
static int qos_hex_digit_value(int ch) {
  /* the ctype functions are only defined for unsigned char values
     (and EOF), plain char may be negative */
  if(isdigit((unsigned char)ch)) {
    return ch - '0';
  }
  if(isupper((unsigned char)ch)) {
    return ch - ('A' - 10);
  }
  return ch - ('a' - 10);
}

/* helper for url decoding: converts the two hexadecimal digits x[0]
   (high nibble) and x[1] (low nibble) into the value they represent */
static int qos_hex2c(const char *x) {
  int i = qos_hex_digit_value(x[0]);
  i <<= 4;
  i += qos_hex_digit_value(x[1]);
  return i;
}
264 
/* returns 1 if "x" is a hexadecimal digit (0-9, a-f, A-F), else 0 */
static int qos_ishex(char x) {
  const int digit = (x >= '0') && (x <= '9');
  const int lower = (x >= 'a') && (x <= 'f');
  const int upper = (x >= 'A') && (x <= 'F');
  return (digit || lower || upper) ? 1 : 0;
}
271 
272 /* url decoding */
qos_unescaping(char * x)273 static int qos_unescaping(char *x) {
274   int i, j, ch;
275   if (x[0] == '\0')
276     return 0;
277   for (i = 0, j = 0; x[i] != '\0'; i++, j++) {
278     ch = x[i];
279     if(ch == '%' && qos_ishex(x[i + 1]) && qos_ishex(x[i + 2])) {
280       ch = qos_hex2c(&x[i + 1]);
281       i += 2;
282     } else if((m_mode & QOS_DEC_MODE_FLAGS_UNI) &&
283               ((ch == '%') || (ch == '\\')) &&
284               ((x[i + 1] == 'u') || (x[i + 1] == 'U')) &&
285               qos_ishex(x[i + 2]) &&
286               qos_ishex(x[i + 3]) &&
287               qos_ishex(x[i + 4]) &&
288               qos_ishex(x[i + 5])) {
289       /* unicode %uXXXX */
290       ch = qos_hex2c(&x[i + 4]);
291       if((ch > 0x00) && (ch < 0x5f) &&
292          ((x[i + 2] == 'f') || (x[i + 2] == 'F')) &&
293          ((x[i + 3] == 'f') || (x[i + 3] == 'F'))) {
294         ch += 0x20;
295       }
296       i += 5;
297     } else if (ch == '\\' && (x[i + 1] == 'x') && qos_ishex(x[i + 2]) && qos_ishex(x[i + 3])) {
298       ch = qos_hex2c(&x[i + 2]);
299       i += 3;
300     } else if (ch == '+') {
301       ch = ' ';
302     }
303     x[j] = ch;
304   }
305   x[j] = '\0';
306   if(strlen(x) != j) {
307     fprintf(stderr, "WARNING, found escaped null char %s\n", x);
308   }
309   return j;
310 }
311 
/* reads a single line from "f" into the buffer "s" of size "n".
   A line ends at a line feed, at an EOT character (0x04), at the end
   of the file or when the buffer is full (the character read last is
   dropped in this case). A carriage return is skipped. The line is
   stored without its terminator and is always null terminated.
   Returns 1 if the end of the file has been reached (or reading
   failed), 0 otherwise. */
static int qos_fgetline(char *s, int n, FILE *f) {
  int i = 0;
  if(n <= 0) {
    /* no room, not even for the terminating null */
    return 1;
  }
  while (1) {
    /* keep the int returned by fgetc() to distinguish EOF from data */
    int c = fgetc(f);
    if (c == '\r') {
      c = fgetc(f);
    }
    if (c == EOF) {
      /* end of file or read error: stop here instead of filling the
         remaining buffer with EOF markers */
      s[i] = '\0';
      return 1;
    }
    if ((c == 0x4) || (c == '\n') || (i == (n - 1))) {
      s[i] = '\0';
      return (feof(f) ? 1 : 0);
    }
    s[i] = (char) c;
    ++i;
  }
}
326 
327 /* init global pcre */
qos_init_pcre()328 static void qos_init_pcre() {
329   char buf[1024];
330   sprintf(buf, "%s{%d,}", QS_B64, m_base64);
331   pcre_b64 = qos_pcre_compile(buf, 0);
332   sprintf(buf, "%s{%d,}", QS_HX, m_base64);
333   pcre_hx = qos_pcre_compile(buf, 0);
334   pcre_simple_path = qos_pcre_compile("^"QS_SIMPLE_PATH_PCRE"$", 0);
335   m_req_regex = qos_pcre_compile(QOSC_REQ, 0);
336 }
337 
usage(char * cmd,int man)338 static void usage(char *cmd, int man) {
339   char space[1024];
340   memset(space, ' ', 1024);
341   space[strlen(cmd)] = '\0';
342   if(man) {
343     //.TH [name of program] [section number] [center footer] [left footer] [center header]
344     printf(".TH %s 1 \"%s\" \"mod_qos utilities %s\" \"%s man page\"\n", qs_CMD(cmd), man_date,
345            man_version, cmd);
346   }
347   printf("\n");
348   if(man) {
349     printf(".SH NAME\n");
350   }
351   qs_man_print(man, "%s - an utility to generate mod_qos request line rules out from\n",
352                cmd);
353   qs_man_print(man, "existing access/audit log data.\n");
354   printf("\n");
355   if(man) {
356     printf(".SH SYNOPSIS\n");
357   }
358   qs_man_print(man, "%s%s -i <path> [-c <path>] [-d <num>] [-h] [-b <num>]\n", man ? "" : "Usage: ", cmd);
359   qs_man_print(man, "       %s [-p|-s|-m|-o] [-l <len>] [-n] [-e] [-u 'uni']\n", space);
360   qs_man_print(man, "       %s [-k <prefix>] [-t] [-f <path>] [-v 0|1|2]\n", space);
361   printf("\n");
362   if(man) {
363     printf(".SH DESCRIPTION\n");
364   } else {
365     printf("Summary\n");
366   }
367   qs_man_print(man, " mod_qos implements a request filter which validates each request\n");
368   qs_man_print(man, " line. The module supports both, negative and positive security\n");
369   qs_man_print(man, " model. The QS_Deny* directives are used to specify request line\n");
370   qs_man_print(man, " patterns which are not allowed to access the server (negative\n");
371   qs_man_print(man, " security model / blacklist). These rules are used to restrict\n");
372   qs_man_print(man, " access to certain resources which should not be available to\n");
373   qs_man_print(man, " users or to protect the server from malicious patterns. The\n");
374   qs_man_print(man, " QS_Permit* rules implement a positive security model (whitelist).\n");
375   qs_man_print(man, " These directives are used to define allowed request line patterns.\n");
376   qs_man_print(man, " Request which do not match any of these patterns are not allowed\n");
377   qs_man_print(man, " to access the server.\n");
378   if(man) printf("\n\n");
379   qs_man_print(man, " %s is an audit log analyzer used to generate filter\n", cmd);
380   qs_man_print(man, " rules (perl compatible regular expressions) which may be used\n");
381   qs_man_print(man, " by mod_qos to deny access for suspect requests (QS_PermitUri rules).\n");
382   qs_man_print(man, " It parses existing audit log files in order to generate request\n");
383   qs_man_print(man, " patterns covering all allowed requests.\n");
384   printf("\n");
385   if(man) {
386     printf(".SH OPTIONS\n");
387   } else {
388     printf("Options\n");
389   }
390   if(man) printf(".TP\n");
391   qs_man_print(man, "  -i <path>\n");
392   if(man) printf("\n");
393   qs_man_print(man, "     Input file containing request URIs.\n");
394   qs_man_print(man, "     The URIs for this file have to be extracted from the servers\n");
395   qs_man_print(man, "     access logs. Each line of the input file contains a request\n");
396   qs_man_print(man, "     URI consisting of a path and and query.\n");
397   printf("\n");
398   printf("     Example:\n");
399   qs_man_println(man, "       /aaa/index.do\n");
400   qs_man_println(man, "       /aaa/edit?image=1.jpg\n");
401   qs_man_println(man, "       /aaa/image/1.jpg\n");
402   qs_man_println(man, "       /aaa/view?page=1\n");
403   qs_man_println(man, "       /aaa/edit?document=1\n");
404   printf("\n");
405   qs_man_print(man, "     These access log data must include current request URIs but\n");
406   qs_man_print(man, "     also request lines from previous rule generation steps. It\n");
407   qs_man_print(man, "     must also include request lines which cover manually generated\n");
408   qs_man_print(man, "     rules.\n");
409   qs_man_print(man, "     You may use the 'qos-path' and 'qos-query' variables to create\n");
410   qs_man_print(man, "     an audit log containing all request data (path and query/body data).\n");
411   qs_man_print(man, "     Example: 'CustomLog audit_log %{qos-path}n%{qos-query}n'.\n");
412   qs_man_print(man, "     See also http://mod-qos.sourceforge.net#qsfiltersample about\n");
413   qs_man_print(man, "     the module settings.\n");
414   if(man) printf("\n.TP\n");
415   qs_man_print(man, "  -c <path>\n");
416   if(man) printf("\n");
417   qs_man_print(man, "     mod_qos configuration file defining QS_DenyRequestLine and\n");
418   qs_man_print(man, "     QS_PermitUri directives.\n");
419   qs_man_print(man, "     %s generates rules from access log data automatically.\n", cmd);
420   qs_man_print(man, "     Manually generated rules (QS_PermitUri) may be provided from\n");
421   qs_man_print(man, "     this file. Note: each manual rule must be represented by a\n");
422   qs_man_print(man, "     request URI in the input data (-i) in order to make sure not\n");
423   qs_man_print(man, "     to be deleted by the rule optimisation algorithm.\n");
424   qs_man_print(man, "     QS_Deny* rules from this file are used to filter request lines\n");
425   qs_man_print(man, "     which should not be used for whitelist rule generation.\n");
426   printf("\n");
427   printf("     Example:\n");
428   qs_man_println(man, "       # manually defined whitelist rule:\n");
429   qs_man_println(man, "       QS_PermitUri +view deny \"^[/a-zA-Z0-9]+/view\\?(page=[0-9]+)?$\"\n");
430   qs_man_println(man, "       # filter unwanted request line patterns:\n");
431   qs_man_println(man, "       QS_DenyRequestLine +printable deny \".*[\\x00-\\x19].*\"\n");
432   printf("\n");
433   if(man) printf("\n.TP\n");
434   qs_man_print(man, "  -d <num>\n");
435   if(man) printf("\n");
436   qs_man_print(man, "     Depth (sub locations) of the path string which is defined as a\n");
437   qs_man_print(man, "     literal string. Default is 1.\n");
438   if(man) printf("\n.TP\n");
439   qs_man_print(man, "  -h\n");
440   if(man) printf("\n");
441   qs_man_print(man, "     Always use a string representing the handler name in the path even\n");
442   qs_man_print(man, "     the url does not have a query. See also -d option.\n");
443   if(man) printf("\n.TP\n");
444   qs_man_print(man, "  -b <num>\n");
445   if(man) printf("\n");
446   qs_man_print(man, "     Replaces url pattern by the regular expression when detecting\n");
447   qs_man_print(man, "     a base64/hex encoded string. Detecting sensibility is defined by a\n");
448   qs_man_print(man, "     numeric value. You should use values higher than 5 (default)\n");
449   qs_man_print(man, "     or 0 to disable this function.\n");
450   if(man) printf("\n.TP\n");
451   qs_man_print(man, "  -p\n");
452   if(man) printf("\n");
453   qs_man_print(man, "     Represents query by pcre only (no literal strings).\n");
454   if(man) printf("\n.TP\n");
455   qs_man_print(man, "  -s\n");
456   if(man) printf("\n");
457   qs_man_print(man, "     Uses one single pcre for the whole query string.\n");
458   if(man) printf("\n.TP\n");
459   qs_man_print(man, "  -m\n");
460   if(man) printf("\n");
461   qs_man_print(man, "     Uses one pcre for multiple query values (recommended mode).\n");
462   if(man) printf("\n.TP\n");
463   qs_man_print(man, "  -o\n");
464   if(man) printf("\n");
465   qs_man_print(man, "     Does not care the order of query parameters.\n");
466   if(man) printf("\n.TP\n");
467   qs_man_print(man, "  -l <len>\n");
468   if(man) printf("\n");
469   qs_man_print(man, "     Outsizes the query length by the defined length ({0,size+len}),\n");
470   qs_man_print(man, "     default is %d.\n", m_query_len_pcre);
471   if(man) printf("\n.TP\n");
472   qs_man_print(man, "  -n\n");
473   if(man) printf("\n");
474   qs_man_print(man, "     Disables redundant rules elimination.\n");
475   if(man) printf("\n.TP\n");
476   qs_man_print(man, "  -e\n");
477   if(man) printf("\n");
478   qs_man_print(man, "     Exit on error.\n");
479   if(man) printf("\n.TP\n");
480   qs_man_print(man, "  -u 'uni'\n");
481   if(man) printf("\n");
482   qs_man_print(man, "     Enables additional decoding methods. Use the same settings as you have\n");
483   qs_man_print(man, "     used for the QS_Decoding directive.\n");
484   if(man) printf("\n.TP\n");
485   qs_man_print(man, "  -k <prefix>\n");
486   if(man) printf("\n");
487   qs_man_print(man, "     Prefix used to generate rule identifiers (QSF by default).\n");
488   if(man) printf("\n.TP\n");
489   qs_man_print(man, "  -t\n");
490   if(man) printf("\n");
491   qs_man_print(man, "     Calculates the maximal latency per request (worst case) using the\n");
492   qs_man_print(man, "     generated rules.\n");
493   if(man) printf("\n.TP\n");
494   qs_man_print(man, "  -f <path>\n");
495   if(man) printf("\n");
496   qs_man_print(man, "     Filters the input by the provided path (prefix) only processing\n");
497   qs_man_print(man, "     matching lines.\n");
498   if(man) printf("\n.TP\n");
499   qs_man_print(man, "  -v <level>\n");
500   if(man) printf("\n");
501   qs_man_print(man, "     Verbose mode. (0=silent, 1=rule source, 2=detailed). Default is 1.\n");
502   qs_man_print(man, "     Don't use rules you haven't checked the request data used to\n");
503   qs_man_print(man, "     generate it! Level 1 is highly recommended (as long as you don't\n");
504   qs_man_print(man, "     have created the log data using your own web crawler).\n");
505   printf("\n");
506   if(man) {
507     printf(".SH OUTPUT\n");
508   } else {
509     printf("Output\n");
510   }
511   qs_man_print(man, " The output of %s is written to stdout. The output\n", cmd);
512   qs_man_print(man, " contains the generated QS_PermitUri directives but also\n");
513   qs_man_print(man, " information about the source which has been used to generate\n");
514   qs_man_print(man, " these rules. It is very important to check the validity of\n");
515   qs_man_print(man, " each request line which has been used to calculate the\n");
516   qs_man_print(man, " QS_PermitUri rules. Each request line which has been used to\n");
517   qs_man_print(man, " generate a new rule is shown in the output prefixed by\n");
518   qs_man_print(man, " \"ADD line <line number>:\". These request lines should be\n");
519   qs_man_print(man, " stored and reused at any later rule generation (add them to\n");
520   qs_man_print(man, " the URI input file). The subsequent line shows the generated\n");
521   qs_man_print(man, " rule.\n");
522   qs_man_print(man, " At the end of data processing a list of all generated\n");
523   qs_man_print(man, " QS_PermitUri rules is shown. These directives may be used\n");
524   qs_man_print(man, " withn the configuration file used by mod_qos.\n");
525   printf("\n");
526   if(man) {
527     printf(".SH EXAMPLE\n");
528   } else {
529     printf("Sample Usage and Output\n");
530   }
531   qs_man_println(man, "  %s -i loc.txt -c httpd.conf -m -e\n", cmd);
532   qs_man_println(man, "  ...\n");
533   qs_man_println(man, "  # ADD line 1: /aaa/index.do\n");
534   qs_man_println(man, "  # 003 ^(/[a-zA-Z0-9\\-_]+)+[/]?\\.?[a-zA-Z]{0,4}$\n");
535   qs_man_println(man, "  # ADD line 3: /aaa/view?page=1\n");
536   qs_man_println(man, "  # --- ^[/a-zA-Z0-9]+/view\\?(page=[0-9]+)?$\n");
537   qs_man_println(man, "  # ADD line 4: /aaa/edit?document=1\n");
538   qs_man_println(man, "  # 004 ^[/a-zA-Z]+/edit\\?((document)(=[0-9]*)*[&]?)*$\n");
539   qs_man_println(man, "  # ADD line 5: /aaa/edit?image=1.jpg\n");
540   qs_man_println(man, "  # 005 ^[/a-zA-Z]+/edit\\?((image)(=[0-9\\.a-zA-Z]*)*[&]?)*$\n");
541   qs_man_println(man, "  ...\n");
542   qs_man_println(man, "  QS_PermitUri +QSF001 deny \"^[/a-zA-Z]+/edit\\?((document|image)(=[0-9\\.a-zA-Z]*)*[&]?)*$\"\n");
543   qs_man_println(man, "  QS_PermitUri +QSF002 deny \"^[/a-zA-Z0-9]+/view\\?(page=[0-9]+)?$\"\n");
544   qs_man_println(man, "  QS_PermitUri +QSF003 deny \"^(/[a-zA-Z0-9\\-_]+)+[/]?\\.?[a-zA-Z]{0,4}$\"\n");
545   printf("\n");
546   if(man) {
547     printf(".SH SEE ALSO\n");
548     printf("qsdt(1), qsexec(1), qsgeo(1), qsgrep(1), qshead(1), qslog(1), qslogger(1), qspng(1), qsre(1), qsrespeed(1), qsrotate(1), qssign(1), qstail(1)\n");
549     printf(".SH AUTHOR\n");
550     printf("Pascal Buchbinder, http://mod-qos.sourceforge.net/\n");
551   } else {
552     printf("mod_qos %s\n", man_version);
553     printf("See http://mod-qos.sourceforge.net/ for further details.\n");
554   }
555   if(man) {
556     exit(0);
557   } else {
558     exit(1);
559   }
560 }
561 
/* worker struct, used for parallel processing; qos_worker() passes
   the members as arguments to qos_get_used() */
typedef struct {
  apr_pool_t *pool;         /* pool the table of used rules is allocated from */
  apr_table_t *rules;       /* all rules (the values are qs_rule_t pointers) */
  apr_table_t *rules_url;   /* table whose keys are matched against the rules */
  int from;                 /* index of the first rule to check */
  int to;                   /* index of the first rule which is not checked anymore */
} qs_worker_t;
570 
571 /* determines, if a rule is really required */
qos_get_used(apr_pool_t * pool,apr_table_t * rules,apr_table_t * rules_url,int from,int to)572 static apr_table_t *qos_get_used(apr_pool_t *pool, apr_table_t *rules, apr_table_t *rules_url,
573                                  int from, int to) {
574   apr_table_t *used = apr_table_make(pool, 1);
575   int j;
576   for(j = from; j < to; j++) {
577     int l;
578     apr_table_entry_t *linee = (apr_table_entry_t *)apr_table_elts(rules_url)->elts;
579     if(m_verbose) {
580       printf("[%d]", j);
581       fflush(stdout);
582     }
583     for(l = 0; l < apr_table_elts(rules_url)->nelts; l++) {
584       char *line = linee[l].key;
585       int i;
586       int match = 0;
587       apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
588       for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
589         if(i != j) {
590           qs_rule_t *rs = (qs_rule_t *)entry[i].val;
591           if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) >= 0) {
592             match = 1;
593             break;
594           }
595         }
596       }
597       if(!match) {
598         /* no match, rule j is required */
599         apr_table_add(used, entry[j].key, "+");
600       }
601     }
602   }
603   return used;
604 }
605 
qos_worker(void * argv)606 static void *qos_worker(void *argv) {
607   qs_worker_t *wt = argv;
608   return qos_get_used(wt->pool, wt->rules, wt->rules_url, wt->from, wt->to);
609 }
610 
611 /* get the characters used withn the string in order to define a pcre */
qos_2pcre(apr_pool_t * pool,const char * line)612 static char *qos_2pcre(apr_pool_t *pool, const char *line) {
613   int hasA = 0;
614   int hasD = 0;
615   int hasE = 0;
616   int hasB = 0;
617   int i = 0;
618   unsigned char *in = (unsigned char *)line;
619   char *ret = apr_pcalloc(pool, strlen(line) * 6);
620   int reti = 0;
621   char *existing = "";
622   if(strlen(line) == 0) return "";
623   while(in[i]) {
624     if(isdigit(in[i])) {
625       if(!hasD) {
626         hasD = 1;
627         strcpy(&ret[reti], "0-9");
628         reti = reti + 3;
629       }
630     } else if(isalpha(in[i])) {
631       if(!hasA) {
632         hasA = 1;
633         strcpy(&ret[reti], "a-zA-Z");
634         reti = reti + 6;
635       }
636     } else if(in[i] == '\\') {
637       if(!hasE) {
638         hasE = 1;
639         strcpy(&ret[reti], "\\\\");
640         reti = reti + 2;
641       }
642     } else if(in[i] == '-') {
643       if(!hasB) {
644         hasB = 1;
645         strcpy(&ret[reti], "\\-");
646         reti = reti + 2;
647       }
648     } else if(in[i] == '\0') {
649       char *ck = apr_psprintf(pool, "#\\x%02x#", in[i]);
650       if(strstr(existing, ck) == NULL) {
651         sprintf(&ret[reti], "\\x%02x", in[i]);
652         reti = reti + 4;
653         existing = apr_pstrcat(pool, existing, ck, NULL);
654       }
655     } else if(strchr(ret, in[i]) == NULL) {
656       if(strchr(QS_PCRE_RESERVED, in[i]) != NULL) {
657         ret[reti] = '\\';
658         reti++;
659         ret[reti] = in[i];
660         reti++;
661       } else if((in[i] < ' ') || (in[i]  > '~')) {
662         char *ck = apr_psprintf(pool, "#\\x%02x#", in[i]);
663         if(strstr(existing, ck) == NULL) {
664           sprintf(&ret[reti], "\\x%02x", in[i]);
665           reti = reti + 4;
666           existing = apr_pstrcat(pool, existing, ck, NULL);
667         }
668       } else {
669         ret[reti] = in[i];
670         reti++;
671       }
672     }
673     i++;
674   }
675   if(strlen(ret) == 0) return NULL;
676   ret[reti] = '\0';
677   return ret;
678 }
679 
680 /* check for the pattern "p" in "r" using the delimter "d",
681    returns 1 if it is in the string */
qos_checkstr(apr_pool_t * pool,char * r,char * d,char * p)682 static int qos_checkstr(apr_pool_t *pool, char *r, char *d, char *p) {
683   /*
684    * r = ..|p|..
685    * r = p|...
686    * r = ..|p
687    * r = p
688    */
689   char *check1 = apr_pstrcat(pool, d, p, d, NULL);
690   char *check2 = apr_pstrcat(pool, p, d, NULL);
691   char *check3 = apr_pstrcat(pool, d, p, NULL);
692 
693   if(strstr(r, check1) != NULL) {
694     return 1;
695   }
696   if(strncmp(r, check2, strlen(check2)) == 0) {
697     return 1;
698   }
699   if(strlen(r) > strlen(check3)) {
700     if((strncmp(&r[strlen(r)-strlen(check3)], check3, strlen(check3)) == 0)) {
701       return 1;
702     }
703   }
704   if(strcmp(r, p) == 0) {
705     return 1;
706   }
707 
708   return 0;
709 }
710 
711 /* add the string "n" to "o" using the delimiter "d" (only if not
712    already available */
qos_addstr(apr_pool_t * pool,char * o,char * d,char * n)713 static char *qos_addstr(apr_pool_t *pool, char *o, char *d, char *n) {
714   char *p = apr_pstrdup(pool, n);
715   char *r = o;
716   if(n == NULL) return o;
717   while(p && p[0]) {
718     char *this = p;
719     char *next = strchr(p, d[0]);
720 
721     /* \| */
722     while(next) {
723       if((next > this) && (next[-1] == '\\')) {
724         next++;
725         next = strchr(next, d[0]);
726       } else {
727         break;
728       }
729     }
730     if(next == NULL) {
731       p = NULL;
732     } else {
733       next[0] = '\0';
734       next++;
735       p = next;
736     }
737     if(!qos_checkstr(pool, r, d, this)) {
738       r = apr_pstrcat(pool, r, d, this, NULL);
739     }
740   }
741   return r;
742 }
743 
744 
745 /* create a name=pcre string like this: ((s1|s2)(=[<pcre>]*)*[&]?)*" */
qos_qqs(apr_pool_t * pool,char * string,char * query_pcre,int singleEq,int hasEq,int startAmp)746 static char *qos_qqs(apr_pool_t *pool, char *string, char *query_pcre, int singleEq, int hasEq, int startAmp) {
747   char *se = NULL;
748   char *s = "";
749   if(startAmp) s = "[&]?";
750   if(singleEq) {
751     se = "(=[&]?)*";
752   }
753   if(strlen(query_pcre) > 0) {
754     return apr_pstrcat(pool, s, "((", string, ")(=[", qos_2pcre(pool, query_pcre), "]*)*[&]?)*", se, NULL);
755   } else {
756     if(hasEq && !singleEq) {
757       se = "(=[&]?)*";
758       return apr_pstrcat(pool, s, "(((", string, ")[&]?)*", se, ")*", NULL);
759     }
760     return apr_pstrcat(pool, s, "((", string, ")[&]?)*", se, NULL);
761   }
762 }
763 
764 /* tries to optimize the rules by merging all query into one single pcre matching
765    all values */
qos_query_optimization(apr_pool_t * pool,apr_table_t * rules)766 static void qos_query_optimization(apr_pool_t *pool, apr_table_t *rules) {
767   apr_table_t *delete = apr_table_make(pool, 1);
768   apr_table_t *checked_path = apr_table_make(pool, 1);
769   apr_table_t *new = apr_table_make(pool, 1);
770   int i, j;
771   apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
772   for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
773     char *rule_str = entry[i].key;
774     qs_rule_t *r = (qs_rule_t *)entry[i].val;
775     if(!r->fragment && r->path && (apr_table_get(checked_path, r->path) == NULL)) {
776       int merged = 0;
777       char *query_m_string = r->query_m_string == NULL ? "" : r->query_m_string;
778       char *query_m_pcre = r->query_m_pcre == NULL ? "" : r->query_m_pcre;
779       if(m_verbose > 1) printf("  search for path %s (%s)\n", r->path, rule_str);
780       if(m_verbose > 1) printf("  . %s %s\n", query_m_string, query_m_pcre);
781       apr_table_add(checked_path, r->path, "");
782       /* search for rules with the same path and delete them */
783       for(j = 0; j < apr_table_elts(rules)->nelts; j++) {
784         if(i != j) {
785           qs_rule_t *n = (qs_rule_t *)entry[j].val;
786           if(!n->fragment && n->path && (strcmp(r->path, n->path) == 0)) {
787             if(m_verbose > 1) printf("  + %s %s\n",
788                                      n->query_m_string == NULL ? "-" : n->query_m_string,
789                                      n->query_m_pcre == NULL ? "-" : n->query_m_pcre);
790             if(strlen(query_m_string) == 0) {
791               query_m_string = apr_pstrcat(pool, query_m_string, n->query_m_string, NULL);
792             } else {
793               query_m_string = qos_addstr(pool, query_m_string, "|", n->query_m_string);
794             }
795             if(m_verbose > 1) printf("  > %s\n", query_m_string);
796             query_m_pcre = apr_pstrcat(pool, query_m_pcre, n->query_m_pcre, NULL);
797             apr_table_add(delete, entry[j].key, "");
798             merged = 1;
799           }
800         }
801       }
802       /* update rule if merged to any */
803       if(merged) {
804         apr_table_add(delete, entry[i].key, "");
805         if(m_verbose) {
806           printf("# CHANGE: <%s>", rule_str);
807         }
808         {
809           const char *errptr = NULL;
810           char *rule = apr_pstrcat(pool, "^", r->path, NULL);
811           qs_rule_t *rs = apr_pcalloc(pool, sizeof(qs_rule_t));
812           if(strlen(query_m_string) > 0) {
813             rule = apr_pstrcat(pool, rule, "\\?",
814                                qos_qqs(pool, query_m_string, query_m_pcre, 0, 0, 0), NULL);
815           }
816           rule = apr_pstrcat(pool, rule, "$", NULL);
817           rs->pcre = qos_pcre_compile(rule, 0);
818           rs->extra = pcre_study(rs->pcre, 0, &errptr);
819           rs->path = r->path;
820           apr_table_setn(new, rule, (char *)rs);
821           if(m_verbose) {
822             printf(" to <%s>\n", rule);
823             fflush(stdout);
824           }
825         }
826       }
827     }
828   }
829   entry = (apr_table_entry_t *)apr_table_elts(delete)->elts;
830   for(i = 0; i < apr_table_elts(delete)->nelts; i++) {
831     if(m_verbose) printf("# DEL rule: %s\n", entry[i].key);
832     apr_table_unset(rules, entry[i].key);
833   }
834   entry = (apr_table_entry_t *)apr_table_elts(new)->elts;
835   for(i = 0; i < apr_table_elts(new)->nelts; i++) {
836     apr_table_setn(rules, entry[i].key, entry[i].val);
837   }
838 }
839 
840 /* deletes rules which are not required and merge query name/value pairs */
qos_delete_obsolete_rules(apr_pool_t * pool,apr_table_t * rules,apr_table_t * rules_url)841 static void qos_delete_obsolete_rules(apr_pool_t *pool, apr_table_t *rules, apr_table_t *rules_url) {
842   apr_table_t *not_used = apr_table_make(pool, 1);
843   apr_table_t *used;
844   apr_table_t *used1;
845   pthread_attr_t *tha = NULL;
846   pthread_t tid;
847   qs_worker_t *wt = apr_pcalloc(pool, sizeof(qs_worker_t));
848 
849 
850   if(m_query_multi_pcre) {
851     if(m_verbose) {
852       printf("# search for redundant rules ...\n");
853       fflush(stdout);
854     }
855     qos_query_optimization(pool, rules);
856     if(m_verbose) printf("# ");
857   } else {
858     if(m_verbose) {
859       printf("# search for redundant rules ");
860       fflush(stdout);
861     }
862   }
863 
864   wt->pool = pool;
865   wt->rules = rules;
866   wt->rules_url = rules_url;
867   wt->from = apr_table_elts(rules)->nelts / 2;
868   wt->to = apr_table_elts(rules)->nelts;
869 
870   pthread_create(&tid, tha, qos_worker, (void *)wt);
871   used = qos_get_used(pool, rules, rules_url, 0, apr_table_elts(rules)->nelts / 2);
872   pthread_join(tid, (void *)&used1);
873   if(m_verbose) printf(" done\n");
874   {
875     int i;
876     apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
877     for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
878       if((apr_table_get(used, entry[i].key) == NULL) &&
879          (apr_table_get(used1, entry[i].key) == NULL)) {
880         if(m_verbose) printf("# DEL rule (not required): %s\n", entry[i].key);
881         apr_table_add(not_used, entry[i].key, "-");
882       }
883     }
884     entry = (apr_table_entry_t *)apr_table_elts(not_used)->elts;
885     for(i = 0; i < apr_table_elts(not_used)->nelts; i++) {
886       apr_table_unset(rules, entry[i].key);
887     }
888   }
889 }
890 
891 /* test if we need to create a new url (and save line if the rule is used the very
892    first time (rule has been read from the configuration file)) */
qos_test_for_existing_rule(char * plain,char * line,apr_table_t * rules,apr_table_t * special_rules,int line_nr,apr_table_t * rules_url,apr_table_t * source_rules,int first)893 static int qos_test_for_existing_rule(char *plain, char *line, apr_table_t *rules,
894                                       apr_table_t *special_rules, int line_nr,
895                                       apr_table_t *rules_url, apr_table_t *source_rules, int first) {
896   int i;
897   apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
898   if((line == 0) || (strlen(line) == 0)) return 0;
899   for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
900     qs_rule_t *rs = (qs_rule_t *)entry[i].val;
901     if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) >= 0) {
902       if(first && (apr_table_get(source_rules, entry[i].key) == NULL)) {
903         apr_table_add(source_rules, entry[i].key, "");
904         apr_table_add(rules_url, line, "");
905         apr_table_setn(special_rules, entry[i].key, (char *)rs);
906         if(m_verbose) {
907           printf("# ADD line %d: %s\n", line_nr, plain);
908           printf("# --- %s\n", entry[i].key);
909         }
910       }
911       if(m_verbose > 1){
912         printf("LINE %d, exiting rule: %s\n", line_nr, entry[i].key);
913       }
914       return 1;
915     }
916   }
917   /* check for special rules */
918   entry = (apr_table_entry_t *)apr_table_elts(special_rules)->elts;
919   for(i = 0; i < apr_table_elts(special_rules)->nelts; i++) {
920     qs_rule_t *rs = (qs_rule_t *)entry[i].val;
921     if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) >= 0) {
922       if(m_verbose) {
923         printf("# ADD line %d: %s\n", line_nr, plain);
924         printf("# -(S) %s\n", entry[i].key);
925       }
926       apr_table_setn(rules, entry[i].key, (char *)rs);
927       return 1;
928     }
929   }
930   return 0;
931 }
932 
933 /* filter lines we don't want to add to the whitelist */
qos_enforce_blacklist(apr_table_t * rules,const char * line)934 static int qos_enforce_blacklist(apr_table_t *rules, const char *line) {
935   int i;
936   apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
937   if((line == 0) || (strlen(line) == 0)) return 0;
938   for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
939     qs_rule_t *rs = (qs_rule_t *)entry[i].val;
940     if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) == 0) {
941       if(m_verbose > 1) printf(" blacklist match, rule %s\n", entry[i].key);
942       return 1;
943     }
944   }
945   return 0;
946 }
947 
948 /* load existing rules */
qos_load_rules(apr_pool_t * pool,apr_table_t * ruletable,const char * httpdconf,const char * command,int option)949 static void qos_load_rules(apr_pool_t *pool, apr_table_t *ruletable,
950                            const char *httpdconf, const char *command, int option) {
951   FILE *f = fopen(httpdconf, "r");
952   char line[MAX_LINE];
953   if(f == NULL) {
954     fprintf(stderr, "ERROR, could not open %s\n", httpdconf);
955     exit(1);
956   }
957   while(!qos_fgetline(line, sizeof(line), f)) {
958     // QS_DenyRequestLine '+'|'-'<id> 'log'|'deny' <pcre>
959     char *p = strstr(line, command);
960     if(p) {
961       p[0] = '\0';
962       p++;
963     }
964     if(p && (strchr(line, '#') == NULL)) {
965       p = strchr(p, ' ');
966       if(p) {
967         while(p[0] == ' ') p++;
968         p = strchr(p, ' ');
969         if(p) {
970           while(p[0] == ' ') p++;
971           p = strchr(p, ' ');
972           if(p) {
973             while(p[0] == ' ') p++;
974             if(m_verbose > 1) {
975               printf("load %s\n", p);
976             }
977             {
978               const char *errptr = NULL;
979               char *pattern;
980               pcre *pcre_test;
981               pcre_extra *extra;
982               qs_rule_t *rs;
983               if(p[0] == '"') {
984                 int fl = strlen(p)-2;
985                 pattern = apr_psprintf(pool, "%.*s", fl, &p[1]);
986               } else {
987                 int fl = strlen(p);
988                 pattern = apr_psprintf(pool, "%.*s", fl, p);
989               }
990               pcre_test = qos_pcre_compile(pattern, option);
991               extra = pcre_study(pcre_test, 0, &errptr);
992               rs = apr_pcalloc(pool, sizeof(qs_rule_t));
993               rs->pcre = pcre_test;
994               rs->extra = extra;
995               apr_table_setn(ruletable, pattern, (char *)rs);
996             }
997           }
998         }
999       }
1000     }
1001   }
1002   fclose(f);
1003 }
1004 
/* loads the QS_DenyRequestLine patterns of a configuration file into the
   blacklist table (patterns are compiled case insensitive) */
static void qos_load_blacklist(apr_pool_t *pool, apr_table_t *blacklist, const char *httpdconf) {
  const char *directive = "QS_DenyRequestLine";
  qos_load_rules(pool, blacklist, httpdconf, directive, PCRE_CASELESS);
}
/* loads the QS_PermitUri patterns of a configuration file into the rule
   table (patterns are compiled without any pcre option) */
static void qos_load_whitelist(apr_pool_t *pool, apr_table_t *rules, const char *httpdconf) {
  const char *directive = "QS_PermitUri";
  qos_load_rules(pool, rules, httpdconf, directive, 0);
}
1011 
1012 /* tries to map a base64 string to a pcre */
qos_b64_2pcre(apr_pool_t * pool,const char * line)1013 static char *qos_b64_2pcre(apr_pool_t *pool, const char *line) {
1014   char *copy = apr_pstrdup(pool, line);
1015   char *b64 = qos_detect_b64(copy, 1);
1016   char *st = b64;
1017   char *ed = &b64[1];
1018   if(m_verbose > 1) printf("  B642pcre: %s", copy);
1019   /* reserved: {}[]()^$.|*+?\ */
1020 #define QS_BX "-_$+!"
1021   while(st[0] && (isdigit(st[0]) || isalpha(st[0]) || (strchr(QS_BX, st[0]) != NULL))) {
1022     st--;
1023   }
1024   st++;
1025   st[0] = '\0';
1026   while(ed[0] && (isdigit(ed[0]) || isalpha(ed[0]) || (strchr(QS_BX, ed[0]) != NULL))) {
1027     ed++;
1028   }
1029   if(m_verbose > 1) printf(" %s <> %s\n", copy, ed);
1030   return apr_pstrcat(pool, qos_escape_pcre(pool, copy),
1031                      "[a-zA-Z0-9\\-_\\$\\+!]+",
1032                      ed[0] == '\0' ? NULL : qos_escape_pcre(pool, ed), NULL);
1033 }
1034 
1035 
1036 /* maps a query string to a pairs of <string>=<pcre> or <pcre>=<pcre> */
qos_query_string_pcre(apr_pool_t * pool,const char * path)1037 static char *qos_query_string_pcre(apr_pool_t *pool, const char *path) {
1038   char *copy = apr_pstrdup(pool, path);
1039   char *pos = copy;
1040   char *ret = "";
1041   int isValue = 0;
1042   int open = 0;
1043   while(copy[0]) {
1044     if((copy[0] == '=') && (copy[1] != '=') && !open) {
1045       copy[0] = '\0';
1046       qos_unescaping(pos);
1047       if(!open) {
1048         ret = apr_pstrcat(pool, ret, "(", NULL);
1049         open = 1;
1050       }
1051       if(m_query_pcre) {
1052         if(strlen(pos) > 0) {
1053           ret = apr_pstrcat(pool, ret, "[", qos_2pcre(pool, pos), "]+=", NULL);
1054         } else {
1055           ret = apr_pstrcat(pool, ret, "=", NULL);
1056         }
1057       } else {
1058         ret = apr_pstrcat(pool, ret, qos_escape_pcre(pool, pos), "=", NULL);
1059       }
1060       open = 1;
1061       pos = copy;
1062       pos++;
1063       isValue = 1;
1064     }
1065     if(copy[0] == '&') {
1066       copy[0] = '\0';
1067       if(strlen(pos) == 0) {
1068         ret = apr_pstrcat(pool, ret, "[&]?", NULL);
1069         if(open) {
1070           ret = apr_pstrcat(pool, ret, ")?", NULL);
1071           open = 0;
1072         }
1073       } else {
1074         qos_unescaping(pos);
1075         ret = apr_psprintf(pool, "%s[%s]{0,%"APR_SIZE_T_FMT"}[&]?", ret, qos_2pcre(pool, pos),
1076                            strlen(pos) + m_query_len_pcre);
1077         if(open) {
1078           ret = apr_pstrcat(pool, ret, ")?", NULL);
1079           open = 0;
1080         }
1081       }
1082       pos = copy;
1083       pos++;
1084       isValue = 0;
1085     }
1086     copy++;
1087   }
1088   if(pos != copy) {
1089     qos_unescaping(pos);
1090     if(isValue) {
1091       ret = apr_psprintf(pool, "%s[%s]{0,%"APR_SIZE_T_FMT"}[&]?", ret, qos_2pcre(pool, pos),
1092                          strlen(pos) + m_query_len_pcre);
1093     } else {
1094       if(!open) {
1095         ret = apr_pstrcat(pool, "(", ret, NULL);
1096         open = 1;
1097       }
1098       if(m_query_pcre) {
1099         ret = apr_pstrcat(pool, ret, "[", qos_2pcre(pool, pos), "]+", NULL);
1100       } else {
1101         ret = apr_pstrcat(pool, ret, qos_escape_pcre(pool, pos), NULL);
1102       }
1103     }
1104     if(open) {
1105       ret = apr_pstrcat(pool, ret, ")?", NULL);
1106       open = 0;
1107     }
1108   }
1109   if(open) {
1110     ret = apr_pstrcat(pool, ret, ")?", NULL);
1111     open = 0;
1112   }
1113   if(m_query_pcre) {
1114     return ret;
1115   } else {
1116     return ret;
1117     /* it would be nice to use (see -o):
1118      *  ((a=b)?(c=d)?)*
1119      * instead of:
1120      *  (a=b)?(c=d)? and (c=d)?(a=b)?
1121      * but in this case, two rules are much faster than one
1122      * it's probably better to use the -m option
1123      */
1124   }
1125 }
1126 
1127 /* maps a query string to a list of names and a single pcre for all values:
1128    <string>|<string>=<pcre> */
qos_multi_query_string_pcre(apr_pool_t * pool,const char * path,char ** query_m_string,char ** query_m_pcre)1129 static char *qos_multi_query_string_pcre(apr_pool_t *pool, const char *path,
1130                                          char **query_m_string, char **query_m_pcre) {
1131   char *copy = apr_pstrdup(pool, path);
1132   char *pos = copy;
1133   char *string = "";
1134   char *query_pcre = "";
1135   int isValue = 0;
1136   int singleEq = 0;
1137   int hasEq = 0;
1138   int startAmp = 0;
1139   if(copy[0] == '&') startAmp = 1;
1140   while(copy[0]) {
1141     if(copy[0] == '=') hasEq = 1;
1142     if((copy[0] == '=') && (copy[1] != '=') && !isValue) {
1143       copy[0] = '\0';
1144       qos_unescaping(pos);
1145       if(strlen(pos) > 0) {
1146         if(strlen(string) > 0) string = apr_pstrcat(pool, string, "|",  NULL);
1147         string = apr_pstrcat(pool, string, qos_escape_pcre(pool, pos),  NULL);
1148       } else {
1149         if((copy[1] == '&') || (copy[1] == '\0')) {
1150           singleEq = 1;
1151         }
1152       }
1153       pos = copy;
1154       pos++;
1155       isValue = 1;
1156     }
1157     if(copy[0] == '&') {
1158       copy[0] = '\0';
1159       if(!isValue) {
1160         qos_unescaping(pos);
1161         if(strlen(string) > 0) string = apr_pstrcat(pool, string, "|",  NULL);
1162         string = apr_pstrcat(pool, string, qos_escape_pcre(pool, pos),  NULL);
1163       } else {
1164         if(strlen(pos) != 0) {
1165           qos_unescaping(pos);
1166           query_pcre = apr_pstrcat(pool, query_pcre, pos,  NULL);
1167         }
1168       }
1169       pos = copy;
1170       pos++;
1171       isValue = 0;
1172     }
1173     copy++;
1174   }
1175   if(pos != copy) {
1176     qos_unescaping(pos);
1177     if(isValue) {
1178       query_pcre = apr_pstrcat(pool, query_pcre, pos, NULL);
1179     } else {
1180       if(strlen(string) > 0) string = apr_pstrcat(pool, string, "|",  NULL);
1181       string = apr_pstrcat(pool, string, qos_escape_pcre(pool, pos),  NULL);
1182     }
1183   }
1184   *query_m_string = string;
1185   *query_m_pcre = query_pcre;
1186   return qos_qqs(pool, string, query_pcre, singleEq, hasEq, startAmp);
1187 }
1188 
1189 /* maps a path to a single pcre (don't mind its length) */
qos_path_pcre(apr_pool_t * lpool,const char * path)1190 static char *qos_path_pcre(apr_pool_t *lpool, const char *path) {
1191   char *dec = apr_pstrdup(lpool, path);
1192   qos_unescaping(dec);
1193   return apr_pstrcat(lpool, "[", qos_2pcre(lpool, dec), "]+", NULL);
1194 }
1195 
1196 /* maps a path to <pcre>/<string> */
qos_path_pcre_string(apr_pool_t * lpool,const char * path)1197 static char *qos_path_pcre_string(apr_pool_t *lpool, const char *path) {
1198   int nohandler = 0;
1199   char *lpath = apr_pstrdup(lpool, path);
1200   char *last;
1201   char *str = "";
1202   int depth = m_path_depth;
1203   char *rx = "";
1204   if(lpath[strlen(lpath)-1] == '/') {
1205     lpath[strlen(lpath)-1] = '\0';
1206     nohandler = 1;
1207   }
1208   last = strrchr(lpath, '/');
1209   while(last && depth) {
1210     qos_unescaping(last);
1211     if(m_base64 && qos_detect_b64(last, 0)) {
1212       str = apr_pstrcat(lpool, qos_b64_2pcre(lpool, last), str, NULL);
1213     } else {
1214       str = apr_pstrcat(lpool, qos_escape_pcre(lpool, last), str, NULL);
1215     }
1216     last[0] = '\0';
1217     last = strrchr(lpath, '/');
1218     depth--;
1219   }
1220   if(lpath[0]) {
1221     qos_unescaping(lpath);
1222     rx = apr_pstrcat(lpool, "[", qos_2pcre(lpool, lpath), "]+", NULL);
1223   }
1224   if(strlen(str) > 0) {
1225     if(nohandler) {
1226       rx = apr_pstrcat(lpool, rx, str, "[/]?", NULL);
1227     } else {
1228       rx = apr_pstrcat(lpool, rx, str, NULL);
1229     }
1230   }
1231   return rx;
1232 }
1233 
/*
 * Tells if a string consists of alphanumeric characters only.
 *
 * returns 1 if every character is alphanumeric (also for an empty string),
 *         0 if any other character has been found or if string is NULL
 */
static int qos_is_alnum(const char *string) {
  const unsigned char *c;
  if(string == NULL) {
    return 0;
  }
  for(c = (const unsigned char *)string; *c != '\0'; c++) {
    if(!apr_isalnum(*c)) {
      return 0;
    }
  }
  return 1;
}
1244 
/*
 * Searches for rules which have the same query names and the same length
 * and which differ in an alphanumeric part in front of the query only.
 * Such rules are replaced by a more general rule where the differing part
 * is a character class of fixed length.
 *
 * A rule is only compared to others once five other rules with the same
 * query names and the same rule length have been found. The replacement
 * requires a common prefix of more than 14 characters and a '?' behind
 * the differing part.
 *
 * pool:          pool used for the new rules
 * lpool:         pool used for temporary strings
 * rules:         rule table (pcre string => qs_rule_t), updated in place
 * special_rules: not used by this function
 */
static void qos_rule_optimization(apr_pool_t *pool, apr_pool_t *lpool,
                                  apr_table_t *rules, apr_table_t *special_rules) {
  int i;
  apr_table_t *new_rules = apr_table_make(pool, 5);
  apr_table_t *del_rules = apr_table_make(pool, 5);
  apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
  for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
    qs_rule_t *rs = (qs_rule_t *)entry[i].val;
    char *key_i = entry[i].key;
    size_t len_i = strlen(key_i);
    int hit = 0;
    int j;
    if(rs->query_m_string == NULL) {
      /* nothing to compare without query names */
      continue;
    }
    for(j = 0; j < apr_table_elts(rules)->nelts; j++) {
      qs_rule_t *rsj;
      char *key_j;
      int s = 0;
      int e;
      if(i == j) {
        continue;
      }
      rsj = (qs_rule_t *)entry[j].val;
      key_j = entry[j].key;
      if(rsj->query_m_string == NULL) {
        continue;
      }
      if((strcmp(rs->query_m_string, rsj->query_m_string) == 0) &&
         (len_i == strlen(key_j))) {
        hit++;
      }
      if(hit != 5) {
        continue;
      }
      /* s: length of the common prefix */
      while(key_i[s] && (key_i[s] == key_j[s])) s++;
      /* e: end of the differing (or alphanumeric) part; the scan must stop
         at the end of either rule, rule j may be shorter than rule i */
      e = s;
      while(key_i[e] && key_j[e] &&
            ((key_i[e] != key_j[e]) ||
             (apr_isalnum(key_i[e]) && apr_isalnum(key_j[e])))) e++;
      if((e > s) &&
         (s > 14) &&
         ((size_t)e < len_i) &&
         (key_j[e] != '\0') &&
         (strstr(&key_i[e], "\?") != NULL)) {
        const char *errptr = NULL;
        char *match = apr_psprintf(lpool, "%.*s%.*s",
                                   e-s, &key_i[s],
                                   e-s, &key_j[s]);
        if(qos_is_alnum(match)) {
          /* character class built from the differing part of both rules */
          char *matchx = apr_psprintf(lpool, "[%s]{%d}", qos_2pcre(lpool, match), e-s);
          char *new = apr_psprintf(pool, "%.*s%s%s", s, key_i, matchx, &key_i[e]);
          qs_rule_t *rsn = apr_pcalloc(pool, sizeof(qs_rule_t));
          rsn->pcre = qos_pcre_compile(new, 0);
          rsn->extra = pcre_study(rsn->pcre, 0, &errptr);
          rsn->path = rs->path;
          rsn->query_m_string = rs->query_m_string;
          rsn->query_m_pcre = rs->query_m_pcre;
          rsn->fragment = rs->fragment;
          if(m_verbose) {
            printf("# CHANGE: <%s> to <%s>\n", key_i, new);
            fflush(stdout);
          }
          apr_table_setn(new_rules, new, (char *)rsn);
          apr_table_addn(del_rules, entry[i].key, entry[i].val);
          apr_table_addn(del_rules, entry[j].key, entry[j].val);
          if(m_verbose > 1) {
            printf("  [%s] [%s]\n", key_i, key_j);
            printf("  [%s] [%s]\n", match, matchx);
          }
          /* rule i has been replaced, continue with the next rule */
          break;
        }
      }
    }
  }
  /* apply the changes after the iteration over the rule table */
  entry = (apr_table_entry_t *)apr_table_elts(new_rules)->elts;
  for(i = 0; i < apr_table_elts(new_rules)->nelts; i++) {
    apr_table_setn(rules, entry[i].key, entry[i].val);
  }
  entry = (apr_table_entry_t *)apr_table_elts(del_rules)->elts;
  for(i = 0; i < apr_table_elts(del_rules)->nelts; i++) {
    apr_table_unset(rules, entry[i].key);
  }
}
1318 
1319 /* rules do not care the order of parameter values (makes rule processing slow)
1320  *  (id=[0-9]{0,13}[&]?)?(name=[a-zA-Z]{0,12}[&]?)?
1321  * ((id=[0-9]{0,13}[&]?)|(name=[a-zA-Z]{0,12}[&]?))*
1322  */
qos_post_optimization(apr_pool_t * lpool,char * query)1323 static char *qos_post_optimization(apr_pool_t *lpool, char *query) {
1324   int hit = 0;
1325   char *p = query;
1326   while(p && p[0]) {
1327     if(strncmp(p, "[&]?)?(", 7) == 0) {
1328       hit = 1;
1329       p[5] = '|';
1330     }
1331     p++;
1332   }
1333   if(hit) {
1334     query[strlen(query)-1] = '\0';
1335     return apr_psprintf(lpool, "(%s)*", query);
1336   }
1337   return query;
1338 }
1339 
qos_auto_detect(char ** raw)1340 static void qos_auto_detect(char **raw) {
1341   char *line = *raw;
1342   int rc_c = -1;
1343   if(m_req_regex) {
1344     int ovector[QS_OVECCOUNT];
1345     /* no request line, maybe raw Apache access log? */
1346     rc_c = pcre_exec(m_req_regex, NULL, line, strlen(line), 0, 0, ovector, QS_OVECCOUNT);
1347     if(rc_c >= 0) {
1348       char *sr;
1349       line = &line[ovector[0]];
1350       line[ovector[1] - ovector[0]] = '\0';
1351       sr = strchr(line, ' ');
1352       while(sr[0] == ' ')sr++;
1353       *raw = sr;
1354       sr = strrchr(line, ' ');
1355       sr[0] = '\0';
1356     }
1357   }
1358   if(rc_c < 0) {
1359     /* or an audit log like "%h %>s %{qos-loc}n %{qos-path}n%{qos-query}n" */
1360     char *pe = line;
1361     int pi = 3;
1362     while(pe && (pi > 0)) {
1363       pi--;
1364       pe = strchr(pe, ' ');
1365       if(pe) {
1366         pe++;
1367       }
1368     }
1369     if(pe && pe[0] == '/' && (pi == 0)) {
1370       *raw = pe;
1371     }
1372   }
1373   return;
1374 }
1375 
1376 /* process the input file line by line */
qos_process_log(apr_pool_t * pool,apr_table_t * blacklist,apr_table_t * rules,apr_table_t * rules_url,apr_table_t * special_rules,FILE * f,int * ln,int * dc,int first)1377 static void qos_process_log(apr_pool_t *pool, apr_table_t *blacklist, apr_table_t *rules,
1378                             apr_table_t *rules_url, apr_table_t *special_rules,
1379                             FILE *f, int *ln, int *dc, int first) {
1380   char *readline = apr_pcalloc(pool, MAX_BODY_BUFFER);
1381   int deny_count = *dc;
1382   int line_nr = *ln;
1383   apr_table_t *source_rules = apr_table_make(pool, 10);
1384   int rule_optimization = 300;
1385   while(!qos_fgetline(readline, MAX_BODY_BUFFER, f)) {
1386     int doubleSlash = 0;
1387     apr_uri_t parsed_uri;
1388     apr_pool_t *lpool;
1389     char *line = readline;
1390     apr_pool_create(&lpool, NULL);
1391     line_nr++;
1392     if((strlen(line) > 1) && line[1] == '/') {
1393       doubleSlash = 1;
1394       line++;
1395     }
1396     if(line[0] != '/') {
1397       if(!m_log_req_regex) {
1398         m_log_req_regex = 1;
1399         fprintf(stderr, "WARNING, line %d: "
1400                 "unexpected data format, try to detect request lines automatically\n",
1401                 line_nr);
1402       }
1403       qos_auto_detect(&line);
1404     }
1405     if(apr_uri_parse(lpool, line, &parsed_uri) != APR_SUCCESS) {
1406       fprintf(stderr, "ERROR, could parse uri %s\n", line);
1407       if(m_exit_on_error) exit(1);
1408     }
1409     if(parsed_uri.path == NULL || (parsed_uri.path[0] != '/')) {
1410       fprintf(stderr, "WARNING, line %d: invalid request %s\n", line_nr, line);
1411     } else if(m_filter && parsed_uri.path && strncmp(parsed_uri.path, m_filter, strlen(m_filter)) != 0) {
1412       // skip filtered line
1413     } else {
1414       char *path = NULL;
1415       char *query = NULL;
1416       char *query_m_string = NULL;
1417       char *query_m_pcre = NULL;
1418       char *fragment = NULL;
1419       char *copy = apr_pstrdup(lpool, line);
1420       qos_unescaping(copy);
1421       if(qos_enforce_blacklist(blacklist, copy)) {
1422         fprintf(stderr, "WARNING: blacklist filter match at line %d for %s\n",
1423                 line_nr, line);
1424         deny_count++;
1425       } else {
1426         if(!qos_test_for_existing_rule(line, copy, rules, special_rules,
1427                                        line_nr, rules_url, source_rules, first)) {
1428           if(m_verbose > 1) printf("LINE %d, analyse: %s\n", line_nr, line);
1429           if(parsed_uri.query) {
1430             if(strcmp(parsed_uri.path, "/") == 0) {
1431               path = apr_pstrdup(lpool, "/");
1432             } else {
1433               path = qos_path_pcre_string(lpool, parsed_uri.path);
1434             }
1435             if(m_query_single_pcre) {
1436               char *qc = apr_pstrdup(lpool, parsed_uri.query);
1437               qos_unescaping(qc);
1438               query = apr_pstrcat(lpool, "[", qos_2pcre(lpool, qc), "]+", NULL);
1439             } else {
1440               if(!m_query_multi_pcre) {
1441                 query = qos_query_string_pcre(lpool, parsed_uri.query);
1442                 if(m_query_o_pcre) {
1443                   query = qos_post_optimization(lpool, query);
1444                 }
1445               } else {
1446                 query = qos_multi_query_string_pcre(lpool, parsed_uri.query,
1447                                                     &query_m_string, &query_m_pcre);
1448               }
1449             }
1450           } else {
1451             if(strcmp(parsed_uri.path, "/") == 0) {
1452               path = apr_pstrdup(lpool, "/");
1453             } else {
1454               if(m_handler) {
1455                 path = qos_path_pcre_string(lpool, parsed_uri.path);
1456               } else {
1457                 if(pcre_exec(pcre_simple_path, NULL, parsed_uri.path,
1458                              strlen(parsed_uri.path), 0, 0, NULL, 0) >= 0) {
1459                   path = apr_pstrdup(lpool, QS_SIMPLE_PATH_PCRE);
1460                 } else {
1461                   path = qos_path_pcre(lpool, parsed_uri.path);
1462                 }
1463               }
1464             }
1465           }
1466           if(parsed_uri.fragment) {
1467             char *f = apr_pstrdup(lpool, parsed_uri.fragment);
1468             if(strlen(f) > 0) {
1469               qos_unescaping(f);
1470               fragment = apr_pstrcat(lpool, "[", qos_2pcre(lpool, f), "]+", NULL);
1471             } else {
1472               fragment = apr_pstrcat(lpool, "", NULL);
1473             }
1474           }
1475           if(m_verbose > 1) {
1476             printf(" path:      %s\n", parsed_uri.path);
1477             printf(" path rule: %s\n", path);
1478             if(query) {
1479               printf(" query:      %s\n", parsed_uri.query);
1480               printf(" query rule: %s\n", query);
1481             }
1482             if(fragment) {
1483               printf(" fragment:      %s\n", parsed_uri.fragment);
1484               printf(" fragment rule: %s\n", fragment);
1485             }
1486           }
1487           {
1488             const char *errptr = NULL;
1489             char *rule;
1490             qs_rule_t *rs = apr_pcalloc(pool, sizeof(qs_rule_t));
1491             if(doubleSlash) {
1492               rule = apr_pstrcat(pool, "^[/]?", path, NULL);
1493             } else {
1494               rule = apr_pstrcat(pool, "^", path, NULL);
1495             }
1496             if(query) {
1497               rule = apr_pstrcat(pool, rule, "\\?", query, NULL);
1498             }
1499             if(fragment) {
1500               rule = apr_pstrcat(pool, rule, "#", fragment, NULL);
1501               rs->fragment = 1;
1502             } else {
1503               rs->fragment = 0;
1504             }
1505             rule = apr_pstrcat(pool, rule, "$", NULL);
1506             rs->pcre = qos_pcre_compile(rule, 0);
1507             rs->extra = pcre_study(rs->pcre, 0, &errptr);
1508             rs->path = apr_pstrdup(pool, path);
1509             if(m_query_multi_pcre && !fragment) {
1510               rs->query_m_string = apr_pstrdup(pool, query_m_string);
1511               rs->query_m_pcre = apr_pstrdup(pool, query_m_pcre);
1512             } else {
1513               rs->query_m_string = NULL;
1514               rs->query_m_pcre = NULL;
1515             }
1516             // don't mind if extra is null
1517             if(m_verbose) {
1518               printf("# ADD line %d: %s\n", line_nr, line);
1519               printf("# %.3d %s\n", apr_table_elts(rules)->nelts+1, rule);
1520               fflush(stdout);
1521             }
1522             if(pcre_exec(rs->pcre, rs->extra, copy, strlen(copy), 0, 0, NULL, 0) < 0) {
1523               fprintf(stderr, "ERROR, rule check failed (did not match)!\n");
1524               fprintf(stderr, " line %d: %s\n", line_nr, line);
1525               fprintf(stderr, " string: %s\n", copy);
1526               fprintf(stderr, " rule: %s\n", rule);
1527               if(m_exit_on_error) exit(1);
1528             } else {
1529               apr_table_add(rules_url, copy, "unescaped line");
1530               apr_table_add(source_rules, rule, "");
1531               apr_table_setn(rules, rule, (char *)rs);
1532             }
1533             if(apr_table_elts(rules)->nelts == 2000) {
1534               fprintf(stderr, "ERROR, too many rules (limited to max. 2000)\n");
1535               if(m_exit_on_error) exit(1);
1536             }
1537             /* rule optimazion searching for redundant patterns (only in
1538 	       conjunction with -m, -b and !-n */
1539             if((apr_table_elts(rules)->nelts == rule_optimization) &&
1540                m_redundant &&
1541                m_query_multi_pcre &&
1542                m_base64) {
1543               /* got too many rules, try to find more general rules */
1544               if(m_verbose) {
1545                 printf("# too many rules: start rule optimization ...\n");
1546                 fflush(stdout);
1547               }
1548               qos_rule_optimization(pool, lpool, rules, special_rules);
1549               if(m_verbose) {
1550                 printf("# continue with rule generation\n");
1551                 fflush(stdout);
1552               }
1553               rule_optimization = rule_optimization + 200;
1554             }
1555           }
1556         }
1557       }
1558     }
1559     apr_pool_destroy(lpool);
1560   }
1561   *dc = deny_count;
1562   *ln = line_nr;
1563 }
1564 
/*
 * Reads the input file line by line and applies every rule to each valid
 * request line. The results of the rule matching are ignored, the function
 * only causes the effort of processing the rules.
 *
 * pool:      pool used for the line buffer
 * blacklist: not used by this function
 * rules:     rule table (pcre string => qs_rule_t)
 * f:         input file
 * ln:        receives the number of lines read
 */
static void qos_measurement(apr_pool_t *pool, apr_table_t *blacklist, apr_table_t *rules, FILE *f, int *ln) {
  char *readline = apr_pcalloc(pool, MAX_BODY_BUFFER);
  int line_nr = 0;
  while(!qos_fgetline(readline, MAX_BODY_BUFFER, f)) {
    apr_uri_t parsed_uri;
    apr_pool_t *lpool;
    char *line = readline;
    apr_pool_create(&lpool, NULL);
    line_nr++;
    if((strlen(line) > 1) && line[1] == '/') {
      /* skip the first character (the same way qos_process_log() does)
         instead of copying the overlapping string using strcpy() */
      line++;
    }
    if(line[0] != '/') {
      qos_auto_detect(&line);
    }
    if(apr_uri_parse(lpool, line, &parsed_uri) != APR_SUCCESS) {
      fprintf(stderr, "ERROR, could not parse uri %s\n", line);
      if(m_exit_on_error) exit(1);
    }
    if(parsed_uri.path == NULL || (parsed_uri.path[0] != '/')) {
      fprintf(stderr, "WARNING, line %d: invalid request %s\n", line_nr, line);
    } else {
      /* rules are applied to the unescaped line */
      char *copy = apr_pstrdup(lpool, line);
      int i;
      apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
      qos_unescaping(copy);
      for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
        qs_rule_t *rs = (qs_rule_t *)entry[i].val;
        pcre_exec(rs->pcre, NULL, copy, strlen(copy), 0, 0, NULL, 0);
      }
    }
    apr_pool_destroy(lpool);
  }
  *ln = line_nr;
}
1600 
main(int argc,const char * const argv[])1601 int main(int argc, const char * const argv[]) {
1602   apr_table_entry_t *entry;
1603   long performance = -1;
1604   time_t start = time(NULL);
1605   time_t end;
1606   int line_nr = 0;
1607   int deny_count = 0;
1608   char *time_string;
1609   int i, rc;
1610   const char *access_log = NULL;
1611   FILE *f;
1612   apr_pool_t *pool;
1613   apr_table_t *rules;
1614   apr_table_t *special_rules;
1615   apr_table_t *blacklist;
1616   apr_table_t *rules_url;
1617   int blacklist_size = 0;
1618   int whitelist_size = 0;
1619   char *cmd = strrchr(argv[0], '/');
1620   const char *httpdconf = NULL;
1621   apr_app_initialize(&argc, &argv, NULL);
1622   apr_pool_create(&pool, NULL);
1623   rules = apr_table_make(pool, 10);
1624   special_rules = apr_table_make(pool, 10);
1625   blacklist = apr_table_make(pool, 10);
1626   rules_url = apr_table_make(pool, 10);
1627   rc = nice(10);
1628   if(rc == -1) {
1629     fprintf(stderr, "ERROR, failed to change nice value: %s\n", strerror(errno));
1630   }
1631   if(cmd == NULL) {
1632     cmd = (char *)argv[0];
1633   } else {
1634     cmd++;
1635   }
1636 
1637   argc--;
1638   argv++;
1639   while(argc >= 1) {
1640     if(strcmp(*argv,"-v") == 0) {
1641       if (--argc >= 1) {
1642         m_verbose = atoi(*(++argv));
1643       }
1644     } else if(strcmp(*argv,"-c") == 0) {
1645       if (--argc >= 1) {
1646         httpdconf = *(++argv);
1647       }
1648     } else if(strcmp(*argv,"-i") == 0) {
1649       if (--argc >= 1) {
1650         access_log = *(++argv);
1651       }
1652     } else if(strcmp(*argv,"-k") == 0) {
1653       if (--argc >= 1) {
1654         m_pfx = *(++argv);
1655       }
1656     } else if(strcmp(*argv,"-f") == 0) {
1657       if (--argc >= 1) {
1658         m_filter = *(++argv);
1659       }
1660     } else if(strcmp(*argv,"-d") == 0) {
1661       if (--argc >= 1) {
1662         m_path_depth = atoi(*(++argv));
1663       }
1664     } else if(strcmp(*argv,"-u") == 0) {
1665       if (--argc >= 1) {
1666         const char *coders = *(++argv);
1667         if(strstr(coders, "uni")) {
1668           m_mode |= QOS_DEC_MODE_FLAGS_UNI;
1669         }
1670         if(strstr(coders, "ansi")) {
1671           m_mode |= QOS_DEC_MODE_FLAGS_ANSI;
1672         }
1673         if(strstr(coders, "html")) {
1674           m_mode |= QOS_DEC_MODE_FLAGS_HTML;
1675         }
1676       }
1677     } else if(strcmp(*argv,"-n") == 0) {
1678       m_redundant = 0;
1679     } else if(strcmp(*argv,"-b") == 0) {
1680       if (--argc >= 1) {
1681         m_base64 = atoi(*(++argv));
1682       }
1683     } else if(strcmp(*argv,"-l") == 0) {
1684       if (--argc >= 1) {
1685         m_query_len_pcre = atoi(*(++argv));
1686       }
1687     } else if(strcmp(*argv,"-p") == 0) {
1688       m_query_pcre = 1;
1689     } else if(strcmp(*argv,"-m") == 0) {
1690       m_query_multi_pcre = 1;
1691     } else if(strcmp(*argv,"-o") == 0) {
1692       m_query_o_pcre = 1;
1693     } else if(strcmp(*argv,"-s") == 0) {
1694       m_query_single_pcre = 1;
1695     } else if(strcmp(*argv,"-e") == 0) {
1696       m_exit_on_error = 1;
1697     } else if(strcmp(*argv,"-t") == 0) {
1698       performance = 0;
1699     } else if(strcmp(*argv,"-h") == 0) {
1700       m_handler = 1;
1701     } else if(strcmp(*argv,"-?") == 0) {
1702       usage(cmd, 0);
1703     } else if(strcmp(*argv,"-help") == 0) {
1704       usage(cmd, 0);
1705     } else if(strcmp(*argv,"--help") == 0) {
1706       usage(cmd, 0);
1707     } else if(strcmp(*argv,"--man") == 0) {
1708       usage(cmd, 1);
1709     }
1710     argc--;
1711     argv++;
1712   }
1713   qos_init_pcre();
1714 
1715   if((m_query_pcre && m_query_multi_pcre) ||
1716      (m_query_pcre && m_query_single_pcre) ||
1717      (m_query_multi_pcre && m_query_single_pcre) ||
1718      (m_query_pcre && m_query_o_pcre) ||
1719      (m_query_multi_pcre && m_query_o_pcre) ||
1720      (m_query_single_pcre && m_query_o_pcre)) {
1721     fprintf(stderr, "ERROR, option -s,-m,-o or -p can't be used together.\n");
1722     exit(1);
1723   }
1724 
1725   if(httpdconf) {
1726     qos_load_blacklist(pool, blacklist, httpdconf);
1727     blacklist_size = apr_table_elts(blacklist)->nelts;
1728     qos_load_whitelist(pool, rules, httpdconf);
1729     whitelist_size = apr_table_elts(rules)->nelts;
1730   }
1731 
1732   if(access_log == NULL) usage(cmd, 0);
1733   f = fopen(access_log, "r");
1734   if(f == NULL) {
1735     fprintf(stderr, "ERROR, could not open input file %s\n", access_log);
1736     exit(1);
1737   }
1738   qos_process_log(pool, blacklist, rules, rules_url, special_rules, f, &line_nr, &deny_count, 1);
1739   fclose(f);
1740 
1741   if(m_redundant) {
1742     int xl = 0;
1743     int y = 0;
1744     // delete useless rules
1745     qos_delete_obsolete_rules(pool, rules, rules_url);
1746     // ensure, we have not deleted to many!
1747     if(m_verbose) {
1748       printf("# verify new rules ...\n");
1749       fflush(stdout);
1750     }
1751     //    if(httpdconf) {
1752     //      qos_load_whitelist(pool, rules, httpdconf);
1753     //    }
1754     f = fopen(access_log, "r");
1755     qos_process_log(pool, blacklist, rules, rules_url, special_rules, f, &xl, &y, 0);
1756     fclose(f);
1757   }
1758 
1759   if(performance == 0) {
1760     int lx = 0;
1761     apr_time_t tv;
1762     f = fopen(access_log, "r");
1763     tv = apr_time_now();
1764     qos_measurement(pool, blacklist, rules, f, &lx);
1765     tv = apr_time_now() - tv;
1766     performance = apr_time_msec(tv) + (apr_time_sec(tv) * 1000);
1767     performance = performance / lx;
1768     fclose(f);
1769   }
1770 
1771   end = time(NULL);
1772   time_string = ctime(&end);
1773   time_string[strlen(time_string) - 1] = '\0';
1774   printf("\n# --------------------------------------------------------\n");
1775   printf("# %s\n", time_string);
1776   printf("# %d rules from %d access log lines\n", apr_table_elts(rules)->nelts, line_nr);
1777   printf("#  mod_qos version: %s\n", man_version);
1778   if(performance >= 0) {
1779     printf("#  performance index (ms/req): %ld\n", performance);
1780   }
1781   printf("#  source (-i): %s\n", access_log);
1782   printf("#  path depth (-d): %d\n", m_path_depth);
1783   printf("#  disable path only regex (-h): %s\n", m_handler == 1 ? "yes" : "no");
1784   printf("#  base64 detection level (-b): %d\n", m_base64);
1785   printf("#  redundancy check (-n): %s\n", m_redundant == 1 ? "yes" : "no");
1786   printf("#  pcre only for query (-p): %s\n", m_query_pcre == 1 ? "yes" : "no");
1787   printf("#  decoding (-u): url");
1788   if(m_mode & QOS_DEC_MODE_FLAGS_UNI) {
1789     printf(" uni");
1790   }
1791   if(m_mode & QOS_DEC_MODE_FLAGS_HTML) {
1792     printf(" html");
1793   }
1794   if(m_mode & QOS_DEC_MODE_FLAGS_ANSI) {
1795     printf(" ansi");
1796   }
1797   printf("\n");
1798   printf("#  one pcre for query value (-m): %s\n", m_query_multi_pcre == 1 ? "yes" : "no");
1799   if(m_query_o_pcre) {
1800     printf("#  ignore query order (-o): yes\n");
1801   }
1802   printf("#  single pcre for query (-s): %s\n", m_query_single_pcre == 1 ? "yes" : "no");
1803   printf("#  query outsize (-l): %d\n", m_query_len_pcre);
1804   printf("#  exit on error (-e): %s\n", m_exit_on_error == 1 ? "yes" : "no");
1805   printf("#  rule file (-c): %s\n", httpdconf == NULL ? "-" : httpdconf);
1806   if(httpdconf) {
1807     printf("#    whitelist (loaded existing rules): %d\n", whitelist_size);
1808     printf("#    blacklist (loaded deny rules): %d\n", blacklist_size);
1809     printf("#    blacklist matches: %d\n", deny_count);
1810   }
1811   printf("#  duration: %ld minutes\n", (end - start) / 60);
1812   printf("# --------------------------------------------------------\n");
1813 
1814   {
1815     STACK_OF(qs_rule_t) *st = sk_new(STACK_qs_cmp);
1816     qs_rule_t *r;
1817     int j = 1;
1818     entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
1819     for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
1820       //  printf("QS_PermitUri +QSF%0.3d deny \"%s\"\n", i+1, entry[i].key);
1821       r = apr_pcalloc(pool, sizeof(qs_rule_t));
1822       r->rule = entry[i].key;
1823       sk_push(st, (char *)r);
1824     }
1825     sk_sort(st);
1826     i = sk_num(st);
1827     for(; i > 0; i--) {
1828       r = (qs_rule_t *)sk_value(st, i-1);
1829       printf("QS_PermitUri +%s%.3d deny \"%s\"\n",
1830              m_pfx ? m_pfx : "QSF",
1831              j, qs_apache_escape(pool, r->rule));
1832       j++;
1833     }
1834   }
1835 
1836   apr_pool_destroy(pool);
1837   return 0;
1838 }
1839