1 /* -*-mode: c; indent-tabs-mode: nil; c-basic-offset: 2; -*-
2 */
3 /**
4 * Filter utilities for the quality of service module mod_qos
5 * used to create white list rules for request line filters.
6 *
7 * See http://mod-qos.sourceforge.net/ for further
8 * details.
9 *
10 * Copyright (C) 2020 Pascal Buchbinder
11 *
12 * Licensed to the Apache Software Foundation (ASF) under one or more
13 * contributor license agreements. See the NOTICE file distributed with
14 * this work for additional information regarding copyright ownership.
15 * The ASF licenses this file to You under the Apache License, Version 2.0
16 * (the "License"); you may not use this file except in compliance with
17 * the License. You may obtain a copy of the License at
18 *
19 * http://www.apache.org/licenses/LICENSE-2.0
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an "AS IS" BASIS,
23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 * See the License for the specific language governing permissions and
25 * limitations under the License.
26 *
27 */
28
29 static const char revision[] = "$Id: qsfilter2.c 2595 2020-01-03 06:19:53Z pbuchbinder $";
30
31 /* system */
32 #include <stdio.h>
33 #include <errno.h>
34 #include <string.h>
35
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <time.h>
39
40 #include <pcre.h>
41
42 /* apr */
43 #include <apr.h>
44 #include <apr_uri.h>
45 #include <apr_signal.h>
46 #include <apr_strings.h>
47 #include <apr_network_io.h>
48 #include <apr_file_io.h>
49 #include <apr_time.h>
50 #include <apr_getopt.h>
51 #include <apr_general.h>
52 #include <apr_lib.h>
53 #include <apr_portable.h>
54 #include <apr_thread_proc.h>
55 #include <apr_thread_cond.h>
56 #include <apr_thread_mutex.h>
57 #include <apr_support.h>
58 //#include <ap_config.h>
59
60 /* OpenSSL */
61 #include <openssl/safestack.h>
62
63 #include "qs_util.h"
64
65 #define MAX_LINE 32768
66 /* 2mb */
67 #define MAX_BODY_BUFFER 2097152
68 #define CR 13
69 #define LF 10
70
/* Selects which part of a URL is meant: the path or the query.
 * NOTE(review): the enum is not referenced in this part of the file;
 * presumably it is used by the rule generation code further down - confirm. */
typedef enum {
  QS_UT_PATH,
  QS_UT_QUERY
} qs_url_type_e;
75
76 #define QS_PCRE_RESERVED "{}[]()^$.|*+?\\-"
77 //#define QS_PCRE_RESERVED "{}[]()^$.|*+?\"'\\-"
78
79 /* reserved (to be escaped): {}[]()^$.|*+?\- */
80 #define QS_UNRESERVED "a-zA-Z0-9-\\._~% "
81 #define QS_GEN ":/\\?#\\[\\]@"
82 #define QS_SUB "!$&'\\(\\)\\*\\+,;="
83 #define QS_SUB_S "!$&\\(\\)\\*\\+,;="
84
85 #define QS_SIMPLE_PATH_PCRE "(/[a-zA-Z0-9\\-_]+)+[/]?\\.?[a-zA-Z]{0,4}"
86 #define QS_B64 "([a-z]+[a-z0-9]*[A-Z]+[A-Z0-9]*)"
87 #define QS_HX "([A-F0-9]*[A-F]+[0-9]+[A-F0-9]*)"
88
89 #define QS_OVECCOUNT 3
90
91 /* request line detection */
92 #define QOSC_REQ "(OPTIONS|GET|HEAD|POST|PUT|DELETE|TRACE|CONNECT|PROPFIND|PROPPATCH|MKCOL|COPY|MOVE|LOCK|UNLOCK|VERSION-CONTROL|REPORT|CHECKOUT|CHECKIN|UNCHECKOUT|MKWORKSPACE|UPDATE|LABEL|MERGE|BASELINE-CONTROL|MKACTIVITY|ORDERPATCH|ACL|PATCH|SEARCH|BCOPY|BDELETE|BMOVE|BPROPFIND|BPROPPATCH|NOTIFY|POLL|SUBSCRIBE|UNSUBSCRIBE|X-MS-ENUMATTS|RPC_IN_DATA|RPC_OUT_DATA) /[\x20-\x21\x23-\xFF]* HTTP/"
93
94 pcre *pcre_b64;
95 pcre *pcre_hx;
96 pcre *pcre_simple_path;
97
98 #define QOS_DEC_MODE_FLAGS_URL 0x00
99 #define QOS_DEC_MODE_FLAGS_HTML 0x01
100 #define QOS_DEC_MODE_FLAGS_UNI 0x02
101 #define QOS_DEC_MODE_FLAGS_ANSI 0x04
102
103 /* global variables to store settings */
104 static int m_mode = QOS_DEC_MODE_FLAGS_URL;
105 static int m_base64 = 5;
106 static int m_verbose = 1;
107 static int m_path_depth = 1;
108 static int m_redundant = 1;
109 static int m_query_pcre = 0;
110 static int m_query_multi_pcre = 0;
111 static int m_query_o_pcre = 0;
112 static int m_query_single_pcre = 0;
113 static int m_query_len_pcre = 10;
114 static int m_exit_on_error = 0;
115 static int m_handler = 0;
116 static pcre *m_req_regex = NULL;
117 static int m_log_req_regex = 0;
118 static const char *m_pfx = NULL;
119 static const char *m_filter = NULL;
120
/* A single request line rule. */
typedef struct {
  /* compiled expression, executed by pcre_exec() */
  pcre *pcre;
  /* study data for the compiled expression (result of pcre_study()) */
  pcre_extra *extra;
  /* string compared by STACK_qs_cmp() when rules are sorted */
  char *rule;
  /* path part of the rule; qos_query_optimization() merges rules whose
     path strings are equal */
  char *path;
  /* query strings, joined by "|" when rules are merged */
  char *query_m_string;
  /* characters handed to qos_2pcre() in order to build the character
     class for the query values */
  char *query_m_pcre;
  /* non-zero: the rule is skipped by qos_query_optimization() */
  int fragment;
} qs_rule_t;
130
131
132 /* openssl stack compare function used to sort the rules */
STACK_qs_cmp(const char * const * _pA,const char * const * _pB)133 int STACK_qs_cmp(const char * const *_pA, const char * const *_pB) {
134 qs_rule_t *pA=*(( qs_rule_t **)_pA);
135 qs_rule_t *pB=*(( qs_rule_t **)_pB);
136 return strcmp(pA->rule,pB->rule);
137 }
138
139 /* compiles a pcre (exit on error) */
qos_pcre_compile(char * pattern,int option)140 static pcre *qos_pcre_compile(char *pattern, int option) {
141 const char *errptr = NULL;
142 int erroffset;
143 pcre *pcre = pcre_compile(pattern, PCRE_DOTALL|option, &errptr, &erroffset, NULL);
144 if(pcre == NULL) {
145 fprintf(stderr, "ERROR, rule <%s> could not compile pcre at position %d,"
146 " reason: %s\n", pattern, erroffset, errptr);
147 exit(1);
148 }
149 return pcre;
150 }
151
152 /* tries to detect base64/hex patterns (mix of upper and lower case characters) */
qos_detect_b64(char * line,int silent)153 static char *qos_detect_b64(char *line, int silent) {
154 int ovector[QS_OVECCOUNT];
155 int rc_c = pcre_exec(pcre_b64, NULL, line, strlen(line), 0, 0, ovector, QS_OVECCOUNT);
156 if(rc_c >= 0) {
157 if((m_verbose > 1) && !silent) printf(" B64: %.*s\n",
158 ovector[1] - ovector[0], &line[ovector[0]]);
159 return &line[ovector[0]];
160 }
161 rc_c = pcre_exec(pcre_hx, NULL, line, strlen(line), 0, 0, ovector, QS_OVECCOUNT);
162 if(rc_c >= 0) {
163 if((m_verbose > 1) && !silent) printf(" HX: %.*s\n",
164 ovector[1] - ovector[0], &line[ovector[0]]);
165 return &line[ovector[0]];
166 }
167 return NULL;
168 }
169
170 /* escape double quotes and backslash (to be used for Apache directive) */
qs_apache_escape(apr_pool_t * pool,const char * line)171 static char *qs_apache_escape(apr_pool_t *pool, const char *line) {
172 char *ret = apr_pcalloc(pool, strlen(line) * 4);
173 int i = 0;
174 const char *in = line;
175 while(in && in[0]) {
176 if(in[0] == '"') {
177 ret[i] = '\\';
178 i++;
179 ret[i] = 'x';
180 i++;
181 ret[i] = '2';
182 i++;
183 ret[i] = '2';
184 i++;
185 } else if(in[0] == '\\' && in[1] == '\\') {
186 ret[i] = '\\';
187 i++;
188 ret[i] = 'x';
189 i++;
190 ret[i] = '5';
191 i++;
192 ret[i] = 'c';
193 i++;
194 in++;
195 } else {
196 ret[i] = (char)in[0];
197 i++;
198 }
199 in++;
200 }
201 return ret;
202 }
203
204 /* escape a string in order to be used withn a pcre */
qos_escape_pcre(apr_pool_t * pool,char * line)205 static char *qos_escape_pcre(apr_pool_t *pool, char *line) {
206 int i = 0;
207 unsigned char prev = 0;
208 unsigned char *in = (unsigned char *)line;
209 char *ret = apr_pcalloc(pool, strlen(line) * 4);
210 int reti = 0;
211 if(strlen(line) == 0) return "";
212 while(in[i]) {
213 if(strchr(QS_PCRE_RESERVED, in[i]) != NULL) {
214 if(prev && (prev == '\\')) {
215 /* already escaped */
216 ret[reti] = in[i];
217 reti++;
218 } else if(prev && (in[i] == '\\') && (strchr(QS_PCRE_RESERVED, in[i+1]) != NULL)) {
219 /* escape char */
220 ret[reti] = in[i];
221 reti++;
222 } else {
223 ret[reti] = '\\';
224 reti++;
225 ret[reti] = in[i];
226 reti++;
227 }
228 } else if((in[i] < ' ') || (in[i] > '~')) {
229 sprintf(&ret[reti], "\\x%02x", in[i]);
230 reti = reti + 4;
231 } else {
232 ret[reti] = in[i];
233 reti++;
234 }
235 prev = in[i];
236 i++;
237 }
238 return ret;
239 }
240
/* helper for url decoding: value of a single hex digit (the caller
   has to ensure that "ch" really is a hex digit) */
static int qos_hex_digit_value(int ch) {
  if(isdigit(ch)) {
    return ch - '0';
  }
  if(isupper(ch)) {
    return ch - ('A' - 10);
  }
  return ch - ('a' - 10);
}

/* helper for url decoding: converts the two hex digits x[0] and x[1]
   into the value they represent (x[0] is the high nibble) */
static int qos_hex2c(const char *x) {
  int high = qos_hex_digit_value(x[0]);
  int low = qos_hex_digit_value(x[1]);
  high <<= 4;
  return high + low;
}
264
/* returns 1 if x is a hex digit (0-9, a-f, A-F), 0 otherwise */
static int qos_ishex(char x) {
  int is_decimal = (x >= '0') && (x <= '9');
  int is_lower = (x >= 'a') && (x <= 'f');
  int is_upper = (x >= 'A') && (x <= 'F');
  if(is_decimal || is_lower || is_upper) {
    return 1;
  }
  return 0;
}
271
272 /* url decoding */
qos_unescaping(char * x)273 static int qos_unescaping(char *x) {
274 int i, j, ch;
275 if (x[0] == '\0')
276 return 0;
277 for (i = 0, j = 0; x[i] != '\0'; i++, j++) {
278 ch = x[i];
279 if(ch == '%' && qos_ishex(x[i + 1]) && qos_ishex(x[i + 2])) {
280 ch = qos_hex2c(&x[i + 1]);
281 i += 2;
282 } else if((m_mode & QOS_DEC_MODE_FLAGS_UNI) &&
283 ((ch == '%') || (ch == '\\')) &&
284 ((x[i + 1] == 'u') || (x[i + 1] == 'U')) &&
285 qos_ishex(x[i + 2]) &&
286 qos_ishex(x[i + 3]) &&
287 qos_ishex(x[i + 4]) &&
288 qos_ishex(x[i + 5])) {
289 /* unicode %uXXXX */
290 ch = qos_hex2c(&x[i + 4]);
291 if((ch > 0x00) && (ch < 0x5f) &&
292 ((x[i + 2] == 'f') || (x[i + 2] == 'F')) &&
293 ((x[i + 3] == 'f') || (x[i + 3] == 'F'))) {
294 ch += 0x20;
295 }
296 i += 5;
297 } else if (ch == '\\' && (x[i + 1] == 'x') && qos_ishex(x[i + 2]) && qos_ishex(x[i + 3])) {
298 ch = qos_hex2c(&x[i + 2]);
299 i += 3;
300 } else if (ch == '+') {
301 ch = ' ';
302 }
303 x[j] = ch;
304 }
305 x[j] = '\0';
306 if(strlen(x) != j) {
307 fprintf(stderr, "WARNING, found escaped null char %s\n", x);
308 }
309 return j;
310 }
311
/**
 * Reads a single line from the file into the buffer.
 *
 * The line ends at a line feed, at the character 0x04, at the end of
 * the file or as soon as the buffer is full. A carriage return is
 * skipped (the character following it is processed instead). The line
 * terminator is not stored and the buffer is always null terminated.
 *
 * @param s Buffer to store the line
 * @param n Size of the buffer
 * @param f File to read from
 * @return 1 if the end of the file has been reached (or a read error
 *         occurred), 0 otherwise
 */
static int qos_fgetline(char *s, int n, FILE *f) {
  int i = 0;
  if((s == NULL) || (n <= 0)) {
    return 1;
  }
  while (1) {
    /* keep the int returned by fgetc() in order to distinguish EOF
       from the data byte 0xff */
    int c = fgetc(f);
    if (c == '\r') {
      c = fgetc(f);
    }
    if ((c == EOF) || (c == 0x4) || (c == '\n') || (i == (n - 1))) {
      s[i] = '\0';
      return ((feof(f) || ferror(f)) ? 1 : 0);
    }
    s[i] = (char) c;
    ++i;
  }
}
326
327 /* init global pcre */
qos_init_pcre()328 static void qos_init_pcre() {
329 char buf[1024];
330 sprintf(buf, "%s{%d,}", QS_B64, m_base64);
331 pcre_b64 = qos_pcre_compile(buf, 0);
332 sprintf(buf, "%s{%d,}", QS_HX, m_base64);
333 pcre_hx = qos_pcre_compile(buf, 0);
334 pcre_simple_path = qos_pcre_compile("^"QS_SIMPLE_PATH_PCRE"$", 0);
335 m_req_regex = qos_pcre_compile(QOSC_REQ, 0);
336 }
337
usage(char * cmd,int man)338 static void usage(char *cmd, int man) {
339 char space[1024];
340 memset(space, ' ', 1024);
341 space[strlen(cmd)] = '\0';
342 if(man) {
343 //.TH [name of program] [section number] [center footer] [left footer] [center header]
344 printf(".TH %s 1 \"%s\" \"mod_qos utilities %s\" \"%s man page\"\n", qs_CMD(cmd), man_date,
345 man_version, cmd);
346 }
347 printf("\n");
348 if(man) {
349 printf(".SH NAME\n");
350 }
351 qs_man_print(man, "%s - an utility to generate mod_qos request line rules out from\n",
352 cmd);
353 qs_man_print(man, "existing access/audit log data.\n");
354 printf("\n");
355 if(man) {
356 printf(".SH SYNOPSIS\n");
357 }
358 qs_man_print(man, "%s%s -i <path> [-c <path>] [-d <num>] [-h] [-b <num>]\n", man ? "" : "Usage: ", cmd);
359 qs_man_print(man, " %s [-p|-s|-m|-o] [-l <len>] [-n] [-e] [-u 'uni']\n", space);
360 qs_man_print(man, " %s [-k <prefix>] [-t] [-f <path>] [-v 0|1|2]\n", space);
361 printf("\n");
362 if(man) {
363 printf(".SH DESCRIPTION\n");
364 } else {
365 printf("Summary\n");
366 }
367 qs_man_print(man, " mod_qos implements a request filter which validates each request\n");
368 qs_man_print(man, " line. The module supports both, negative and positive security\n");
369 qs_man_print(man, " model. The QS_Deny* directives are used to specify request line\n");
370 qs_man_print(man, " patterns which are not allowed to access the server (negative\n");
371 qs_man_print(man, " security model / blacklist). These rules are used to restrict\n");
372 qs_man_print(man, " access to certain resources which should not be available to\n");
373 qs_man_print(man, " users or to protect the server from malicious patterns. The\n");
374 qs_man_print(man, " QS_Permit* rules implement a positive security model (whitelist).\n");
375 qs_man_print(man, " These directives are used to define allowed request line patterns.\n");
376 qs_man_print(man, " Request which do not match any of these patterns are not allowed\n");
377 qs_man_print(man, " to access the server.\n");
378 if(man) printf("\n\n");
379 qs_man_print(man, " %s is an audit log analyzer used to generate filter\n", cmd);
380 qs_man_print(man, " rules (perl compatible regular expressions) which may be used\n");
381 qs_man_print(man, " by mod_qos to deny access for suspect requests (QS_PermitUri rules).\n");
382 qs_man_print(man, " It parses existing audit log files in order to generate request\n");
383 qs_man_print(man, " patterns covering all allowed requests.\n");
384 printf("\n");
385 if(man) {
386 printf(".SH OPTIONS\n");
387 } else {
388 printf("Options\n");
389 }
390 if(man) printf(".TP\n");
391 qs_man_print(man, " -i <path>\n");
392 if(man) printf("\n");
393 qs_man_print(man, " Input file containing request URIs.\n");
394 qs_man_print(man, " The URIs for this file have to be extracted from the servers\n");
395 qs_man_print(man, " access logs. Each line of the input file contains a request\n");
396 qs_man_print(man, " URI consisting of a path and and query.\n");
397 printf("\n");
398 printf(" Example:\n");
399 qs_man_println(man, " /aaa/index.do\n");
400 qs_man_println(man, " /aaa/edit?image=1.jpg\n");
401 qs_man_println(man, " /aaa/image/1.jpg\n");
402 qs_man_println(man, " /aaa/view?page=1\n");
403 qs_man_println(man, " /aaa/edit?document=1\n");
404 printf("\n");
405 qs_man_print(man, " These access log data must include current request URIs but\n");
406 qs_man_print(man, " also request lines from previous rule generation steps. It\n");
407 qs_man_print(man, " must also include request lines which cover manually generated\n");
408 qs_man_print(man, " rules.\n");
409 qs_man_print(man, " You may use the 'qos-path' and 'qos-query' variables to create\n");
410 qs_man_print(man, " an audit log containing all request data (path and query/body data).\n");
411 qs_man_print(man, " Example: 'CustomLog audit_log %{qos-path}n%{qos-query}n'.\n");
412 qs_man_print(man, " See also http://mod-qos.sourceforge.net#qsfiltersample about\n");
413 qs_man_print(man, " the module settings.\n");
414 if(man) printf("\n.TP\n");
415 qs_man_print(man, " -c <path>\n");
416 if(man) printf("\n");
417 qs_man_print(man, " mod_qos configuration file defining QS_DenyRequestLine and\n");
418 qs_man_print(man, " QS_PermitUri directives.\n");
419 qs_man_print(man, " %s generates rules from access log data automatically.\n", cmd);
420 qs_man_print(man, " Manually generated rules (QS_PermitUri) may be provided from\n");
421 qs_man_print(man, " this file. Note: each manual rule must be represented by a\n");
422 qs_man_print(man, " request URI in the input data (-i) in order to make sure not\n");
423 qs_man_print(man, " to be deleted by the rule optimisation algorithm.\n");
424 qs_man_print(man, " QS_Deny* rules from this file are used to filter request lines\n");
425 qs_man_print(man, " which should not be used for whitelist rule generation.\n");
426 printf("\n");
427 printf(" Example:\n");
428 qs_man_println(man, " # manually defined whitelist rule:\n");
429 qs_man_println(man, " QS_PermitUri +view deny \"^[/a-zA-Z0-9]+/view\\?(page=[0-9]+)?$\"\n");
430 qs_man_println(man, " # filter unwanted request line patterns:\n");
431 qs_man_println(man, " QS_DenyRequestLine +printable deny \".*[\\x00-\\x19].*\"\n");
432 printf("\n");
433 if(man) printf("\n.TP\n");
434 qs_man_print(man, " -d <num>\n");
435 if(man) printf("\n");
436 qs_man_print(man, " Depth (sub locations) of the path string which is defined as a\n");
437 qs_man_print(man, " literal string. Default is 1.\n");
438 if(man) printf("\n.TP\n");
439 qs_man_print(man, " -h\n");
440 if(man) printf("\n");
441 qs_man_print(man, " Always use a string representing the handler name in the path even\n");
442 qs_man_print(man, " the url does not have a query. See also -d option.\n");
443 if(man) printf("\n.TP\n");
444 qs_man_print(man, " -b <num>\n");
445 if(man) printf("\n");
446 qs_man_print(man, " Replaces url pattern by the regular expression when detecting\n");
447 qs_man_print(man, " a base64/hex encoded string. Detecting sensibility is defined by a\n");
448 qs_man_print(man, " numeric value. You should use values higher than 5 (default)\n");
449 qs_man_print(man, " or 0 to disable this function.\n");
450 if(man) printf("\n.TP\n");
451 qs_man_print(man, " -p\n");
452 if(man) printf("\n");
453 qs_man_print(man, " Represents query by pcre only (no literal strings).\n");
454 if(man) printf("\n.TP\n");
455 qs_man_print(man, " -s\n");
456 if(man) printf("\n");
457 qs_man_print(man, " Uses one single pcre for the whole query string.\n");
458 if(man) printf("\n.TP\n");
459 qs_man_print(man, " -m\n");
460 if(man) printf("\n");
461 qs_man_print(man, " Uses one pcre for multiple query values (recommended mode).\n");
462 if(man) printf("\n.TP\n");
463 qs_man_print(man, " -o\n");
464 if(man) printf("\n");
465 qs_man_print(man, " Does not care the order of query parameters.\n");
466 if(man) printf("\n.TP\n");
467 qs_man_print(man, " -l <len>\n");
468 if(man) printf("\n");
469 qs_man_print(man, " Outsizes the query length by the defined length ({0,size+len}),\n");
470 qs_man_print(man, " default is %d.\n", m_query_len_pcre);
471 if(man) printf("\n.TP\n");
472 qs_man_print(man, " -n\n");
473 if(man) printf("\n");
474 qs_man_print(man, " Disables redundant rules elimination.\n");
475 if(man) printf("\n.TP\n");
476 qs_man_print(man, " -e\n");
477 if(man) printf("\n");
478 qs_man_print(man, " Exit on error.\n");
479 if(man) printf("\n.TP\n");
480 qs_man_print(man, " -u 'uni'\n");
481 if(man) printf("\n");
482 qs_man_print(man, " Enables additional decoding methods. Use the same settings as you have\n");
483 qs_man_print(man, " used for the QS_Decoding directive.\n");
484 if(man) printf("\n.TP\n");
485 qs_man_print(man, " -k <prefix>\n");
486 if(man) printf("\n");
487 qs_man_print(man, " Prefix used to generate rule identifiers (QSF by default).\n");
488 if(man) printf("\n.TP\n");
489 qs_man_print(man, " -t\n");
490 if(man) printf("\n");
491 qs_man_print(man, " Calculates the maximal latency per request (worst case) using the\n");
492 qs_man_print(man, " generated rules.\n");
493 if(man) printf("\n.TP\n");
494 qs_man_print(man, " -f <path>\n");
495 if(man) printf("\n");
496 qs_man_print(man, " Filters the input by the provided path (prefix) only processing\n");
497 qs_man_print(man, " matching lines.\n");
498 if(man) printf("\n.TP\n");
499 qs_man_print(man, " -v <level>\n");
500 if(man) printf("\n");
501 qs_man_print(man, " Verbose mode. (0=silent, 1=rule source, 2=detailed). Default is 1.\n");
502 qs_man_print(man, " Don't use rules you haven't checked the request data used to\n");
503 qs_man_print(man, " generate it! Level 1 is highly recommended (as long as you don't\n");
504 qs_man_print(man, " have created the log data using your own web crawler).\n");
505 printf("\n");
506 if(man) {
507 printf(".SH OUTPUT\n");
508 } else {
509 printf("Output\n");
510 }
511 qs_man_print(man, " The output of %s is written to stdout. The output\n", cmd);
512 qs_man_print(man, " contains the generated QS_PermitUri directives but also\n");
513 qs_man_print(man, " information about the source which has been used to generate\n");
514 qs_man_print(man, " these rules. It is very important to check the validity of\n");
515 qs_man_print(man, " each request line which has been used to calculate the\n");
516 qs_man_print(man, " QS_PermitUri rules. Each request line which has been used to\n");
517 qs_man_print(man, " generate a new rule is shown in the output prefixed by\n");
518 qs_man_print(man, " \"ADD line <line number>:\". These request lines should be\n");
519 qs_man_print(man, " stored and reused at any later rule generation (add them to\n");
520 qs_man_print(man, " the URI input file). The subsequent line shows the generated\n");
521 qs_man_print(man, " rule.\n");
522 qs_man_print(man, " At the end of data processing a list of all generated\n");
523 qs_man_print(man, " QS_PermitUri rules is shown. These directives may be used\n");
524 qs_man_print(man, " withn the configuration file used by mod_qos.\n");
525 printf("\n");
526 if(man) {
527 printf(".SH EXAMPLE\n");
528 } else {
529 printf("Sample Usage and Output\n");
530 }
531 qs_man_println(man, " %s -i loc.txt -c httpd.conf -m -e\n", cmd);
532 qs_man_println(man, " ...\n");
533 qs_man_println(man, " # ADD line 1: /aaa/index.do\n");
534 qs_man_println(man, " # 003 ^(/[a-zA-Z0-9\\-_]+)+[/]?\\.?[a-zA-Z]{0,4}$\n");
535 qs_man_println(man, " # ADD line 3: /aaa/view?page=1\n");
536 qs_man_println(man, " # --- ^[/a-zA-Z0-9]+/view\\?(page=[0-9]+)?$\n");
537 qs_man_println(man, " # ADD line 4: /aaa/edit?document=1\n");
538 qs_man_println(man, " # 004 ^[/a-zA-Z]+/edit\\?((document)(=[0-9]*)*[&]?)*$\n");
539 qs_man_println(man, " # ADD line 5: /aaa/edit?image=1.jpg\n");
540 qs_man_println(man, " # 005 ^[/a-zA-Z]+/edit\\?((image)(=[0-9\\.a-zA-Z]*)*[&]?)*$\n");
541 qs_man_println(man, " ...\n");
542 qs_man_println(man, " QS_PermitUri +QSF001 deny \"^[/a-zA-Z]+/edit\\?((document|image)(=[0-9\\.a-zA-Z]*)*[&]?)*$\"\n");
543 qs_man_println(man, " QS_PermitUri +QSF002 deny \"^[/a-zA-Z0-9]+/view\\?(page=[0-9]+)?$\"\n");
544 qs_man_println(man, " QS_PermitUri +QSF003 deny \"^(/[a-zA-Z0-9\\-_]+)+[/]?\\.?[a-zA-Z]{0,4}$\"\n");
545 printf("\n");
546 if(man) {
547 printf(".SH SEE ALSO\n");
548 printf("qsdt(1), qsexec(1), qsgeo(1), qsgrep(1), qshead(1), qslog(1), qslogger(1), qspng(1), qsre(1), qsrespeed(1), qsrotate(1), qssign(1), qstail(1)\n");
549 printf(".SH AUTHOR\n");
550 printf("Pascal Buchbinder, http://mod-qos.sourceforge.net/\n");
551 } else {
552 printf("mod_qos %s\n", man_version);
553 printf("See http://mod-qos.sourceforge.net/ for further details.\n");
554 }
555 if(man) {
556 exit(0);
557 } else {
558 exit(1);
559 }
560 }
561
/* worker struct, used for parallel processing: carries the arguments
   which qos_worker() forwards to qos_get_used() */
typedef struct {
  /* pool used for the allocations of qos_get_used() */
  apr_pool_t *pool;
  /* table of rules (value of each entry is a qs_rule_t) */
  apr_table_t *rules;
  /* table whose keys are the request lines the rules are tested against */
  apr_table_t *rules_url;
  /* index of the first rule to process */
  int from;
  /* index of the rule after the last rule to process (exclusive) */
  int to;
} qs_worker_t;
570
571 /* determines, if a rule is really required */
qos_get_used(apr_pool_t * pool,apr_table_t * rules,apr_table_t * rules_url,int from,int to)572 static apr_table_t *qos_get_used(apr_pool_t *pool, apr_table_t *rules, apr_table_t *rules_url,
573 int from, int to) {
574 apr_table_t *used = apr_table_make(pool, 1);
575 int j;
576 for(j = from; j < to; j++) {
577 int l;
578 apr_table_entry_t *linee = (apr_table_entry_t *)apr_table_elts(rules_url)->elts;
579 if(m_verbose) {
580 printf("[%d]", j);
581 fflush(stdout);
582 }
583 for(l = 0; l < apr_table_elts(rules_url)->nelts; l++) {
584 char *line = linee[l].key;
585 int i;
586 int match = 0;
587 apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
588 for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
589 if(i != j) {
590 qs_rule_t *rs = (qs_rule_t *)entry[i].val;
591 if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) >= 0) {
592 match = 1;
593 break;
594 }
595 }
596 }
597 if(!match) {
598 /* no match, rule j is required */
599 apr_table_add(used, entry[j].key, "+");
600 }
601 }
602 }
603 return used;
604 }
605
qos_worker(void * argv)606 static void *qos_worker(void *argv) {
607 qs_worker_t *wt = argv;
608 return qos_get_used(wt->pool, wt->rules, wt->rules_url, wt->from, wt->to);
609 }
610
611 /* get the characters used withn the string in order to define a pcre */
qos_2pcre(apr_pool_t * pool,const char * line)612 static char *qos_2pcre(apr_pool_t *pool, const char *line) {
613 int hasA = 0;
614 int hasD = 0;
615 int hasE = 0;
616 int hasB = 0;
617 int i = 0;
618 unsigned char *in = (unsigned char *)line;
619 char *ret = apr_pcalloc(pool, strlen(line) * 6);
620 int reti = 0;
621 char *existing = "";
622 if(strlen(line) == 0) return "";
623 while(in[i]) {
624 if(isdigit(in[i])) {
625 if(!hasD) {
626 hasD = 1;
627 strcpy(&ret[reti], "0-9");
628 reti = reti + 3;
629 }
630 } else if(isalpha(in[i])) {
631 if(!hasA) {
632 hasA = 1;
633 strcpy(&ret[reti], "a-zA-Z");
634 reti = reti + 6;
635 }
636 } else if(in[i] == '\\') {
637 if(!hasE) {
638 hasE = 1;
639 strcpy(&ret[reti], "\\\\");
640 reti = reti + 2;
641 }
642 } else if(in[i] == '-') {
643 if(!hasB) {
644 hasB = 1;
645 strcpy(&ret[reti], "\\-");
646 reti = reti + 2;
647 }
648 } else if(in[i] == '\0') {
649 char *ck = apr_psprintf(pool, "#\\x%02x#", in[i]);
650 if(strstr(existing, ck) == NULL) {
651 sprintf(&ret[reti], "\\x%02x", in[i]);
652 reti = reti + 4;
653 existing = apr_pstrcat(pool, existing, ck, NULL);
654 }
655 } else if(strchr(ret, in[i]) == NULL) {
656 if(strchr(QS_PCRE_RESERVED, in[i]) != NULL) {
657 ret[reti] = '\\';
658 reti++;
659 ret[reti] = in[i];
660 reti++;
661 } else if((in[i] < ' ') || (in[i] > '~')) {
662 char *ck = apr_psprintf(pool, "#\\x%02x#", in[i]);
663 if(strstr(existing, ck) == NULL) {
664 sprintf(&ret[reti], "\\x%02x", in[i]);
665 reti = reti + 4;
666 existing = apr_pstrcat(pool, existing, ck, NULL);
667 }
668 } else {
669 ret[reti] = in[i];
670 reti++;
671 }
672 }
673 i++;
674 }
675 if(strlen(ret) == 0) return NULL;
676 ret[reti] = '\0';
677 return ret;
678 }
679
680 /* check for the pattern "p" in "r" using the delimter "d",
681 returns 1 if it is in the string */
qos_checkstr(apr_pool_t * pool,char * r,char * d,char * p)682 static int qos_checkstr(apr_pool_t *pool, char *r, char *d, char *p) {
683 /*
684 * r = ..|p|..
685 * r = p|...
686 * r = ..|p
687 * r = p
688 */
689 char *check1 = apr_pstrcat(pool, d, p, d, NULL);
690 char *check2 = apr_pstrcat(pool, p, d, NULL);
691 char *check3 = apr_pstrcat(pool, d, p, NULL);
692
693 if(strstr(r, check1) != NULL) {
694 return 1;
695 }
696 if(strncmp(r, check2, strlen(check2)) == 0) {
697 return 1;
698 }
699 if(strlen(r) > strlen(check3)) {
700 if((strncmp(&r[strlen(r)-strlen(check3)], check3, strlen(check3)) == 0)) {
701 return 1;
702 }
703 }
704 if(strcmp(r, p) == 0) {
705 return 1;
706 }
707
708 return 0;
709 }
710
711 /* add the string "n" to "o" using the delimiter "d" (only if not
712 already available */
qos_addstr(apr_pool_t * pool,char * o,char * d,char * n)713 static char *qos_addstr(apr_pool_t *pool, char *o, char *d, char *n) {
714 char *p = apr_pstrdup(pool, n);
715 char *r = o;
716 if(n == NULL) return o;
717 while(p && p[0]) {
718 char *this = p;
719 char *next = strchr(p, d[0]);
720
721 /* \| */
722 while(next) {
723 if((next > this) && (next[-1] == '\\')) {
724 next++;
725 next = strchr(next, d[0]);
726 } else {
727 break;
728 }
729 }
730 if(next == NULL) {
731 p = NULL;
732 } else {
733 next[0] = '\0';
734 next++;
735 p = next;
736 }
737 if(!qos_checkstr(pool, r, d, this)) {
738 r = apr_pstrcat(pool, r, d, this, NULL);
739 }
740 }
741 return r;
742 }
743
744
745 /* create a name=pcre string like this: ((s1|s2)(=[<pcre>]*)*[&]?)*" */
qos_qqs(apr_pool_t * pool,char * string,char * query_pcre,int singleEq,int hasEq,int startAmp)746 static char *qos_qqs(apr_pool_t *pool, char *string, char *query_pcre, int singleEq, int hasEq, int startAmp) {
747 char *se = NULL;
748 char *s = "";
749 if(startAmp) s = "[&]?";
750 if(singleEq) {
751 se = "(=[&]?)*";
752 }
753 if(strlen(query_pcre) > 0) {
754 return apr_pstrcat(pool, s, "((", string, ")(=[", qos_2pcre(pool, query_pcre), "]*)*[&]?)*", se, NULL);
755 } else {
756 if(hasEq && !singleEq) {
757 se = "(=[&]?)*";
758 return apr_pstrcat(pool, s, "(((", string, ")[&]?)*", se, ")*", NULL);
759 }
760 return apr_pstrcat(pool, s, "((", string, ")[&]?)*", se, NULL);
761 }
762 }
763
/* tries to optimize the rules by merging all query into one single pcre matching
   all values: rules (which are not marked as fragment) sharing the same
   path are replaced by one new rule "^<path>\?<query pcre>$", the table
   "rules" is updated in place */
static void qos_query_optimization(apr_pool_t *pool, apr_table_t *rules) {
  /* keys of the rules to remove (collected first, applied at the end) */
  apr_table_t *delete = apr_table_make(pool, 1);
  /* paths which have already been processed */
  apr_table_t *checked_path = apr_table_make(pool, 1);
  /* merged rules to add (key is the rule string, value a qs_rule_t) */
  apr_table_t *new = apr_table_make(pool, 1);
  int i, j;
  apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
  for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
    char *rule_str = entry[i].key;
    qs_rule_t *r = (qs_rule_t *)entry[i].val;
    if(!r->fragment && r->path && (apr_table_get(checked_path, r->path) == NULL)) {
      int merged = 0;
      /* start with the query data of rule i */
      char *query_m_string = r->query_m_string == NULL ? "" : r->query_m_string;
      char *query_m_pcre = r->query_m_pcre == NULL ? "" : r->query_m_pcre;
      if(m_verbose > 1) printf(" search for path %s (%s)\n", r->path, rule_str);
      if(m_verbose > 1) printf(" . %s %s\n", query_m_string, query_m_pcre);
      apr_table_add(checked_path, r->path, "");
      /* search for rules with the same path and delete them */
      for(j = 0; j < apr_table_elts(rules)->nelts; j++) {
        if(i != j) {
          qs_rule_t *n = (qs_rule_t *)entry[j].val;
          if(!n->fragment && n->path && (strcmp(r->path, n->path) == 0)) {
            if(m_verbose > 1) printf(" + %s %s\n",
                                     n->query_m_string == NULL ? "-" : n->query_m_string,
                                     n->query_m_pcre == NULL ? "-" : n->query_m_pcre);
            /* collect the query strings (separated by "|", no duplicates) */
            if(strlen(query_m_string) == 0) {
              query_m_string = apr_pstrcat(pool, query_m_string, n->query_m_string, NULL);
            } else {
              query_m_string = qos_addstr(pool, query_m_string, "|", n->query_m_string);
            }
            if(m_verbose > 1) printf(" > %s\n", query_m_string);
            /* collect the characters used to build the character class */
            query_m_pcre = apr_pstrcat(pool, query_m_pcre, n->query_m_pcre, NULL);
            apr_table_add(delete, entry[j].key, "");
            merged = 1;
          }
        }
      }
      /* update rule if merged to any */
      if(merged) {
        apr_table_add(delete, entry[i].key, "");
        if(m_verbose) {
          printf("# CHANGE: <%s>", rule_str);
        }
        {
          const char *errptr = NULL;
          char *rule = apr_pstrcat(pool, "^", r->path, NULL);
          /* zero initialized: only pcre, extra and path are set below */
          qs_rule_t *rs = apr_pcalloc(pool, sizeof(qs_rule_t));
          if(strlen(query_m_string) > 0) {
            rule = apr_pstrcat(pool, rule, "\\?",
                               qos_qqs(pool, query_m_string, query_m_pcre, 0, 0, 0), NULL);
          }
          rule = apr_pstrcat(pool, rule, "$", NULL);
          rs->pcre = qos_pcre_compile(rule, 0);
          rs->extra = pcre_study(rs->pcre, 0, &errptr);
          rs->path = r->path;
          apr_table_setn(new, rule, (char *)rs);
          if(m_verbose) {
            printf(" to <%s>\n", rule);
            fflush(stdout);
          }
        }
      }
    }
  }
  /* remove the rules which have been merged ... */
  entry = (apr_table_entry_t *)apr_table_elts(delete)->elts;
  for(i = 0; i < apr_table_elts(delete)->nelts; i++) {
    if(m_verbose) printf("# DEL rule: %s\n", entry[i].key);
    apr_table_unset(rules, entry[i].key);
  }
  /* ... and add the new rules replacing them */
  entry = (apr_table_entry_t *)apr_table_elts(new)->elts;
  for(i = 0; i < apr_table_elts(new)->nelts; i++) {
    apr_table_setn(rules, entry[i].key, entry[i].val);
  }
}
839
840 /* deletes rules which are not required and merge query name/value pairs */
qos_delete_obsolete_rules(apr_pool_t * pool,apr_table_t * rules,apr_table_t * rules_url)841 static void qos_delete_obsolete_rules(apr_pool_t *pool, apr_table_t *rules, apr_table_t *rules_url) {
842 apr_table_t *not_used = apr_table_make(pool, 1);
843 apr_table_t *used;
844 apr_table_t *used1;
845 pthread_attr_t *tha = NULL;
846 pthread_t tid;
847 qs_worker_t *wt = apr_pcalloc(pool, sizeof(qs_worker_t));
848
849
850 if(m_query_multi_pcre) {
851 if(m_verbose) {
852 printf("# search for redundant rules ...\n");
853 fflush(stdout);
854 }
855 qos_query_optimization(pool, rules);
856 if(m_verbose) printf("# ");
857 } else {
858 if(m_verbose) {
859 printf("# search for redundant rules ");
860 fflush(stdout);
861 }
862 }
863
864 wt->pool = pool;
865 wt->rules = rules;
866 wt->rules_url = rules_url;
867 wt->from = apr_table_elts(rules)->nelts / 2;
868 wt->to = apr_table_elts(rules)->nelts;
869
870 pthread_create(&tid, tha, qos_worker, (void *)wt);
871 used = qos_get_used(pool, rules, rules_url, 0, apr_table_elts(rules)->nelts / 2);
872 pthread_join(tid, (void *)&used1);
873 if(m_verbose) printf(" done\n");
874 {
875 int i;
876 apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
877 for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
878 if((apr_table_get(used, entry[i].key) == NULL) &&
879 (apr_table_get(used1, entry[i].key) == NULL)) {
880 if(m_verbose) printf("# DEL rule (not required): %s\n", entry[i].key);
881 apr_table_add(not_used, entry[i].key, "-");
882 }
883 }
884 entry = (apr_table_entry_t *)apr_table_elts(not_used)->elts;
885 for(i = 0; i < apr_table_elts(not_used)->nelts; i++) {
886 apr_table_unset(rules, entry[i].key);
887 }
888 }
889 }
890
891 /* test if we need to create a new url (and save line if the rule is used the very
892 first time (rule has been read from the configuration file)) */
qos_test_for_existing_rule(char * plain,char * line,apr_table_t * rules,apr_table_t * special_rules,int line_nr,apr_table_t * rules_url,apr_table_t * source_rules,int first)893 static int qos_test_for_existing_rule(char *plain, char *line, apr_table_t *rules,
894 apr_table_t *special_rules, int line_nr,
895 apr_table_t *rules_url, apr_table_t *source_rules, int first) {
896 int i;
897 apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
898 if((line == 0) || (strlen(line) == 0)) return 0;
899 for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
900 qs_rule_t *rs = (qs_rule_t *)entry[i].val;
901 if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) >= 0) {
902 if(first && (apr_table_get(source_rules, entry[i].key) == NULL)) {
903 apr_table_add(source_rules, entry[i].key, "");
904 apr_table_add(rules_url, line, "");
905 apr_table_setn(special_rules, entry[i].key, (char *)rs);
906 if(m_verbose) {
907 printf("# ADD line %d: %s\n", line_nr, plain);
908 printf("# --- %s\n", entry[i].key);
909 }
910 }
911 if(m_verbose > 1){
912 printf("LINE %d, exiting rule: %s\n", line_nr, entry[i].key);
913 }
914 return 1;
915 }
916 }
917 /* check for special rules */
918 entry = (apr_table_entry_t *)apr_table_elts(special_rules)->elts;
919 for(i = 0; i < apr_table_elts(special_rules)->nelts; i++) {
920 qs_rule_t *rs = (qs_rule_t *)entry[i].val;
921 if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) >= 0) {
922 if(m_verbose) {
923 printf("# ADD line %d: %s\n", line_nr, plain);
924 printf("# -(S) %s\n", entry[i].key);
925 }
926 apr_table_setn(rules, entry[i].key, (char *)rs);
927 return 1;
928 }
929 }
930 return 0;
931 }
932
933 /* filter lines we don't want to add to the whitelist */
qos_enforce_blacklist(apr_table_t * rules,const char * line)934 static int qos_enforce_blacklist(apr_table_t *rules, const char *line) {
935 int i;
936 apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
937 if((line == 0) || (strlen(line) == 0)) return 0;
938 for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
939 qs_rule_t *rs = (qs_rule_t *)entry[i].val;
940 if(pcre_exec(rs->pcre, rs->extra, line, strlen(line), 0, 0, NULL, 0) == 0) {
941 if(m_verbose > 1) printf(" blacklist match, rule %s\n", entry[i].key);
942 return 1;
943 }
944 }
945 return 0;
946 }
947
948 /* load existing rules */
qos_load_rules(apr_pool_t * pool,apr_table_t * ruletable,const char * httpdconf,const char * command,int option)949 static void qos_load_rules(apr_pool_t *pool, apr_table_t *ruletable,
950 const char *httpdconf, const char *command, int option) {
951 FILE *f = fopen(httpdconf, "r");
952 char line[MAX_LINE];
953 if(f == NULL) {
954 fprintf(stderr, "ERROR, could not open %s\n", httpdconf);
955 exit(1);
956 }
957 while(!qos_fgetline(line, sizeof(line), f)) {
958 // QS_DenyRequestLine '+'|'-'<id> 'log'|'deny' <pcre>
959 char *p = strstr(line, command);
960 if(p) {
961 p[0] = '\0';
962 p++;
963 }
964 if(p && (strchr(line, '#') == NULL)) {
965 p = strchr(p, ' ');
966 if(p) {
967 while(p[0] == ' ') p++;
968 p = strchr(p, ' ');
969 if(p) {
970 while(p[0] == ' ') p++;
971 p = strchr(p, ' ');
972 if(p) {
973 while(p[0] == ' ') p++;
974 if(m_verbose > 1) {
975 printf("load %s\n", p);
976 }
977 {
978 const char *errptr = NULL;
979 char *pattern;
980 pcre *pcre_test;
981 pcre_extra *extra;
982 qs_rule_t *rs;
983 if(p[0] == '"') {
984 int fl = strlen(p)-2;
985 pattern = apr_psprintf(pool, "%.*s", fl, &p[1]);
986 } else {
987 int fl = strlen(p);
988 pattern = apr_psprintf(pool, "%.*s", fl, p);
989 }
990 pcre_test = qos_pcre_compile(pattern, option);
991 extra = pcre_study(pcre_test, 0, &errptr);
992 rs = apr_pcalloc(pool, sizeof(qs_rule_t));
993 rs->pcre = pcre_test;
994 rs->extra = extra;
995 apr_table_setn(ruletable, pattern, (char *)rs);
996 }
997 }
998 }
999 }
1000 }
1001 }
1002 fclose(f);
1003 }
1004
/**
 * Loads the deny rules (QS_DenyRequestLine directives) of a configuration
 * file using case insensitive patterns.
 *
 * @param pool Pool used to allocate the rules
 * @param blacklist Table receiving the rules
 * @param httpdconf Path of the configuration file
 */
static void qos_load_blacklist(apr_pool_t *pool, apr_table_t *blacklist, const char *httpdconf) {
  const char *directive = "QS_DenyRequestLine";
  qos_load_rules(pool, blacklist, httpdconf, directive, PCRE_CASELESS);
}
/**
 * Loads the already existing whitelist rules (QS_PermitUri directives) of
 * a configuration file.
 *
 * @param pool Pool used to allocate the rules
 * @param rules Table receiving the rules
 * @param httpdconf Path of the configuration file
 */
static void qos_load_whitelist(apr_pool_t *pool, apr_table_t *rules, const char *httpdconf) {
  const char *directive = "QS_PermitUri";
  qos_load_rules(pool, rules, httpdconf, directive, 0);
}
1011
1012 /* tries to map a base64 string to a pcre */
qos_b64_2pcre(apr_pool_t * pool,const char * line)1013 static char *qos_b64_2pcre(apr_pool_t *pool, const char *line) {
1014 char *copy = apr_pstrdup(pool, line);
1015 char *b64 = qos_detect_b64(copy, 1);
1016 char *st = b64;
1017 char *ed = &b64[1];
1018 if(m_verbose > 1) printf(" B642pcre: %s", copy);
1019 /* reserved: {}[]()^$.|*+?\ */
1020 #define QS_BX "-_$+!"
1021 while(st[0] && (isdigit(st[0]) || isalpha(st[0]) || (strchr(QS_BX, st[0]) != NULL))) {
1022 st--;
1023 }
1024 st++;
1025 st[0] = '\0';
1026 while(ed[0] && (isdigit(ed[0]) || isalpha(ed[0]) || (strchr(QS_BX, ed[0]) != NULL))) {
1027 ed++;
1028 }
1029 if(m_verbose > 1) printf(" %s <> %s\n", copy, ed);
1030 return apr_pstrcat(pool, qos_escape_pcre(pool, copy),
1031 "[a-zA-Z0-9\\-_\\$\\+!]+",
1032 ed[0] == '\0' ? NULL : qos_escape_pcre(pool, ed), NULL);
1033 }
1034
1035
1036 /* maps a query string to a pairs of <string>=<pcre> or <pcre>=<pcre> */
qos_query_string_pcre(apr_pool_t * pool,const char * path)1037 static char *qos_query_string_pcre(apr_pool_t *pool, const char *path) {
1038 char *copy = apr_pstrdup(pool, path);
1039 char *pos = copy;
1040 char *ret = "";
1041 int isValue = 0;
1042 int open = 0;
1043 while(copy[0]) {
1044 if((copy[0] == '=') && (copy[1] != '=') && !open) {
1045 copy[0] = '\0';
1046 qos_unescaping(pos);
1047 if(!open) {
1048 ret = apr_pstrcat(pool, ret, "(", NULL);
1049 open = 1;
1050 }
1051 if(m_query_pcre) {
1052 if(strlen(pos) > 0) {
1053 ret = apr_pstrcat(pool, ret, "[", qos_2pcre(pool, pos), "]+=", NULL);
1054 } else {
1055 ret = apr_pstrcat(pool, ret, "=", NULL);
1056 }
1057 } else {
1058 ret = apr_pstrcat(pool, ret, qos_escape_pcre(pool, pos), "=", NULL);
1059 }
1060 open = 1;
1061 pos = copy;
1062 pos++;
1063 isValue = 1;
1064 }
1065 if(copy[0] == '&') {
1066 copy[0] = '\0';
1067 if(strlen(pos) == 0) {
1068 ret = apr_pstrcat(pool, ret, "[&]?", NULL);
1069 if(open) {
1070 ret = apr_pstrcat(pool, ret, ")?", NULL);
1071 open = 0;
1072 }
1073 } else {
1074 qos_unescaping(pos);
1075 ret = apr_psprintf(pool, "%s[%s]{0,%"APR_SIZE_T_FMT"}[&]?", ret, qos_2pcre(pool, pos),
1076 strlen(pos) + m_query_len_pcre);
1077 if(open) {
1078 ret = apr_pstrcat(pool, ret, ")?", NULL);
1079 open = 0;
1080 }
1081 }
1082 pos = copy;
1083 pos++;
1084 isValue = 0;
1085 }
1086 copy++;
1087 }
1088 if(pos != copy) {
1089 qos_unescaping(pos);
1090 if(isValue) {
1091 ret = apr_psprintf(pool, "%s[%s]{0,%"APR_SIZE_T_FMT"}[&]?", ret, qos_2pcre(pool, pos),
1092 strlen(pos) + m_query_len_pcre);
1093 } else {
1094 if(!open) {
1095 ret = apr_pstrcat(pool, "(", ret, NULL);
1096 open = 1;
1097 }
1098 if(m_query_pcre) {
1099 ret = apr_pstrcat(pool, ret, "[", qos_2pcre(pool, pos), "]+", NULL);
1100 } else {
1101 ret = apr_pstrcat(pool, ret, qos_escape_pcre(pool, pos), NULL);
1102 }
1103 }
1104 if(open) {
1105 ret = apr_pstrcat(pool, ret, ")?", NULL);
1106 open = 0;
1107 }
1108 }
1109 if(open) {
1110 ret = apr_pstrcat(pool, ret, ")?", NULL);
1111 open = 0;
1112 }
1113 if(m_query_pcre) {
1114 return ret;
1115 } else {
1116 return ret;
1117 /* it would be nice to use (see -o):
1118 * ((a=b)?(c=d)?)*
1119 * instead of:
1120 * (a=b)?(c=d)? and (c=d)?(a=b)?
1121 * but in this case, two rules are much faster than one
1122 * it's probably better to use the -m option
1123 */
1124 }
1125 }
1126
1127 /* maps a query string to a list of names and a single pcre for all values:
1128 <string>|<string>=<pcre> */
qos_multi_query_string_pcre(apr_pool_t * pool,const char * path,char ** query_m_string,char ** query_m_pcre)1129 static char *qos_multi_query_string_pcre(apr_pool_t *pool, const char *path,
1130 char **query_m_string, char **query_m_pcre) {
1131 char *copy = apr_pstrdup(pool, path);
1132 char *pos = copy;
1133 char *string = "";
1134 char *query_pcre = "";
1135 int isValue = 0;
1136 int singleEq = 0;
1137 int hasEq = 0;
1138 int startAmp = 0;
1139 if(copy[0] == '&') startAmp = 1;
1140 while(copy[0]) {
1141 if(copy[0] == '=') hasEq = 1;
1142 if((copy[0] == '=') && (copy[1] != '=') && !isValue) {
1143 copy[0] = '\0';
1144 qos_unescaping(pos);
1145 if(strlen(pos) > 0) {
1146 if(strlen(string) > 0) string = apr_pstrcat(pool, string, "|", NULL);
1147 string = apr_pstrcat(pool, string, qos_escape_pcre(pool, pos), NULL);
1148 } else {
1149 if((copy[1] == '&') || (copy[1] == '\0')) {
1150 singleEq = 1;
1151 }
1152 }
1153 pos = copy;
1154 pos++;
1155 isValue = 1;
1156 }
1157 if(copy[0] == '&') {
1158 copy[0] = '\0';
1159 if(!isValue) {
1160 qos_unescaping(pos);
1161 if(strlen(string) > 0) string = apr_pstrcat(pool, string, "|", NULL);
1162 string = apr_pstrcat(pool, string, qos_escape_pcre(pool, pos), NULL);
1163 } else {
1164 if(strlen(pos) != 0) {
1165 qos_unescaping(pos);
1166 query_pcre = apr_pstrcat(pool, query_pcre, pos, NULL);
1167 }
1168 }
1169 pos = copy;
1170 pos++;
1171 isValue = 0;
1172 }
1173 copy++;
1174 }
1175 if(pos != copy) {
1176 qos_unescaping(pos);
1177 if(isValue) {
1178 query_pcre = apr_pstrcat(pool, query_pcre, pos, NULL);
1179 } else {
1180 if(strlen(string) > 0) string = apr_pstrcat(pool, string, "|", NULL);
1181 string = apr_pstrcat(pool, string, qos_escape_pcre(pool, pos), NULL);
1182 }
1183 }
1184 *query_m_string = string;
1185 *query_m_pcre = query_pcre;
1186 return qos_qqs(pool, string, query_pcre, singleEq, hasEq, startAmp);
1187 }
1188
1189 /* maps a path to a single pcre (don't mind its length) */
qos_path_pcre(apr_pool_t * lpool,const char * path)1190 static char *qos_path_pcre(apr_pool_t *lpool, const char *path) {
1191 char *dec = apr_pstrdup(lpool, path);
1192 qos_unescaping(dec);
1193 return apr_pstrcat(lpool, "[", qos_2pcre(lpool, dec), "]+", NULL);
1194 }
1195
1196 /* maps a path to <pcre>/<string> */
qos_path_pcre_string(apr_pool_t * lpool,const char * path)1197 static char *qos_path_pcre_string(apr_pool_t *lpool, const char *path) {
1198 int nohandler = 0;
1199 char *lpath = apr_pstrdup(lpool, path);
1200 char *last;
1201 char *str = "";
1202 int depth = m_path_depth;
1203 char *rx = "";
1204 if(lpath[strlen(lpath)-1] == '/') {
1205 lpath[strlen(lpath)-1] = '\0';
1206 nohandler = 1;
1207 }
1208 last = strrchr(lpath, '/');
1209 while(last && depth) {
1210 qos_unescaping(last);
1211 if(m_base64 && qos_detect_b64(last, 0)) {
1212 str = apr_pstrcat(lpool, qos_b64_2pcre(lpool, last), str, NULL);
1213 } else {
1214 str = apr_pstrcat(lpool, qos_escape_pcre(lpool, last), str, NULL);
1215 }
1216 last[0] = '\0';
1217 last = strrchr(lpath, '/');
1218 depth--;
1219 }
1220 if(lpath[0]) {
1221 qos_unescaping(lpath);
1222 rx = apr_pstrcat(lpool, "[", qos_2pcre(lpool, lpath), "]+", NULL);
1223 }
1224 if(strlen(str) > 0) {
1225 if(nohandler) {
1226 rx = apr_pstrcat(lpool, rx, str, "[/]?", NULL);
1227 } else {
1228 rx = apr_pstrcat(lpool, rx, str, NULL);
1229 }
1230 }
1231 return rx;
1232 }
1233
qos_is_alnum(const char * string)1234 static int qos_is_alnum(const char *string) {
1235 unsigned char *in = (unsigned char *)string;
1236 int i = 0;
1237 if(in == NULL) return 0;
1238 while(in[i]) {
1239 if(!apr_isalnum(in[i])) return 0;
1240 i++;
1241 }
1242 return 1;
1243 }
1244
/**
 * Searches for rules which differ by a short alphanumeric sequence only
 * and replaces them by a more general rule.
 *
 * Each rule is compared with all other rules. Rules having the same
 * query_m_string and the same pattern length are counted. As soon as the
 * fifth such rule has been found, the pattern is compared with the pattern
 * of the other rule: s is the length of the common prefix and e the end of
 * the sequence which differs (or is alphanumeric in both patterns). If the
 * sequence is alphanumeric, a new rule is created which uses a character
 * class of the fixed length e-s instead of the sequence. Both rules are
 * deleted afterwards.
 *
 * @param pool Pool used to allocate the new rules
 * @param lpool Pool used for temporary data
 * @param rules Rule table to optimize
 * @param special_rules Not used by this function
 */
static void qos_rule_optimization(apr_pool_t *pool, apr_pool_t *lpool,
                                  apr_table_t *rules, apr_table_t *special_rules) {
  int i;
  apr_table_t *new_rules = apr_table_make(pool, 5);
  apr_table_t *del_rules = apr_table_make(pool, 5);
  apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
  for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
    qs_rule_t *rs = (qs_rule_t *)entry[i].val;
    int hit = 0;
    int j;
    for(j = 0; j < apr_table_elts(rules)->nelts; j++) {
      if(i != j) {
        qs_rule_t *rsj = (qs_rule_t *)entry[j].val;
        if(rs->query_m_string && rsj->query_m_string) {
          if(strcmp(rs->query_m_string, rsj->query_m_string) == 0) {
            if(strlen(entry[i].key) == strlen(entry[j].key)) {
              hit++;
            }
          }
          if(hit == 5) {
            int s = 0;
            int e = 0;
            /* common prefix */
            while(entry[i].key[s] && (entry[i].key[s] == entry[j].key[s])) s++;
            e = s;
            /* end of the differing sequence: the second pattern may be
               shorter than the first one, never read behind its end */
            while(entry[i].key[e] && entry[j].key[e] &&
                  ((entry[i].key[e] != entry[j].key[e]) ||
                   (apr_isalnum(entry[i].key[e]) && apr_isalnum(entry[j].key[e])))) e++;
            if((e > s) &&
               (s > 14) &&
               ((size_t)e < strlen(entry[i].key)) &&
               (strstr(&entry[i].key[e], "\?") != NULL)) {
              const char *errptr = NULL;
              char *match = apr_psprintf(lpool, "%.*s%.*s",
                                         e-s, &entry[i].key[s],
                                         e-s, &entry[j].key[s]);
              if(qos_is_alnum(match)) {
                char *matchx = apr_psprintf(lpool, "[%s]{%d}", qos_2pcre(lpool, match), e-s);
                char *new = apr_psprintf(pool, "%.*s%s%s", s, entry[i].key, matchx, &entry[i].key[e]);
                qs_rule_t *rsn = apr_pcalloc(pool, sizeof(qs_rule_t));
                rsn->pcre = qos_pcre_compile(new, 0);
                rsn->extra = pcre_study(rsn->pcre, 0, &errptr);
                rsn->path = rs->path;
                rsn->query_m_string = rs->query_m_string;
                rsn->query_m_pcre = rs->query_m_pcre;
                rsn->fragment = rs->fragment;
                if(m_verbose) {
                  printf("# CHANGE: <%s> to <%s>\n", entry[i].key, new);
                  fflush(stdout);
                }
                /* the rule table is updated after the iteration */
                apr_table_setn(new_rules, new, (char *)rsn);
                apr_table_addn(del_rules, entry[i].key, entry[i].val);
                apr_table_addn(del_rules, entry[j].key, entry[j].val);
                if(m_verbose > 1) {
                  printf(" [%s] [%s]\n", entry[i].key, entry[j].key);
                  printf(" [%s] [%s]\n", match, matchx);
                }
                break;
              }
            }
          }
        }
      }
    }
  }
  entry = (apr_table_entry_t *)apr_table_elts(new_rules)->elts;
  for(i = 0; i < apr_table_elts(new_rules)->nelts; i++) {
    apr_table_setn(rules, entry[i].key, entry[i].val);
  }
  entry = (apr_table_entry_t *)apr_table_elts(del_rules)->elts;
  for(i = 0; i < apr_table_elts(del_rules)->nelts; i++) {
    apr_table_unset(rules, entry[i].key);
  }
}
1318
1319 /* rules do not care the order of parameter values (makes rule processing slow)
1320 * (id=[0-9]{0,13}[&]?)?(name=[a-zA-Z]{0,12}[&]?)?
1321 * ((id=[0-9]{0,13}[&]?)|(name=[a-zA-Z]{0,12}[&]?))*
1322 */
qos_post_optimization(apr_pool_t * lpool,char * query)1323 static char *qos_post_optimization(apr_pool_t *lpool, char *query) {
1324 int hit = 0;
1325 char *p = query;
1326 while(p && p[0]) {
1327 if(strncmp(p, "[&]?)?(", 7) == 0) {
1328 hit = 1;
1329 p[5] = '|';
1330 }
1331 p++;
1332 }
1333 if(hit) {
1334 query[strlen(query)-1] = '\0';
1335 return apr_psprintf(lpool, "(%s)*", query);
1336 }
1337 return query;
1338 }
1339
qos_auto_detect(char ** raw)1340 static void qos_auto_detect(char **raw) {
1341 char *line = *raw;
1342 int rc_c = -1;
1343 if(m_req_regex) {
1344 int ovector[QS_OVECCOUNT];
1345 /* no request line, maybe raw Apache access log? */
1346 rc_c = pcre_exec(m_req_regex, NULL, line, strlen(line), 0, 0, ovector, QS_OVECCOUNT);
1347 if(rc_c >= 0) {
1348 char *sr;
1349 line = &line[ovector[0]];
1350 line[ovector[1] - ovector[0]] = '\0';
1351 sr = strchr(line, ' ');
1352 while(sr[0] == ' ')sr++;
1353 *raw = sr;
1354 sr = strrchr(line, ' ');
1355 sr[0] = '\0';
1356 }
1357 }
1358 if(rc_c < 0) {
1359 /* or an audit log like "%h %>s %{qos-loc}n %{qos-path}n%{qos-query}n" */
1360 char *pe = line;
1361 int pi = 3;
1362 while(pe && (pi > 0)) {
1363 pi--;
1364 pe = strchr(pe, ' ');
1365 if(pe) {
1366 pe++;
1367 }
1368 }
1369 if(pe && pe[0] == '/' && (pi == 0)) {
1370 *raw = pe;
1371 }
1372 }
1373 return;
1374 }
1375
1376 /* process the input file line by line */
qos_process_log(apr_pool_t * pool,apr_table_t * blacklist,apr_table_t * rules,apr_table_t * rules_url,apr_table_t * special_rules,FILE * f,int * ln,int * dc,int first)1377 static void qos_process_log(apr_pool_t *pool, apr_table_t *blacklist, apr_table_t *rules,
1378 apr_table_t *rules_url, apr_table_t *special_rules,
1379 FILE *f, int *ln, int *dc, int first) {
1380 char *readline = apr_pcalloc(pool, MAX_BODY_BUFFER);
1381 int deny_count = *dc;
1382 int line_nr = *ln;
1383 apr_table_t *source_rules = apr_table_make(pool, 10);
1384 int rule_optimization = 300;
1385 while(!qos_fgetline(readline, MAX_BODY_BUFFER, f)) {
1386 int doubleSlash = 0;
1387 apr_uri_t parsed_uri;
1388 apr_pool_t *lpool;
1389 char *line = readline;
1390 apr_pool_create(&lpool, NULL);
1391 line_nr++;
1392 if((strlen(line) > 1) && line[1] == '/') {
1393 doubleSlash = 1;
1394 line++;
1395 }
1396 if(line[0] != '/') {
1397 if(!m_log_req_regex) {
1398 m_log_req_regex = 1;
1399 fprintf(stderr, "WARNING, line %d: "
1400 "unexpected data format, try to detect request lines automatically\n",
1401 line_nr);
1402 }
1403 qos_auto_detect(&line);
1404 }
1405 if(apr_uri_parse(lpool, line, &parsed_uri) != APR_SUCCESS) {
1406 fprintf(stderr, "ERROR, could parse uri %s\n", line);
1407 if(m_exit_on_error) exit(1);
1408 }
1409 if(parsed_uri.path == NULL || (parsed_uri.path[0] != '/')) {
1410 fprintf(stderr, "WARNING, line %d: invalid request %s\n", line_nr, line);
1411 } else if(m_filter && parsed_uri.path && strncmp(parsed_uri.path, m_filter, strlen(m_filter)) != 0) {
1412 // skip filtered line
1413 } else {
1414 char *path = NULL;
1415 char *query = NULL;
1416 char *query_m_string = NULL;
1417 char *query_m_pcre = NULL;
1418 char *fragment = NULL;
1419 char *copy = apr_pstrdup(lpool, line);
1420 qos_unescaping(copy);
1421 if(qos_enforce_blacklist(blacklist, copy)) {
1422 fprintf(stderr, "WARNING: blacklist filter match at line %d for %s\n",
1423 line_nr, line);
1424 deny_count++;
1425 } else {
1426 if(!qos_test_for_existing_rule(line, copy, rules, special_rules,
1427 line_nr, rules_url, source_rules, first)) {
1428 if(m_verbose > 1) printf("LINE %d, analyse: %s\n", line_nr, line);
1429 if(parsed_uri.query) {
1430 if(strcmp(parsed_uri.path, "/") == 0) {
1431 path = apr_pstrdup(lpool, "/");
1432 } else {
1433 path = qos_path_pcre_string(lpool, parsed_uri.path);
1434 }
1435 if(m_query_single_pcre) {
1436 char *qc = apr_pstrdup(lpool, parsed_uri.query);
1437 qos_unescaping(qc);
1438 query = apr_pstrcat(lpool, "[", qos_2pcre(lpool, qc), "]+", NULL);
1439 } else {
1440 if(!m_query_multi_pcre) {
1441 query = qos_query_string_pcre(lpool, parsed_uri.query);
1442 if(m_query_o_pcre) {
1443 query = qos_post_optimization(lpool, query);
1444 }
1445 } else {
1446 query = qos_multi_query_string_pcre(lpool, parsed_uri.query,
1447 &query_m_string, &query_m_pcre);
1448 }
1449 }
1450 } else {
1451 if(strcmp(parsed_uri.path, "/") == 0) {
1452 path = apr_pstrdup(lpool, "/");
1453 } else {
1454 if(m_handler) {
1455 path = qos_path_pcre_string(lpool, parsed_uri.path);
1456 } else {
1457 if(pcre_exec(pcre_simple_path, NULL, parsed_uri.path,
1458 strlen(parsed_uri.path), 0, 0, NULL, 0) >= 0) {
1459 path = apr_pstrdup(lpool, QS_SIMPLE_PATH_PCRE);
1460 } else {
1461 path = qos_path_pcre(lpool, parsed_uri.path);
1462 }
1463 }
1464 }
1465 }
1466 if(parsed_uri.fragment) {
1467 char *f = apr_pstrdup(lpool, parsed_uri.fragment);
1468 if(strlen(f) > 0) {
1469 qos_unescaping(f);
1470 fragment = apr_pstrcat(lpool, "[", qos_2pcre(lpool, f), "]+", NULL);
1471 } else {
1472 fragment = apr_pstrcat(lpool, "", NULL);
1473 }
1474 }
1475 if(m_verbose > 1) {
1476 printf(" path: %s\n", parsed_uri.path);
1477 printf(" path rule: %s\n", path);
1478 if(query) {
1479 printf(" query: %s\n", parsed_uri.query);
1480 printf(" query rule: %s\n", query);
1481 }
1482 if(fragment) {
1483 printf(" fragment: %s\n", parsed_uri.fragment);
1484 printf(" fragment rule: %s\n", fragment);
1485 }
1486 }
1487 {
1488 const char *errptr = NULL;
1489 char *rule;
1490 qs_rule_t *rs = apr_pcalloc(pool, sizeof(qs_rule_t));
1491 if(doubleSlash) {
1492 rule = apr_pstrcat(pool, "^[/]?", path, NULL);
1493 } else {
1494 rule = apr_pstrcat(pool, "^", path, NULL);
1495 }
1496 if(query) {
1497 rule = apr_pstrcat(pool, rule, "\\?", query, NULL);
1498 }
1499 if(fragment) {
1500 rule = apr_pstrcat(pool, rule, "#", fragment, NULL);
1501 rs->fragment = 1;
1502 } else {
1503 rs->fragment = 0;
1504 }
1505 rule = apr_pstrcat(pool, rule, "$", NULL);
1506 rs->pcre = qos_pcre_compile(rule, 0);
1507 rs->extra = pcre_study(rs->pcre, 0, &errptr);
1508 rs->path = apr_pstrdup(pool, path);
1509 if(m_query_multi_pcre && !fragment) {
1510 rs->query_m_string = apr_pstrdup(pool, query_m_string);
1511 rs->query_m_pcre = apr_pstrdup(pool, query_m_pcre);
1512 } else {
1513 rs->query_m_string = NULL;
1514 rs->query_m_pcre = NULL;
1515 }
1516 // don't mind if extra is null
1517 if(m_verbose) {
1518 printf("# ADD line %d: %s\n", line_nr, line);
1519 printf("# %.3d %s\n", apr_table_elts(rules)->nelts+1, rule);
1520 fflush(stdout);
1521 }
1522 if(pcre_exec(rs->pcre, rs->extra, copy, strlen(copy), 0, 0, NULL, 0) < 0) {
1523 fprintf(stderr, "ERROR, rule check failed (did not match)!\n");
1524 fprintf(stderr, " line %d: %s\n", line_nr, line);
1525 fprintf(stderr, " string: %s\n", copy);
1526 fprintf(stderr, " rule: %s\n", rule);
1527 if(m_exit_on_error) exit(1);
1528 } else {
1529 apr_table_add(rules_url, copy, "unescaped line");
1530 apr_table_add(source_rules, rule, "");
1531 apr_table_setn(rules, rule, (char *)rs);
1532 }
1533 if(apr_table_elts(rules)->nelts == 2000) {
1534 fprintf(stderr, "ERROR, too many rules (limited to max. 2000)\n");
1535 if(m_exit_on_error) exit(1);
1536 }
1537 /* rule optimazion searching for redundant patterns (only in
1538 conjunction with -m, -b and !-n */
1539 if((apr_table_elts(rules)->nelts == rule_optimization) &&
1540 m_redundant &&
1541 m_query_multi_pcre &&
1542 m_base64) {
1543 /* got too many rules, try to find more general rules */
1544 if(m_verbose) {
1545 printf("# too many rules: start rule optimization ...\n");
1546 fflush(stdout);
1547 }
1548 qos_rule_optimization(pool, lpool, rules, special_rules);
1549 if(m_verbose) {
1550 printf("# continue with rule generation\n");
1551 fflush(stdout);
1552 }
1553 rule_optimization = rule_optimization + 200;
1554 }
1555 }
1556 }
1557 }
1558 }
1559 apr_pool_destroy(lpool);
1560 }
1561 *dc = deny_count;
1562 *ln = line_nr;
1563 }
1564
/**
 * Matches every line of the input file against all rules (used to measure
 * the time required to process the rules, the match results are ignored).
 *
 * @param pool Pool used to allocate the line buffer
 * @param blacklist Not used by this function
 * @param rules Rule table
 * @param f Input file
 * @param ln Out: number of lines which have been read
 */
static void qos_measurement(apr_pool_t *pool, apr_table_t *blacklist, apr_table_t *rules, FILE *f, int *ln) {
  char *readline = apr_pcalloc(pool, MAX_BODY_BUFFER);
  int line_nr = 0;
  while(!qos_fgetline(readline, MAX_BODY_BUFFER, f)) {
    apr_uri_t parsed_uri;
    apr_pool_t *lpool;
    char *line = readline;
    apr_pool_create(&lpool, NULL);
    line_nr++;
    if((strlen(line) > 1) && line[1] == '/') {
      /* remove the first character (source and destination overlap, so
         strcpy() must not be used) */
      memmove(line, &line[1], strlen(&line[1]) + 1);
    }
    if(line[0] != '/') {
      qos_auto_detect(&line);
    }
    if(apr_uri_parse(lpool, line, &parsed_uri) != APR_SUCCESS) {
      fprintf(stderr, "ERROR, could not parse uri %s\n", line);
      if(m_exit_on_error) exit(1);
    }
    if(parsed_uri.path == NULL || (parsed_uri.path[0] != '/')) {
      fprintf(stderr, "WARNING, line %d: invalid request %s\n", line_nr, line);
    } else {
      char *copy = apr_pstrdup(lpool, line);
      int i;
      apr_table_entry_t *entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
      qos_unescaping(copy);
      for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
        qs_rule_t *rs = (qs_rule_t *)entry[i].val;
        pcre_exec(rs->pcre, NULL, copy, strlen(copy), 0, 0, NULL, 0);
      }
    }
    apr_pool_destroy(lpool);
  }
  *ln = line_nr;
}
1600
main(int argc,const char * const argv[])1601 int main(int argc, const char * const argv[]) {
1602 apr_table_entry_t *entry;
1603 long performance = -1;
1604 time_t start = time(NULL);
1605 time_t end;
1606 int line_nr = 0;
1607 int deny_count = 0;
1608 char *time_string;
1609 int i, rc;
1610 const char *access_log = NULL;
1611 FILE *f;
1612 apr_pool_t *pool;
1613 apr_table_t *rules;
1614 apr_table_t *special_rules;
1615 apr_table_t *blacklist;
1616 apr_table_t *rules_url;
1617 int blacklist_size = 0;
1618 int whitelist_size = 0;
1619 char *cmd = strrchr(argv[0], '/');
1620 const char *httpdconf = NULL;
1621 apr_app_initialize(&argc, &argv, NULL);
1622 apr_pool_create(&pool, NULL);
1623 rules = apr_table_make(pool, 10);
1624 special_rules = apr_table_make(pool, 10);
1625 blacklist = apr_table_make(pool, 10);
1626 rules_url = apr_table_make(pool, 10);
1627 rc = nice(10);
1628 if(rc == -1) {
1629 fprintf(stderr, "ERROR, failed to change nice value: %s\n", strerror(errno));
1630 }
1631 if(cmd == NULL) {
1632 cmd = (char *)argv[0];
1633 } else {
1634 cmd++;
1635 }
1636
1637 argc--;
1638 argv++;
1639 while(argc >= 1) {
1640 if(strcmp(*argv,"-v") == 0) {
1641 if (--argc >= 1) {
1642 m_verbose = atoi(*(++argv));
1643 }
1644 } else if(strcmp(*argv,"-c") == 0) {
1645 if (--argc >= 1) {
1646 httpdconf = *(++argv);
1647 }
1648 } else if(strcmp(*argv,"-i") == 0) {
1649 if (--argc >= 1) {
1650 access_log = *(++argv);
1651 }
1652 } else if(strcmp(*argv,"-k") == 0) {
1653 if (--argc >= 1) {
1654 m_pfx = *(++argv);
1655 }
1656 } else if(strcmp(*argv,"-f") == 0) {
1657 if (--argc >= 1) {
1658 m_filter = *(++argv);
1659 }
1660 } else if(strcmp(*argv,"-d") == 0) {
1661 if (--argc >= 1) {
1662 m_path_depth = atoi(*(++argv));
1663 }
1664 } else if(strcmp(*argv,"-u") == 0) {
1665 if (--argc >= 1) {
1666 const char *coders = *(++argv);
1667 if(strstr(coders, "uni")) {
1668 m_mode |= QOS_DEC_MODE_FLAGS_UNI;
1669 }
1670 if(strstr(coders, "ansi")) {
1671 m_mode |= QOS_DEC_MODE_FLAGS_ANSI;
1672 }
1673 if(strstr(coders, "html")) {
1674 m_mode |= QOS_DEC_MODE_FLAGS_HTML;
1675 }
1676 }
1677 } else if(strcmp(*argv,"-n") == 0) {
1678 m_redundant = 0;
1679 } else if(strcmp(*argv,"-b") == 0) {
1680 if (--argc >= 1) {
1681 m_base64 = atoi(*(++argv));
1682 }
1683 } else if(strcmp(*argv,"-l") == 0) {
1684 if (--argc >= 1) {
1685 m_query_len_pcre = atoi(*(++argv));
1686 }
1687 } else if(strcmp(*argv,"-p") == 0) {
1688 m_query_pcre = 1;
1689 } else if(strcmp(*argv,"-m") == 0) {
1690 m_query_multi_pcre = 1;
1691 } else if(strcmp(*argv,"-o") == 0) {
1692 m_query_o_pcre = 1;
1693 } else if(strcmp(*argv,"-s") == 0) {
1694 m_query_single_pcre = 1;
1695 } else if(strcmp(*argv,"-e") == 0) {
1696 m_exit_on_error = 1;
1697 } else if(strcmp(*argv,"-t") == 0) {
1698 performance = 0;
1699 } else if(strcmp(*argv,"-h") == 0) {
1700 m_handler = 1;
1701 } else if(strcmp(*argv,"-?") == 0) {
1702 usage(cmd, 0);
1703 } else if(strcmp(*argv,"-help") == 0) {
1704 usage(cmd, 0);
1705 } else if(strcmp(*argv,"--help") == 0) {
1706 usage(cmd, 0);
1707 } else if(strcmp(*argv,"--man") == 0) {
1708 usage(cmd, 1);
1709 }
1710 argc--;
1711 argv++;
1712 }
1713 qos_init_pcre();
1714
1715 if((m_query_pcre && m_query_multi_pcre) ||
1716 (m_query_pcre && m_query_single_pcre) ||
1717 (m_query_multi_pcre && m_query_single_pcre) ||
1718 (m_query_pcre && m_query_o_pcre) ||
1719 (m_query_multi_pcre && m_query_o_pcre) ||
1720 (m_query_single_pcre && m_query_o_pcre)) {
1721 fprintf(stderr, "ERROR, option -s,-m,-o or -p can't be used together.\n");
1722 exit(1);
1723 }
1724
1725 if(httpdconf) {
1726 qos_load_blacklist(pool, blacklist, httpdconf);
1727 blacklist_size = apr_table_elts(blacklist)->nelts;
1728 qos_load_whitelist(pool, rules, httpdconf);
1729 whitelist_size = apr_table_elts(rules)->nelts;
1730 }
1731
1732 if(access_log == NULL) usage(cmd, 0);
1733 f = fopen(access_log, "r");
1734 if(f == NULL) {
1735 fprintf(stderr, "ERROR, could not open input file %s\n", access_log);
1736 exit(1);
1737 }
1738 qos_process_log(pool, blacklist, rules, rules_url, special_rules, f, &line_nr, &deny_count, 1);
1739 fclose(f);
1740
1741 if(m_redundant) {
1742 int xl = 0;
1743 int y = 0;
1744 // delete useless rules
1745 qos_delete_obsolete_rules(pool, rules, rules_url);
1746 // ensure, we have not deleted to many!
1747 if(m_verbose) {
1748 printf("# verify new rules ...\n");
1749 fflush(stdout);
1750 }
1751 // if(httpdconf) {
1752 // qos_load_whitelist(pool, rules, httpdconf);
1753 // }
1754 f = fopen(access_log, "r");
1755 qos_process_log(pool, blacklist, rules, rules_url, special_rules, f, &xl, &y, 0);
1756 fclose(f);
1757 }
1758
1759 if(performance == 0) {
1760 int lx = 0;
1761 apr_time_t tv;
1762 f = fopen(access_log, "r");
1763 tv = apr_time_now();
1764 qos_measurement(pool, blacklist, rules, f, &lx);
1765 tv = apr_time_now() - tv;
1766 performance = apr_time_msec(tv) + (apr_time_sec(tv) * 1000);
1767 performance = performance / lx;
1768 fclose(f);
1769 }
1770
1771 end = time(NULL);
1772 time_string = ctime(&end);
1773 time_string[strlen(time_string) - 1] = '\0';
1774 printf("\n# --------------------------------------------------------\n");
1775 printf("# %s\n", time_string);
1776 printf("# %d rules from %d access log lines\n", apr_table_elts(rules)->nelts, line_nr);
1777 printf("# mod_qos version: %s\n", man_version);
1778 if(performance >= 0) {
1779 printf("# performance index (ms/req): %ld\n", performance);
1780 }
1781 printf("# source (-i): %s\n", access_log);
1782 printf("# path depth (-d): %d\n", m_path_depth);
1783 printf("# disable path only regex (-h): %s\n", m_handler == 1 ? "yes" : "no");
1784 printf("# base64 detection level (-b): %d\n", m_base64);
1785 printf("# redundancy check (-n): %s\n", m_redundant == 1 ? "yes" : "no");
1786 printf("# pcre only for query (-p): %s\n", m_query_pcre == 1 ? "yes" : "no");
1787 printf("# decoding (-u): url");
1788 if(m_mode & QOS_DEC_MODE_FLAGS_UNI) {
1789 printf(" uni");
1790 }
1791 if(m_mode & QOS_DEC_MODE_FLAGS_HTML) {
1792 printf(" html");
1793 }
1794 if(m_mode & QOS_DEC_MODE_FLAGS_ANSI) {
1795 printf(" ansi");
1796 }
1797 printf("\n");
1798 printf("# one pcre for query value (-m): %s\n", m_query_multi_pcre == 1 ? "yes" : "no");
1799 if(m_query_o_pcre) {
1800 printf("# ignore query order (-o): yes\n");
1801 }
1802 printf("# single pcre for query (-s): %s\n", m_query_single_pcre == 1 ? "yes" : "no");
1803 printf("# query outsize (-l): %d\n", m_query_len_pcre);
1804 printf("# exit on error (-e): %s\n", m_exit_on_error == 1 ? "yes" : "no");
1805 printf("# rule file (-c): %s\n", httpdconf == NULL ? "-" : httpdconf);
1806 if(httpdconf) {
1807 printf("# whitelist (loaded existing rules): %d\n", whitelist_size);
1808 printf("# blacklist (loaded deny rules): %d\n", blacklist_size);
1809 printf("# blacklist matches: %d\n", deny_count);
1810 }
1811 printf("# duration: %ld minutes\n", (end - start) / 60);
1812 printf("# --------------------------------------------------------\n");
1813
1814 {
1815 STACK_OF(qs_rule_t) *st = sk_new(STACK_qs_cmp);
1816 qs_rule_t *r;
1817 int j = 1;
1818 entry = (apr_table_entry_t *)apr_table_elts(rules)->elts;
1819 for(i = 0; i < apr_table_elts(rules)->nelts; i++) {
1820 // printf("QS_PermitUri +QSF%0.3d deny \"%s\"\n", i+1, entry[i].key);
1821 r = apr_pcalloc(pool, sizeof(qs_rule_t));
1822 r->rule = entry[i].key;
1823 sk_push(st, (char *)r);
1824 }
1825 sk_sort(st);
1826 i = sk_num(st);
1827 for(; i > 0; i--) {
1828 r = (qs_rule_t *)sk_value(st, i-1);
1829 printf("QS_PermitUri +%s%.3d deny \"%s\"\n",
1830 m_pfx ? m_pfx : "QSF",
1831 j, qs_apache_escape(pool, r->rule));
1832 j++;
1833 }
1834 }
1835
1836 apr_pool_destroy(pool);
1837 return 0;
1838 }
1839