1 /*
2  * $Id: rewrite.c,v 1.3 1998/08/15 00:01:14 elkner Exp $
3  *
4  * Author:  Squirm derived      http://www.senet.com.au/squirm/
5  * Project: Jesred       http://ivs.cs.uni-magdeburg.de/~elkner/webtools/jesred/
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * http://www.gnu.org/copyleft/gpl.html or ./gpl.html
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21  *
22  * Thanks to Chris Foote, chris@senet.com.au - except parse_buff
23  * not much to change here (i.e. don't like to go deeper into the pattern stuff)
24  * ;-)
25  *
26  */
27 
#include<stdio.h>
#include<string.h>
#include<strings.h>
#include<ctype.h>
#include<sys/types.h>
#include<sys/socket.h>
#include<netinet/in.h>
#include<arpa/inet.h>
35 
36 #ifdef LOCAL_REGEX
37 #include "regex.h"
38 #else
39 #include<regex.h>
40 #endif
41 
42 #include "log.h"
43 #include "ip_list.h"
44 #include "pattern_list.h"
45 #include "rewrite.h"
46 #include "main.h"
47 
/* parse_buff() splits one redirector input line into its fields (URL,
   source address, ident, method) and returns non-zero if the line
   cannot be used */
50 
/* Forward declarations for functions defined later in this file. */
int replace_string(pattern_item *, char *, char *);
#ifdef USE_ACCEL
/* Only compiled (and only referenced) when USE_ACCEL is defined. */
static int match_accel(char *, char *, int, int);
#endif
55 
56 int
parse_buff(char * buff,char ** url,char ** src_addr,char ** ident,char ** method,ip_acl * ip,pattern_item * p)57 parse_buff(char *buff, char **url, char **src_addr, char **ident,
58 	     char **method, ip_acl *ip, pattern_item *p)
59 {
60     int c, i;
61     struct in_addr address;
62     char *token, *new_token;
63     char *end[5];
64 
65     c = 0;
66     token = strchr(buff,' ');
67     if ( token ) {       /* URL */
68 	c++;
69 	*token = '\0';
70 	end[0] = token;
71 	*url = buff;
72 	new_token = strchr(++token,' ');
73 	if (new_token) {     /* Address */
74 	    c++;
75 	    *new_token = '\0';
76 	    end[1] = new_token;
77 	    *src_addr = token;
78 	    token = strchr(++new_token,' ');
79 	    if (token) {      /* Ident */
80 		c++;
81 		*token = '\0';
82 		end[2] = token;
83 		*ident = new_token;
84 		new_token = strchr(++token,'\n');
85 		if (new_token) {
86 		    c++;
87 		    *new_token = '\0';
88 		    end[3] = new_token;
89 		    *method = token;
90 		    new_token = strchr(token,' ');
91 		    if (new_token) {
92 			c++;
93 			*new_token = '\0';
94 			end[4] = new_token;
95 		    }
96 		}
97 	    }
98 	}
99     }
100     if(c < 4) {
101 	for(i = 0; i < c; i++) {
102 	    if ( end[i] )
103 		*end[i] = ' ';
104 	}
105 	log(ERROR, "incorrect input (%d): %s", c, buff);
106 	return 1;
107     }
108 #ifdef DEBUG
109     log(DEBG, "Request: %s %s %s %s\n", *url, *src_addr, *ident, *method);
110 #endif
111 
112     /* all methods must be GET or ICP_QUERY */
113     i = 0;
114     if (allow_siblings && (! strcmp(*method, "ICP_QUERY")) )
115 	i--;
116     if( strcmp(*method, "GET") )
117 	i++;
118     if ( i ) {
119 #ifdef DEBUG
120 	for(i = 0; i < c; i++) {
121 	    if ( end[i] )
122 		*end[i] = ' ';
123 	}
124 	log(DEBG, "method not \"GET\" %s\n", buff);
125 #endif
126 	return 1;
127     }
128 
129     /* URL with less than 7 char is invalid */
130     if(strlen(*url) <= 7) {
131 	log(ERROR, "strlen url to short (%d)\n", strlen(*url));
132 	return 1;
133     }
134 
135     /* check that the IP source address supplied is valid */
136     token = strchr(*src_addr,'/');
137     if ( token )
138 	*token = '\0';
139     /* for inet_addr we have to link with libnsl on Solaris:
140        i.e. on 2.6  448K r-x + 40K rwx, but since it is a shared lib,
141        it is already loaded, when squid runs - so not much waste of
142        memory ;-) */
143     if ( (address.s_addr = inet_addr(*src_addr)) == -1 ) {
144 	log(ERROR, "client IP address not valid %s\n",
145 	    *src_addr ? *src_addr : "");
146 	if ( token )
147 	    *token = '/';
148 	return 1;
149     }
150     if ( token )
151 	*token = '/';
152 
153     /* make sure the IP source address matches that of the ones in our list */
154     if( ip_access_check(address, ip) == IP_DENY ) {
155 #ifdef DEBUG
156 	log(DEBG, "client IP address %s not matched\n", *src_addr);
157 #endif
158 	return 1;
159     }
160     return 0;
161 }
162 
163 /* returns replacement URL for a match in newurl
164    < 0 if abort pattern match, 0 if no match found, > 1 pattern match
165    if match, the number of the matching rule will be returned */
166 int
pattern_compare(char * url,char * newurl,pattern_item * phead)167 pattern_compare(char *url,char *newurl, pattern_item *phead)
168 {
169     pattern_item *curr;
170     int pos;
171     int len;
172     int i;
173     int matched;
174     int pattern_no = 0;
175     curr = NULL;
176 
177     for(curr = phead; curr != NULL; curr = curr->next) {
178 	pattern_no++;
179 	matched = 1;
180 	/* assume a match until a character isn't the same */
181 	if(curr->type == ABORT) {
182 	    len = strlen(curr->pattern);
183 	    pos = strlen(url) - len; /* this is dangerous */
184 	    for(i = 0; i <= len; i++) {
185 		if (url[pos] != curr->pattern[i]) {
186 		    matched = 0;
187 		    break;
188 		}
189 		pos++;
190 	    }
191 	    if(matched) {
192 #ifdef DEBUG
193 		log(DEBG, "abort pattern matched: %s (rule %d)\n",
194 		    url, pattern_no);
195 #endif
196 		return (0 - pattern_no); /* URL matches abort file extension */
197 	    }
198 	}
199 	else {
200 	    /* check for accelerator string */
201 #ifdef USE_ACCEL
202 	    if(curr->has_accel) {
203 		/* check to see if the accelerator string matches, then bother
204 		   doing a regexec() on it */
205 		if(match_accel(url, curr->accel,
206 			       curr->accel_type,
207 			       curr->case_sensitive)) {
208 #ifdef DEBUG
209 		    log(DEBG, "URL %s matches accelerator %s (rule %d)\n",
210 			url, curr->accel, pattern_no);
211 #endif
212 		    /* Now we must test for normal or extended */
213 		    if (curr->type == EXTENDED) {
214 			if ( replace_string(curr, url, newurl) == 1 )
215 			    return pattern_no;
216 		    }
217 		    else /* Type == NORMAL */ {
218 			if(regexec(&curr->cpattern, url, 0, 0, 0) == 0){
219 			    strcpy(newurl,curr->replacement);
220 			    return pattern_no;
221 			}
222 		    }
223 		} /* end match_accel loop */
224 	    }
225 	    else {
226 		/* we haven't got an accelerator string, so we use regex
227 		   instead */
228 		/* Now we must test for normal or extended */
229 #endif
230 		if (curr->type == EXTENDED) {
231 		    if ( replace_string(curr, url, newurl) == 1)
232 			return pattern_no;
233 		}
234 		else /* Type == NORMAL */ {
235 		    if(regexec(&curr->cpattern, url, 0, 0, 0) == 0) {
236 			strcpy(newurl,curr->replacement);
237 			return pattern_no;
238 		    }
239 		}
240 #ifdef USE_ACCEL
241 	    }
242 #endif
243 	}
244     }
245     return 0;
246 }
247 
248 int
replace_string(pattern_item * curr,char * url,char * buffer)249 replace_string (pattern_item *curr, char *url, char *buffer)
250 {
251     char *replacement_string = NULL;
252     regmatch_t match_data[10];
253     int parenthesis;
254     char *in_ptr;
255     char *out_ptr;
256     int replay_num;
257     int count;
258 
259     /* Perform the regex call */
260     if (regexec (&curr->cpattern, url, 10, &match_data[0], 0) != 0)
261 	return 0;
262 
263     /* Ok, setup the traversal pointers */
264     in_ptr = curr->replacement;
265     out_ptr = buffer;
266 
267     /* Count the number of replays in the pattern */
268     parenthesis = count_parenthesis (curr->pattern);
269     if (parenthesis < 0) {
270 	/* Invalid return value - don't log because we already have done it */
271 	return 0;
272     }
273 
274     /* Traverse the url string now */
275     while (*in_ptr != '\0') {
276 	if (isdigit (*in_ptr)) {
277 	    /* We have a number, how many chars are there before us? */
278 	    switch (in_ptr - curr->replacement) {
279 		case 0:
280 		    /* This is the first char
281 		       Since there is no backslash before hand, this is not
282 		       a pattern match, so loop around */
283 		    {
284 			*out_ptr = *in_ptr;
285 			out_ptr++;
286 			in_ptr++;
287 			continue;
288 		    }
289 		case 1:
290 		    /* Only one char back to check, so see if it's a backslash */
291 		    if (*(in_ptr - 1) != '\\') {
292 			*out_ptr = *in_ptr;
293 			out_ptr++;
294 			in_ptr++;
295 			continue;
296 		    }
297 		    break;
298 		default:
299 		    /* Two or more chars back to check, so see if the previous is
300 		       a backslash, and also the one before. Two backslashes mean
301 		       that we should not replace anything! */
302 		    if ( (*(in_ptr - 1) != '\\') ||
303 			 ((*(in_ptr - 1) == '\\') && (*(in_ptr - 2) == '\\')) ) {
304 			*out_ptr = *in_ptr;
305 			out_ptr++;
306 			in_ptr++;
307 			continue;
308 		    }
309 	    }
310 
311 	    /* Ok, if we reach this point, then we have found something to
312 	       replace. It also means that the last time we went through here,
313 	       we copied in a backslash char, so we should backtrack one on
314 	       the output string before continuing */
315 	    out_ptr--;
316 
317 	    /* We need to convert the current in_ptr into a number for array
318 	       lookups */
319 	    replay_num = (*in_ptr) - '0';
320 
321 	    /* Now copy in the chars from the replay string */
322 	    for (count = match_data[replay_num].rm_so;
323 		 count < match_data[replay_num].rm_eo; count++) {
324 		/* Copy in the chars */
325 		*out_ptr = url[count];
326 		out_ptr++;
327 	    }
328 
329 	    /* Increment the in pointer */
330 	    in_ptr++;
331 	} else {
332 	    *out_ptr = *in_ptr;
333 	    out_ptr++;
334 	    in_ptr++;
335 	}
336 
337 	/* Increment the in pointer and loop around */
338 	/* in_ptr++; */
339     }
340 
341     /* Terminate the string */
342     *out_ptr = '\0';
343 
344     /* return to the caller (buffer contains the new url) */
345     return 1;
346 }
347 
#ifdef USE_ACCEL
/*
 * Compare the first len characters of text with accel.  When
 * case_sensitive is 0 the characters of text are lower-cased before the
 * comparison; accel is always used as it is.
 * Returns 1 if all len characters agree, 0 otherwise.
 */
static int
accel_equal_at(const char *text, const char *accel, size_t len,
               int case_sensitive)
{
    size_t k;

    for (k = 0; k < len; k++) {
        /* <ctype.h> functions need an unsigned char value */
        int ch = (unsigned char) text[k];

        if (!case_sensitive)
            ch = tolower(ch);
        if (ch != (unsigned char) accel[k])
            return 0;
    }
    return 1;
}

/*
 * Cheap pre-check that is run before the regular expression of a rule.
 *
 * accel_type selects where accel has to occur in url:
 *   ACCEL_NORMAL  anywhere,
 *   ACCEL_START   at the beginning,
 *   ACCEL_END     at the end.
 * If case_sensitive is 0, url is lower-cased for the comparison (accel is
 * compared unchanged).
 *
 * Returns 1 if url contains accel as requested, 0 otherwise (also for an
 * unknown accel_type).
 */
static int
match_accel(char *url, char *accel, int accel_type, int case_sensitive)
{
    size_t accel_len;
    size_t url_len;
    size_t start;

    if (accel_type != ACCEL_NORMAL && accel_type != ACCEL_START &&
        accel_type != ACCEL_END) {
        /* we shouldn't reach this section! */
        return 0;
    }

    accel_len = strlen(accel);
    url_len = strlen(url);
    if (url_len < accel_len)
        return 0;

    if (accel_type == ACCEL_START)
        return accel_equal_at(url, accel, accel_len, case_sensitive);

    if (accel_type == ACCEL_END)
        return accel_equal_at(url + (url_len - accel_len), accel, accel_len,
                              case_sensitive);

    /* ACCEL_NORMAL */
    if (case_sensitive)
        return strstr(url, accel) != NULL;

    /* case-insensitive substring search done directly on url, so no
       fixed-size scratch copy of the URL is needed */
    for (start = 0; start + accel_len <= url_len; start++) {
        if (accel_equal_at(url + start, accel, accel_len, 0))
            return 1;
    }
    return 0;
}
#endif
420 
421 
422 
423 
424 
425 
426