1 /*
2 * $Id: rewrite.c,v 1.3 1998/08/15 00:01:14 elkner Exp $
3 *
4 * Author: Squirm derived http://www.senet.com.au/squirm/
5 * Project: Jesred http://ivs.cs.uni-magdeburg.de/~elkner/webtools/jesred/
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * http://www.gnu.org/copyleft/gpl.html or ./gpl.html
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 *
22 * Thanks to Chris Foote, chris@senet.com.au - except parse_buff
23 * not much to change here (i.e. don't like to go deeper into the pattern stuff)
24 * ;-)
25 *
26 */
27
28 #include<stdio.h>
29 #include<strings.h>
30 #include<ctype.h>
31 #include<sys/types.h>
32 #include<sys/socket.h>
33 #include<netinet/in.h>
34 #include<arpa/inet.h>
35
36 #ifdef LOCAL_REGEX
37 #include "regex.h"
38 #else
39 #include<regex.h>
40 #endif
41
42 #include "log.h"
43 #include "ip_list.h"
44 #include "pattern_list.h"
45 #include "rewrite.h"
46 #include "main.h"
47
48 /* load the stdin for the redirector into an IN_BUFF structure
49 Sets in_buff.url to "" if the fields can't be converted */
50
51 int replace_string(pattern_item *, char *, char *);
52 #ifdef USE_ACCEL
53 static int match_accel(char *, char *, int, int);
54 #endif
55
56 int
parse_buff(char * buff,char ** url,char ** src_addr,char ** ident,char ** method,ip_acl * ip,pattern_item * p)57 parse_buff(char *buff, char **url, char **src_addr, char **ident,
58 char **method, ip_acl *ip, pattern_item *p)
59 {
60 int c, i;
61 struct in_addr address;
62 char *token, *new_token;
63 char *end[5];
64
65 c = 0;
66 token = strchr(buff,' ');
67 if ( token ) { /* URL */
68 c++;
69 *token = '\0';
70 end[0] = token;
71 *url = buff;
72 new_token = strchr(++token,' ');
73 if (new_token) { /* Address */
74 c++;
75 *new_token = '\0';
76 end[1] = new_token;
77 *src_addr = token;
78 token = strchr(++new_token,' ');
79 if (token) { /* Ident */
80 c++;
81 *token = '\0';
82 end[2] = token;
83 *ident = new_token;
84 new_token = strchr(++token,'\n');
85 if (new_token) {
86 c++;
87 *new_token = '\0';
88 end[3] = new_token;
89 *method = token;
90 new_token = strchr(token,' ');
91 if (new_token) {
92 c++;
93 *new_token = '\0';
94 end[4] = new_token;
95 }
96 }
97 }
98 }
99 }
100 if(c < 4) {
101 for(i = 0; i < c; i++) {
102 if ( end[i] )
103 *end[i] = ' ';
104 }
105 log(ERROR, "incorrect input (%d): %s", c, buff);
106 return 1;
107 }
108 #ifdef DEBUG
109 log(DEBG, "Request: %s %s %s %s\n", *url, *src_addr, *ident, *method);
110 #endif
111
112 /* all methods must be GET or ICP_QUERY */
113 i = 0;
114 if (allow_siblings && (! strcmp(*method, "ICP_QUERY")) )
115 i--;
116 if( strcmp(*method, "GET") )
117 i++;
118 if ( i ) {
119 #ifdef DEBUG
120 for(i = 0; i < c; i++) {
121 if ( end[i] )
122 *end[i] = ' ';
123 }
124 log(DEBG, "method not \"GET\" %s\n", buff);
125 #endif
126 return 1;
127 }
128
129 /* URL with less than 7 char is invalid */
130 if(strlen(*url) <= 7) {
131 log(ERROR, "strlen url to short (%d)\n", strlen(*url));
132 return 1;
133 }
134
135 /* check that the IP source address supplied is valid */
136 token = strchr(*src_addr,'/');
137 if ( token )
138 *token = '\0';
139 /* for inet_addr we have to link with libnsl on Solaris:
140 i.e. on 2.6 448K r-x + 40K rwx, but since it is a shared lib,
141 it is already loaded, when squid runs - so not much waste of
142 memory ;-) */
143 if ( (address.s_addr = inet_addr(*src_addr)) == -1 ) {
144 log(ERROR, "client IP address not valid %s\n",
145 *src_addr ? *src_addr : "");
146 if ( token )
147 *token = '/';
148 return 1;
149 }
150 if ( token )
151 *token = '/';
152
153 /* make sure the IP source address matches that of the ones in our list */
154 if( ip_access_check(address, ip) == IP_DENY ) {
155 #ifdef DEBUG
156 log(DEBG, "client IP address %s not matched\n", *src_addr);
157 #endif
158 return 1;
159 }
160 return 0;
161 }
162
163 /* returns replacement URL for a match in newurl
164 < 0 if abort pattern match, 0 if no match found, > 1 pattern match
165 if match, the number of the matching rule will be returned */
166 int
pattern_compare(char * url,char * newurl,pattern_item * phead)167 pattern_compare(char *url,char *newurl, pattern_item *phead)
168 {
169 pattern_item *curr;
170 int pos;
171 int len;
172 int i;
173 int matched;
174 int pattern_no = 0;
175 curr = NULL;
176
177 for(curr = phead; curr != NULL; curr = curr->next) {
178 pattern_no++;
179 matched = 1;
180 /* assume a match until a character isn't the same */
181 if(curr->type == ABORT) {
182 len = strlen(curr->pattern);
183 pos = strlen(url) - len; /* this is dangerous */
184 for(i = 0; i <= len; i++) {
185 if (url[pos] != curr->pattern[i]) {
186 matched = 0;
187 break;
188 }
189 pos++;
190 }
191 if(matched) {
192 #ifdef DEBUG
193 log(DEBG, "abort pattern matched: %s (rule %d)\n",
194 url, pattern_no);
195 #endif
196 return (0 - pattern_no); /* URL matches abort file extension */
197 }
198 }
199 else {
200 /* check for accelerator string */
201 #ifdef USE_ACCEL
202 if(curr->has_accel) {
203 /* check to see if the accelerator string matches, then bother
204 doing a regexec() on it */
205 if(match_accel(url, curr->accel,
206 curr->accel_type,
207 curr->case_sensitive)) {
208 #ifdef DEBUG
209 log(DEBG, "URL %s matches accelerator %s (rule %d)\n",
210 url, curr->accel, pattern_no);
211 #endif
212 /* Now we must test for normal or extended */
213 if (curr->type == EXTENDED) {
214 if ( replace_string(curr, url, newurl) == 1 )
215 return pattern_no;
216 }
217 else /* Type == NORMAL */ {
218 if(regexec(&curr->cpattern, url, 0, 0, 0) == 0){
219 strcpy(newurl,curr->replacement);
220 return pattern_no;
221 }
222 }
223 } /* end match_accel loop */
224 }
225 else {
226 /* we haven't got an accelerator string, so we use regex
227 instead */
228 /* Now we must test for normal or extended */
229 #endif
230 if (curr->type == EXTENDED) {
231 if ( replace_string(curr, url, newurl) == 1)
232 return pattern_no;
233 }
234 else /* Type == NORMAL */ {
235 if(regexec(&curr->cpattern, url, 0, 0, 0) == 0) {
236 strcpy(newurl,curr->replacement);
237 return pattern_no;
238 }
239 }
240 #ifdef USE_ACCEL
241 }
242 #endif
243 }
244 }
245 return 0;
246 }
247
248 int
replace_string(pattern_item * curr,char * url,char * buffer)249 replace_string (pattern_item *curr, char *url, char *buffer)
250 {
251 char *replacement_string = NULL;
252 regmatch_t match_data[10];
253 int parenthesis;
254 char *in_ptr;
255 char *out_ptr;
256 int replay_num;
257 int count;
258
259 /* Perform the regex call */
260 if (regexec (&curr->cpattern, url, 10, &match_data[0], 0) != 0)
261 return 0;
262
263 /* Ok, setup the traversal pointers */
264 in_ptr = curr->replacement;
265 out_ptr = buffer;
266
267 /* Count the number of replays in the pattern */
268 parenthesis = count_parenthesis (curr->pattern);
269 if (parenthesis < 0) {
270 /* Invalid return value - don't log because we already have done it */
271 return 0;
272 }
273
274 /* Traverse the url string now */
275 while (*in_ptr != '\0') {
276 if (isdigit (*in_ptr)) {
277 /* We have a number, how many chars are there before us? */
278 switch (in_ptr - curr->replacement) {
279 case 0:
280 /* This is the first char
281 Since there is no backslash before hand, this is not
282 a pattern match, so loop around */
283 {
284 *out_ptr = *in_ptr;
285 out_ptr++;
286 in_ptr++;
287 continue;
288 }
289 case 1:
290 /* Only one char back to check, so see if it's a backslash */
291 if (*(in_ptr - 1) != '\\') {
292 *out_ptr = *in_ptr;
293 out_ptr++;
294 in_ptr++;
295 continue;
296 }
297 break;
298 default:
299 /* Two or more chars back to check, so see if the previous is
300 a backslash, and also the one before. Two backslashes mean
301 that we should not replace anything! */
302 if ( (*(in_ptr - 1) != '\\') ||
303 ((*(in_ptr - 1) == '\\') && (*(in_ptr - 2) == '\\')) ) {
304 *out_ptr = *in_ptr;
305 out_ptr++;
306 in_ptr++;
307 continue;
308 }
309 }
310
311 /* Ok, if we reach this point, then we have found something to
312 replace. It also means that the last time we went through here,
313 we copied in a backslash char, so we should backtrack one on
314 the output string before continuing */
315 out_ptr--;
316
317 /* We need to convert the current in_ptr into a number for array
318 lookups */
319 replay_num = (*in_ptr) - '0';
320
321 /* Now copy in the chars from the replay string */
322 for (count = match_data[replay_num].rm_so;
323 count < match_data[replay_num].rm_eo; count++) {
324 /* Copy in the chars */
325 *out_ptr = url[count];
326 out_ptr++;
327 }
328
329 /* Increment the in pointer */
330 in_ptr++;
331 } else {
332 *out_ptr = *in_ptr;
333 out_ptr++;
334 in_ptr++;
335 }
336
337 /* Increment the in pointer and loop around */
338 /* in_ptr++; */
339 }
340
341 /* Terminate the string */
342 *out_ptr = '\0';
343
344 /* return to the caller (buffer contains the new url) */
345 return 1;
346 }
347
348 #ifdef USE_ACCEL
349 static int
match_accel(char * url,char * accel,int accel_type,int case_sensitive)350 match_accel(char *url, char *accel, int accel_type, int case_sensitive)
351 {
352 /* return 1 if url contains accel */
353 int i, offset;
354 static char l_accel[BUFSIZE];
355 int accel_len;
356 int url_len;
357
358 if(accel_type == ACCEL_NORMAL) {
359 if(case_sensitive) {
360 if(strstr(url, accel))
361 return 1;
362 else
363 return 0;
364 }
365 else {
366 /* convert to lower case */
367 for(i = 0; url[i] != '\0'; i++)
368 l_accel[i] = tolower(url[i]);
369 l_accel[i] = '\0';
370 if(strstr(l_accel, accel))
371 return 1;
372 else
373 return 0;
374 }
375 }
376 if(accel_type == ACCEL_START) {
377 accel_len = strlen(accel);
378 url_len = strlen(url);
379 if(url_len < accel_len)
380 return 0;
381 if(case_sensitive) {
382 for(i = 0; i < accel_len; i++) {
383 if(accel[i] != url[i])
384 return 0;
385 }
386 }
387 else {
388 for(i = 0; i < accel_len; i++) {
389 if(accel[i] != tolower(url[i]))
390 return 0;
391 }
392 }
393 return 1;
394 }
395 if(accel_type == ACCEL_END) {
396 accel_len = strlen(accel);
397 url_len = strlen(url);
398 offset = url_len - accel_len;
399 if(offset < 0)
400 return 0;
401 if(case_sensitive) {
402 for(i = 0; i < accel_len; i++) {
403 if(accel[i] != url[i+offset])
404 return 0;
405 }
406 }
407 else {
408 for(i = 0; i < accel_len; i++) {
409 if(accel[i] != tolower(url[i+offset]))
410 return 0;
411 }
412 }
413 return 1;
414 }
415
416 /* we shouldn't reach this section! */
417 return 0;
418 }
419 #endif
420
421
422
423
424
425
426