1 /* HTProxy.c
2 ** GATEWAY AND PROXY MANAGER
3 **
4 ** (c) COPYRIGHT MIT 1995.
5 ** Please first read the full copyright statement in the file COPYRIGH.
6 ** @(#) $Id$
7 **
8 ** Replaces the old env variables for gateways and proxies. However for
9 ** backward compatibility there is a function that reads the env variables
10 ** at start up. Note that there is a difference between a proxy and a
11 ** gateway!
12 **
13 ** Authors
14 ** HF Henrik Frystyk, frystyk@w3.org
15 ** History
16 ** 4 Jun 95 Written on a rainy day
17 */
18
/*
** Provide a default WAIS gateway URL unless HT_DIRECT_WAIS is defined or
** the build has already supplied its own HT_DEFAULT_WAIS_GATEWAY.
*/
#ifndef HT_DIRECT_WAIS
#ifndef HT_DEFAULT_WAIS_GATEWAY
#define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
#endif /* HT_DEFAULT_WAIS_GATEWAY */
#endif /* HT_DIRECT_WAIS */
22
23 /* Library include files */
24 #include "wwwsys.h"
25 #include "WWWUtil.h"
26 #include "WWWCore.h"
27 #include "WWWHTTP.h"
28 #include "WWWApp.h"
29 #include "HTProxy.h" /* Implemented here */
30
31 /* Variables and typedefs local to this module */
32
/*
** One registered proxy or gateway. The same record type is stored in both
** the proxy list and the gateway list.
*/
typedef struct _HTProxy HTProxy;

struct _HTProxy {
    char * access;      /* Access scheme, or the regex source text for
                           entries registered as regular expressions */
    char * url;         /* URL of Gateway or Proxy */
#ifdef HT_POSIX_REGEX
    regex_t * regex;    /* Compiled regex, NULL for plain scheme entries */
#endif
};
40
/*
** One entry of the noproxy list: a host or domain, optionally restricted
** to a single access scheme and/or port.
*/
typedef struct _HTHostlist HTHostList;

struct _HTHostlist {
    char * access;      /* Access scheme; NULL applies to every scheme */
    char * host;        /* Host or domain name */
    unsigned port;      /* Port number; 0 applies to every port */
#ifdef HT_POSIX_REGEX
    regex_t * regex;    /* Compiled regex, NULL for plain host entries */
#endif
};
49
50 PRIVATE HTList * proxies = NULL; /* List of proxy servers */
51 PRIVATE HTList * gateways = NULL; /* List of gateways */
52 PRIVATE HTList * noproxy = NULL; /* Don't proxy on these hosts and domains */
53 PRIVATE int noproxy_is_onlyproxy = 0; /* Interpret the noproxy list as an onlyproxy one */
54
55 #if 0
56 PRIVATE HTList * onlyproxy = NULL; /* Proxy only on these hosts and domains */
57 #endif
58
59 /* ------------------------------------------------------------------------- */
60
61 #ifdef HT_POSIX_REGEX
get_regex_error(int errcode,regex_t * compiled)62 PRIVATE char * get_regex_error (int errcode, regex_t * compiled)
63 {
64 size_t length = regerror (errcode, compiled, NULL, 0);
65 char * str = NULL;
66 if ((str = (char *) HT_MALLOC(length+1)) == NULL)
67 HT_OUTOFMEM("get_regex_error");
68 (void) regerror (errcode, compiled, str, length);
69 return str;
70 }
71
get_regex_t(const char * regex_str,int cflags)72 PRIVATE regex_t * get_regex_t (const char * regex_str, int cflags)
73 {
74 regex_t * regex = NULL;
75 if (regex_str && *regex_str) {
76 int status;
77 if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)
78 HT_OUTOFMEM("get_regex_t");
79 if ((status = regcomp(regex, regex_str, cflags))) {
80 char * err_msg = get_regex_error(status, regex);
81 HTTRACE(PROT_TRACE, "HTProxy..... Regular expression error: %s\n" _ err_msg);
82 HT_FREE(err_msg);
83 HT_FREE(regex);
84 }
85 }
86 return regex;
87 }
88 #endif
89
90 /*
91 ** Existing entries are replaced with new ones
92 */
add_object(HTList * list,const char * access,const char * url,BOOL regex,int regex_flags)93 PRIVATE BOOL add_object (HTList * list, const char * access, const char * url,
94 BOOL regex, int regex_flags)
95 {
96 HTProxy *me;
97 if (!list || !access || !url || !*url)
98 return NO;
99 if ((me = (HTProxy *) HT_CALLOC(1, sizeof(HTProxy))) == NULL)
100 HT_OUTOFMEM("add_object");
101 StrAllocCopy(me->access, access); /* Access method */
102
103 #ifdef HT_POSIX_REGEX
104 /*
105 ** If we support regular expressions then compile one up for
106 ** this regular expression. Otherwise use is as a normal
107 ** access scheme.
108 */
109 if (regex) {
110 me->regex = get_regex_t(access,
111 regex_flags < 0 ?
112 W3C_DEFAULT_REGEX_FLAGS : regex_flags);
113 } else
114 #endif
115 {
116 char *ptr = me->access;
117 while ((*ptr = TOLOWER(*ptr))) ptr++;
118 }
119
120 me->url = HTParse(url, "", PARSE_ACCESS+PARSE_HOST+PARSE_PUNCTUATION);
121 if (*(me->url+strlen(me->url)-1) != '/')
122 StrAllocCat(me->url, "/");
123 me->url = HTSimplify(&me->url);
124
125 /* See if we already have this one */
126 {
127 HTList *cur = list;
128 HTProxy *pres;
129 while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
130 if (!strcmp(pres->access, me->access))
131 break; /* We already have it */
132 }
133 if (pres) {
134 HTTRACE(PROT_TRACE, "HTProxy..... replacing for `%s\' access %s\n" _
135 me->url _ me->access);
136 HT_FREE(pres->access);
137 HT_FREE(pres->url);
138 #ifdef HT_POSIX_REGEX
139 if (pres->regex) regfree(pres->regex);
140 #endif
141 HTList_removeObject(list, (void *) pres);
142 HT_FREE(pres);
143 }
144 HTTRACE(PROT_TRACE, "HTProxy..... adding for `%s\' access %s\n" _
145 me->url _ me->access);
146 HTList_addObject(list, (void *) me);
147 }
148 return YES;
149 }
150
remove_allObjects(HTList * list)151 PRIVATE BOOL remove_allObjects (HTList * list)
152 {
153 if (list) {
154 HTList *cur = list;
155 HTProxy *pres;
156 while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
157 HT_FREE(pres->access);
158 HT_FREE(pres->url);
159 #ifdef HT_POSIX_REGEX
160 if (pres->regex) regfree(pres->regex);
161 #endif
162 HT_FREE(pres);
163 }
164 return YES;
165 }
166 return NO;
167 }
168
169 /* Add an entry to a list of host names
170 ** ------------------------------------
171 ** Existing entries are replaced with new ones
172 */
add_hostname(HTList * list,const char * host,const char * access,unsigned port,BOOL regex,int regex_flags)173 PRIVATE BOOL add_hostname (HTList * list, const char * host,
174 const char * access, unsigned port,
175 BOOL regex, int regex_flags)
176 {
177 HTHostList *me;
178 if (!list || !host || !*host)
179 return NO;
180 if ((me = (HTHostList *) HT_CALLOC(1, sizeof(HTHostList))) == NULL)
181 HT_OUTOFMEM("add_hostname");
182 #ifdef HT_POSIX_REGEX
183 if (regex)
184 me->regex = get_regex_t(host,
185 regex_flags < 0 ?
186 W3C_DEFAULT_REGEX_FLAGS : regex_flags);
187 #endif
188
189 if (access) {
190 char *ptr;
191 StrAllocCopy(me->access, access); /* Access method */
192 ptr = me->access;
193 while ((*ptr = TOLOWER(*ptr))) ptr++;
194 }
195 StrAllocCopy(me->host, host); /* Host name */
196 {
197 char *ptr = me->host;
198 while ((*ptr = TOLOWER(*ptr))) ptr++;
199 }
200 me->port = port; /* Port number */
201 HTTRACE(PROT_TRACE, "HTHostList.. adding `%s\' to list\n" _ me->host);
202 HTList_addObject(list, (void *) me);
203 return YES;
204 }
205
remove_AllHostnames(HTList * list)206 PRIVATE BOOL remove_AllHostnames (HTList * list)
207 {
208 if (list) {
209 HTList *cur = list;
210 HTHostList *pres;
211 while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
212 HT_FREE(pres->access);
213 HT_FREE(pres->host);
214 #ifdef HT_POSIX_REGEX
215 if (pres->regex) regfree(pres->regex);
216 #endif
217 HT_FREE(pres);
218 }
219 return YES;
220 }
221 return NO;
222 }
223
224 /* HTProxy_add
225 ** -----------
226 ** Registers a proxy as the server to contact for a specific
227 ** access method. `proxy' should be a fully valid name, like
228 ** "http://proxy.w3.org:8001" but domain name is not required.
229 ** If an entry exists for this access then delete it and use the
230 ** ne one. Returns YES if OK, else NO
231 */
HTProxy_add(const char * access,const char * proxy)232 PUBLIC BOOL HTProxy_add (const char * access, const char * proxy)
233 {
234 /*
235 ** If this is the first time here then also add a before filter to handle
236 ** proxy authentication and the normal AA after filter as well.
237 ** These filters will be removed if we remove all proxies again.
238 */
239 if (!proxies) {
240 proxies = HTList_new();
241 HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
242 HT_FILTER_MIDDLE);
243 HTNet_addAfter(HTAuthFilter, NULL, NULL,
244 HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
245 HTNet_addAfter(HTAuthFilter, NULL, NULL,
246 HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
247 }
248 return add_object(proxies, access, proxy, NO, -1);
249 }
250
251 /* HTProxy_addRegex
252 ** ----------------
253 ** Registers a proxy as the server to contact for any URL matching the
254 ** regular expression. `proxy' should be a fully valid name, like
255 ** "http://proxy.w3.org:8001".
256 ** If an entry exists for this access then delete it and use the
257 ** new one. Returns YES if OK, else NO
258 */
HTProxy_addRegex(const char * regex,const char * proxy,int regex_flags)259 PUBLIC BOOL HTProxy_addRegex (const char * regex,
260 const char * proxy,
261 int regex_flags)
262 {
263 /*
264 ** If this is the first time here then also add a before filter to handle
265 ** proxy authentication and the normal AA after filter as well.
266 ** These filters will be removed if we remove all proxies again.
267 */
268 if (!proxies) {
269 proxies = HTList_new();
270 HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
271 HT_FILTER_MIDDLE);
272 HTNet_addAfter(HTAuthFilter, NULL, NULL,
273 HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
274 HTNet_addAfter(HTAuthFilter, NULL, NULL,
275 HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
276 }
277 #ifdef HT_POSIX_REGEX
278 return add_object(proxies, regex, proxy, YES, regex_flags);
279 #else
280 return add_object(proxies, regex, proxy, NO, -1);
281 #endif
282 }
283
284 /*
285 ** Removes all registered proxies
286 */
HTProxy_deleteAll(void)287 PUBLIC BOOL HTProxy_deleteAll (void)
288 {
289 if (remove_allObjects(proxies)) {
290 HTList_delete(proxies);
291
292 /*
293 ** If we have no more proxies then there is no reason for checking
294 ** proxy authentication. We therefore unregister the filters for
295 ** handling proxy authentication
296 */
297 HTNet_deleteBefore(HTAA_proxyBeforeFilter);
298 HTNet_deleteAfterStatus(HT_NO_PROXY_ACCESS);
299 HTNet_deleteAfterStatus(HT_PROXY_REAUTH);
300
301 proxies = NULL;
302 return YES;
303 }
304 return NO;
305 }
306
307 /* HTGateway_add
308 ** -------------
309 ** Registers a gateway as the server to contact for a specific
310 ** access method. `gateway' should be a fully valid name, like
311 ** "http://gateway.w3.org:8001" but domain name is not required.
312 ** If an entry exists for this access then delete it and use the
313 ** ne one. Returns YES if OK, else NO
314 */
HTGateway_add(const char * access,const char * gate)315 PUBLIC BOOL HTGateway_add (const char * access, const char * gate)
316 {
317 if (!gateways)
318 gateways = HTList_new();
319 return add_object(gateways, access, gate, NO, -1);
320 }
321
322 /*
323 ** Removes all registered gateways
324 */
HTGateway_deleteAll(void)325 PUBLIC BOOL HTGateway_deleteAll (void)
326 {
327 if (remove_allObjects(gateways)) {
328 HTList_delete(gateways);
329 gateways = NULL;
330 return YES;
331 }
332 return NO;
333 }
334
335 /* HTNoProxy_add
336 ** -------------
337 ** Registers a host name or a domain as a place where no proxy should
338 ** be contacted - for example a very fast link. If `port' is '0' then
339 ** it applies to all ports and if `access' is NULL then it applies to
340 ** to all access methods.
341 **
342 ** Examples: w3.org
343 ** www.close.com
344 */
HTNoProxy_add(const char * host,const char * access,unsigned port)345 PUBLIC BOOL HTNoProxy_add (const char * host, const char * access,
346 unsigned port)
347 {
348 if (!noproxy)
349 noproxy = HTList_new();
350 return add_hostname(noproxy, host, access, port, NO, -1);
351 }
352
353 /* HTNoProxy_addRegex
354 ** ------------------
355 ** Registers a regular expression where URIs matching this expression
356 ** should go directly and not via a proxy.
357 **
358 */
HTNoProxy_addRegex(const char * regex,int regex_flags)359 PUBLIC BOOL HTNoProxy_addRegex (const char * regex, int regex_flags)
360 {
361 if (!noproxy)
362 noproxy = HTList_new();
363 #ifdef HT_POSIX_REGEX
364 return add_hostname(noproxy, regex, NULL, 0, YES, regex_flags);
365 #else
366 return add_hostname(noproxy, regex, NULL, 0, NO, -1);
367 #endif
368 }
369
370 /* HTNoProxy_deleteAll
371 ** -------------------
372 ** Removes all registered no_proxy directives
373 */
HTNoProxy_deleteAll(void)374 PUBLIC BOOL HTNoProxy_deleteAll (void)
375 {
376 if (remove_AllHostnames(noproxy)) {
377 HTList_delete(noproxy);
378 noproxy = NULL;
379 return YES;
380 }
381 return NO;
382 }
383
384 /* HTNProxy_noProxyIsOnlyProxy
385 ** `----------------------------
386 ** Returns the state of the noproxy_is_onlyproxy flag
387 */
HTProxy_NoProxyIsOnlyProxy(void)388 PUBLIC int HTProxy_NoProxyIsOnlyProxy (void)
389 {
390 return noproxy_is_onlyproxy;
391 }
392
393 /* HTNProxy_setNoProxyisOnlyProxy
394 ** --------------------------
395 ** Sets the state of the noproxy_is_onlyproxy flag
396 */
HTProxy_setNoProxyIsOnlyProxy(int value)397 PUBLIC void HTProxy_setNoProxyIsOnlyProxy (int value)
398 {
399 noproxy_is_onlyproxy = value;
400 }
401
402 /* HTProxy_find
403 ** ------------
404 ** This function evaluates the lists of registered proxies and if
405 ** one is found for the actual access method and it is not registered
406 ** in the `noproxy' list, then a URL containing the host to be contacted
407 ** is returned to the caller. This string must be freed be the caller.
408 **
409 ** Returns: proxy If OK (must be freed by caller)
410 ** NULL If no proxy is found or error
411 */
HTProxy_find(const char * url)412 PUBLIC char * HTProxy_find (const char * url)
413 {
414 char * access;
415 char * proxy = NULL;
416 int no_proxy_found = 0;
417
418 if (!url || !proxies)
419 return NULL;
420 access = HTParse(url, "", PARSE_ACCESS);
421
422 /* First check if the host (if any) is registered in the noproxy list */
423 if (noproxy) {
424 char *host = HTParse(url, "", PARSE_HOST);
425 char *ptr;
426 unsigned port=0;
427 if ((ptr = strchr(host, ':')) != NULL) {
428 *ptr++ = '\0'; /* Chop off port */
429 if (*ptr) port = (unsigned) atoi(ptr);
430 }
431 if (*host) { /* If we have a host name */
432 HTList *cur = noproxy;
433 HTHostList *pres;
434 while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
435 #ifdef HT_POSIX_REGEX
436 if (pres->regex) {
437 BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
438 if (match) {
439 HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
440 no_proxy_found = 1;
441 break;
442 }
443 } else
444 #endif
445 if (!pres->access ||
446 (pres->access && !strcmp(pres->access, access))) {
447 if ((pres->port == 0) || (pres->port == port)) {
448 char *np = pres->host+strlen(pres->host);
449 char *hp = host+strlen(host);
450 while (np>=pres->host && hp>=host && (*np--==*hp--));
451 if (np==pres->host-1 && (hp==host-1 || *hp=='.')) {
452 HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
453 no_proxy_found = 1;
454 break;
455 }
456 }
457 }
458 }
459 }
460 HT_FREE(host);
461 }
462
463 if ((no_proxy_found && !noproxy_is_onlyproxy)
464 || (!no_proxy_found && noproxy_is_onlyproxy)) {
465 HT_FREE(access);
466 return NULL;
467 }
468
469 /* Now check if we have a proxy registered for this access method */
470 {
471 HTList *cur = proxies;
472 HTProxy *pres;
473 while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
474 #ifdef HT_POSIX_REGEX
475 if (pres->regex) {
476 BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
477 if (match) {
478 StrAllocCopy(proxy, pres->url);
479 HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
480 break;
481 }
482 } else
483 #endif
484 if (!strcmp(pres->access, access)) {
485 StrAllocCopy(proxy, pres->url);
486 HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
487 break;
488 }
489 }
490 }
491 HT_FREE(access);
492 return proxy;
493 }
494
495
496 /* HTGateway_find
497 ** --------------
498 ** This function evaluates the lists of registered gateways and if
499 ** one is found for the actual access method then it is returned
500 **
501 ** Returns: gateway If OK (must be freed by caller)
502 ** NULL If no gateway is found or error
503 */
HTGateway_find(const char * url)504 PUBLIC char * HTGateway_find (const char * url)
505 {
506 char * access;
507 char * gateway = NULL;
508 if (!url || !gateways)
509 return NULL;
510 access = HTParse(url, "", PARSE_ACCESS);
511
512 /* Check if we have a gateway registered for this access method */
513 {
514 HTList *cur = gateways;
515 HTProxy *pres;
516 while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
517 if (!strcmp(pres->access, access)) {
518 StrAllocCopy(gateway, pres->url);
519 HTTRACE(PROT_TRACE, "GetGateway.. Found: `%s\'\n" _ pres->url);
520 break;
521 }
522 }
523 }
524 HT_FREE(access);
525 return gateway;
526 }
527
528
529 /*
530 ** This function maintains backwards compatibility with the old
531 ** environment variables and searches for the most common values:
532 ** http, ftp, news, wais, and gopher
533 */
HTProxy_getEnvVar(void)534 PUBLIC void HTProxy_getEnvVar (void)
535 {
536 char buf[80];
537 static const char *accesslist[] = {
538 "http",
539 "ftp",
540 "news",
541 "wais",
542 "gopher",
543 NULL
544 };
545 const char **access = accesslist;
546 HTTRACE(PROT_TRACE, "Proxy....... Looking for environment variables\n");
547 while (*access) {
548 BOOL found = NO;
549 char *gateway=NULL;
550 char *proxy=NULL;
551
552 /* Search for proxy gateways */
553 if (found == NO) {
554 strcpy(buf, *access);
555 strcat(buf, "_proxy");
556 if ((proxy = (char *) getenv(buf)) && *proxy) {
557 HTProxy_add(*access, proxy);
558 found = YES;
559 }
560
561 /* Try the same with upper case */
562 if (found == NO) {
563 char * up = buf;
564 while ((*up = TOUPPER(*up))) up++;
565 if ((proxy = (char *) getenv(buf)) && *proxy) {
566 HTProxy_add(*access, proxy);
567 found = YES;
568 }
569 }
570 }
571
572 /* As a last resort, search for gateway servers */
573 if (found == NO) {
574 strcpy(buf, "WWW_");
575 strcat(buf, *access);
576 strcat(buf, "_GATEWAY");
577 if ((gateway = (char *) getenv(buf)) && *gateway) {
578 HTGateway_add(*access, gateway);
579 found = YES;
580 }
581 }
582 ++access;
583 }
584
585 /* Search for `noproxy' directive */
586 {
587 char *noproxy = getenv("no_proxy");
588 if (noproxy && *noproxy) {
589 char *str = NULL;
590 char *strptr;
591 char *name;
592 StrAllocCopy(str, noproxy); /* Get copy we can mutilate */
593 strptr = str;
594 while ((name = HTNextField(&strptr)) != NULL) {
595 char *portstr = strchr(name, ':');
596 unsigned port=0;
597 if (portstr) {
598 *portstr++ = '\0';
599 if (*portstr) port = (unsigned) atoi(portstr);
600 }
601
602 /* Register it for all access methods */
603 HTNoProxy_add(name, NULL, port);
604 }
605 HT_FREE(str);
606 }
607 }
608 }
609
610