1 /*								     HTProxy.c
2 **	GATEWAY AND PROXY MANAGER
3 **
4 **	(c) COPYRIGHT MIT 1995.
5 **	Please first read the full copyright statement in the file COPYRIGH.
6 **	@(#) $Id$
7 **
8 **	Replaces the old env variables for gateways and proxies. However for
9 **	backward compatibility there is a function that reads the env variables
10 **	at start up. Note that there is a difference between a proxy and a
11 **	gateway!
12 **
13 ** Authors
14 **	HF	Henrik Frystyk, frystyk@w3.org
15 ** History
16 **       4 Jun 95 Written on a rainy day
17 */
18 
/*
**  Fallback value for HT_DEFAULT_WAIS_GATEWAY. It is only defined when the
**  build neither defines HT_DIRECT_WAIS nor supplies its own value.
*/
#ifndef HT_DIRECT_WAIS
#ifndef HT_DEFAULT_WAIS_GATEWAY
#define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
#endif
#endif
22 
23 /* Library include files */
24 #include "wwwsys.h"
25 #include "WWWUtil.h"
26 #include "WWWCore.h"
27 #include "WWWHTTP.h"
28 #include "WWWApp.h"
29 #include "HTProxy.h"					 /* Implemented here */
30 
31 /* Variables and typedefs local to this module */
32 
/*
**  One registered proxy or gateway. The proxy list and the gateway list
**  both store records of this type.
*/
typedef struct _HTProxy {
    char *      access;     /* Access scheme, or the regex source text */
    char *      url;        /* URL of Gateway or Proxy */
#ifdef HT_POSIX_REGEX
    regex_t *   regex;      /* Compiled regex, NULL for plain scheme entries */
#endif
} HTProxy;
40 
/*
**  One entry of the `noproxy' list.
*/
typedef struct _HTHostlist {
    char *      access;     /* Lower-cased access scheme, NULL means any */
    char *      host;       /* Lower-cased host or domain name */
    unsigned    port;       /* Port number, 0 means any port */
#ifdef HT_POSIX_REGEX
    regex_t *   regex;      /* Compiled regex, NULL for host name entries */
#endif
} HTHostList;
49 
50 PRIVATE HTList * proxies = NULL;		    /* List of proxy servers */
51 PRIVATE HTList * gateways = NULL;			 /* List of gateways */
52 PRIVATE HTList * noproxy = NULL;   /* Don't proxy on these hosts and domains */
53 PRIVATE int      noproxy_is_onlyproxy = 0; /* Interpret the noproxy list as an onlyproxy one */
54 
55 #if 0
56 PRIVATE HTList * onlyproxy = NULL;  /* Proxy only on these hosts and domains */
57 #endif
58 
59 /* ------------------------------------------------------------------------- */
60 
61 #ifdef HT_POSIX_REGEX
get_regex_error(int errcode,regex_t * compiled)62 PRIVATE char * get_regex_error (int errcode, regex_t * compiled)
63 {
64     size_t length = regerror (errcode, compiled, NULL, 0);
65     char * str = NULL;
66     if ((str = (char *) HT_MALLOC(length+1)) == NULL)
67 	HT_OUTOFMEM("get_regex_error");
68     (void) regerror (errcode, compiled, str, length);
69     return str;
70 }
71 
get_regex_t(const char * regex_str,int cflags)72 PRIVATE regex_t * get_regex_t (const char * regex_str, int cflags)
73 {
74     regex_t * regex = NULL;
75     if (regex_str && *regex_str) {
76 	int status;
77 	if ((regex = (regex_t *) HT_CALLOC(1, sizeof(regex_t))) == NULL)
78 	    HT_OUTOFMEM("get_regex_t");
79 	if ((status = regcomp(regex, regex_str, cflags))) {
80 	    char * err_msg = get_regex_error(status, regex);
81 	    HTTRACE(PROT_TRACE, "HTProxy..... Regular expression error: %s\n" _ err_msg);
82 	    HT_FREE(err_msg);
83 	    HT_FREE(regex);
84 	}
85     }
86     return regex;
87 }
88 #endif
89 
90 /*
91 **	Existing entries are replaced with new ones
92 */
add_object(HTList * list,const char * access,const char * url,BOOL regex,int regex_flags)93 PRIVATE BOOL add_object (HTList * list, const char * access, const char * url,
94 			 BOOL regex, int regex_flags)
95 {
96     HTProxy *me;
97     if (!list || !access || !url || !*url)
98 	return NO;
99     if ((me = (HTProxy *) HT_CALLOC(1, sizeof(HTProxy))) == NULL)
100 	HT_OUTOFMEM("add_object");
101     StrAllocCopy(me->access, access);		     	    /* Access method */
102 
103 #ifdef HT_POSIX_REGEX
104     /*
105     **  If we support regular expressions then compile one up for
106     **  this regular expression. Otherwise use is as a normal
107     **  access scheme.
108     */
109     if (regex) {
110 	me->regex = get_regex_t(access,
111 				regex_flags < 0 ?
112 				W3C_DEFAULT_REGEX_FLAGS : regex_flags);
113     } else
114 #endif
115     {
116 	char *ptr = me->access;
117 	while ((*ptr = TOLOWER(*ptr))) ptr++;
118     }
119 
120     me->url = HTParse(url, "", PARSE_ACCESS+PARSE_HOST+PARSE_PUNCTUATION);
121     if (*(me->url+strlen(me->url)-1) != '/')
122 	StrAllocCat(me->url, "/");
123     me->url = HTSimplify(&me->url);
124 
125     /* See if we already have this one */
126     {
127 	HTList *cur = list;
128 	HTProxy *pres;
129 	while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
130 	    if (!strcmp(pres->access, me->access))
131 		break;				       /* We already have it */
132 	}
133 	if (pres) {
134 	    HTTRACE(PROT_TRACE, "HTProxy..... replacing for `%s\' access %s\n" _
135 			me->url _ me->access);
136 	    HT_FREE(pres->access);
137 	    HT_FREE(pres->url);
138 #ifdef HT_POSIX_REGEX
139 	    if (pres->regex) regfree(pres->regex);
140 #endif
141 	    HTList_removeObject(list, (void *) pres);
142 	    HT_FREE(pres);
143 	}
144 	HTTRACE(PROT_TRACE, "HTProxy..... adding for `%s\' access %s\n" _
145 		    me->url _ me->access);
146 	HTList_addObject(list, (void *) me);
147     }
148     return YES;
149 }
150 
remove_allObjects(HTList * list)151 PRIVATE BOOL remove_allObjects (HTList * list)
152 {
153     if (list) {
154 	HTList *cur = list;
155 	HTProxy *pres;
156 	while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
157 	    HT_FREE(pres->access);
158 	    HT_FREE(pres->url);
159 #ifdef HT_POSIX_REGEX
160 	    if (pres->regex) regfree(pres->regex);
161 #endif
162 	    HT_FREE(pres);
163 	}
164 	return YES;
165     }
166     return NO;
167 }
168 
169 /*	Add an entry to a list of host names
170 **	------------------------------------
171 **	Existing entries are replaced with new ones
172 */
add_hostname(HTList * list,const char * host,const char * access,unsigned port,BOOL regex,int regex_flags)173 PRIVATE BOOL add_hostname (HTList * list, const char * host,
174 			   const char * access, unsigned port,
175 			   BOOL regex, int regex_flags)
176 {
177     HTHostList *me;
178     if (!list || !host || !*host)
179 	return NO;
180     if ((me = (HTHostList *) HT_CALLOC(1, sizeof(HTHostList))) == NULL)
181         HT_OUTOFMEM("add_hostname");
182 #ifdef HT_POSIX_REGEX
183     if (regex)
184 	me->regex = get_regex_t(host,
185 				regex_flags < 0 ?
186 				W3C_DEFAULT_REGEX_FLAGS : regex_flags);
187 #endif
188 
189     if (access) {
190 	char *ptr;
191 	StrAllocCopy(me->access, access);      	     	    /* Access method */
192 	ptr = me->access;
193 	while ((*ptr = TOLOWER(*ptr))) ptr++;
194     }
195     StrAllocCopy(me->host, host);		     	    	/* Host name */
196     {
197 	char *ptr = me->host;
198 	while ((*ptr = TOLOWER(*ptr))) ptr++;
199     }
200     me->port = port;					      /* Port number */
201     HTTRACE(PROT_TRACE, "HTHostList.. adding `%s\' to list\n" _ me->host);
202     HTList_addObject(list, (void *) me);
203     return YES;
204 }
205 
remove_AllHostnames(HTList * list)206 PRIVATE BOOL remove_AllHostnames (HTList * list)
207 {
208     if (list) {
209 	HTList *cur = list;
210 	HTHostList *pres;
211 	while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
212 	    HT_FREE(pres->access);
213 	    HT_FREE(pres->host);
214 #ifdef HT_POSIX_REGEX
215 	    if (pres->regex) regfree(pres->regex);
216 #endif
217 	    HT_FREE(pres);
218 	}
219 	return YES;
220     }
221     return NO;
222 }
223 
224 /*	HTProxy_add
225 **	-----------
226 **	Registers a proxy as the server to contact for a specific
227 **	access method. `proxy' should be a fully valid name, like
228 **	"http://proxy.w3.org:8001" but domain name is not required.
229 **	If an entry exists for this access then delete it and use the
230 **	ne one. Returns YES if OK, else NO
231 */
HTProxy_add(const char * access,const char * proxy)232 PUBLIC BOOL HTProxy_add (const char * access, const char * proxy)
233 {
234     /*
235     **  If this is the first time here then also add a before filter to handle
236     **  proxy authentication and the normal AA after filter as well.
237     **  These filters will be removed if we remove all proxies again.
238     */
239     if (!proxies) {
240 	proxies = HTList_new();
241 	HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
242 			HT_FILTER_MIDDLE);
243 	HTNet_addAfter(HTAuthFilter, NULL, NULL,
244 		       HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
245 	HTNet_addAfter(HTAuthFilter, NULL, NULL,
246 		       HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
247     }
248     return add_object(proxies, access, proxy, NO, -1);
249 }
250 
251 /*	HTProxy_addRegex
252 **	----------------
253 **	Registers a proxy as the server to contact for any URL matching the
254 **	regular expression. `proxy' should be a fully valid name, like
255 **	"http://proxy.w3.org:8001".
256 **	If an entry exists for this access then delete it and use the
257 **	new one. Returns YES if OK, else NO
258 */
HTProxy_addRegex(const char * regex,const char * proxy,int regex_flags)259 PUBLIC BOOL HTProxy_addRegex (const char * regex,
260 			      const char * proxy,
261 			      int regex_flags)
262 {
263     /*
264     **  If this is the first time here then also add a before filter to handle
265     **  proxy authentication and the normal AA after filter as well.
266     **  These filters will be removed if we remove all proxies again.
267     */
268     if (!proxies) {
269 	proxies = HTList_new();
270 	HTNet_addBefore(HTAA_proxyBeforeFilter, NULL, NULL,
271 			HT_FILTER_MIDDLE);
272 	HTNet_addAfter(HTAuthFilter, NULL, NULL,
273 		       HT_NO_PROXY_ACCESS, HT_FILTER_MIDDLE);
274 	HTNet_addAfter(HTAuthFilter, NULL, NULL,
275 		       HT_PROXY_REAUTH, HT_FILTER_MIDDLE);
276     }
277 #ifdef HT_POSIX_REGEX
278     return add_object(proxies, regex, proxy, YES, regex_flags);
279 #else
280     return add_object(proxies, regex, proxy, NO, -1);
281 #endif
282 }
283 
284 /*
285 **	Removes all registered proxies
286 */
HTProxy_deleteAll(void)287 PUBLIC BOOL HTProxy_deleteAll (void)
288 {
289     if (remove_allObjects(proxies)) {
290 	HTList_delete(proxies);
291 
292 	/*
293 	** If we have no more proxies then there is no reason for checking
294 	** proxy authentication. We therefore unregister the filters for
295 	** handling proxy authentication
296 	*/
297 	HTNet_deleteBefore(HTAA_proxyBeforeFilter);
298         HTNet_deleteAfterStatus(HT_NO_PROXY_ACCESS);
299         HTNet_deleteAfterStatus(HT_PROXY_REAUTH);
300 
301 	proxies = NULL;
302 	return YES;
303     }
304     return NO;
305 }
306 
307 /*	HTGateway_add
308 **	-------------
309 **	Registers a gateway as the server to contact for a specific
310 **	access method. `gateway' should be a fully valid name, like
311 **	"http://gateway.w3.org:8001" but domain name is not required.
312 **	If an entry exists for this access then delete it and use the
313 **	ne one. Returns YES if OK, else NO
314 */
HTGateway_add(const char * access,const char * gate)315 PUBLIC BOOL HTGateway_add (const char * access, const char * gate)
316 {
317     if (!gateways)
318 	gateways = HTList_new();
319     return add_object(gateways, access, gate, NO, -1);
320 }
321 
322 /*
323 **	Removes all registered gateways
324 */
HTGateway_deleteAll(void)325 PUBLIC BOOL HTGateway_deleteAll (void)
326 {
327     if (remove_allObjects(gateways)) {
328 	HTList_delete(gateways);
329 	gateways = NULL;
330 	return YES;
331     }
332     return NO;
333 }
334 
335 /*	HTNoProxy_add
336 **	-------------
337 **	Registers a host name or a domain as a place where no proxy should
338 **	be contacted - for example a very fast link. If `port' is '0' then
339 **	it applies to all ports and if `access' is NULL then it applies to
340 **	to all access methods.
341 **
342 **	Examples:	w3.org
343 **			www.close.com
344 */
HTNoProxy_add(const char * host,const char * access,unsigned port)345 PUBLIC BOOL HTNoProxy_add (const char * host, const char * access,
346 			   unsigned port)
347 {
348     if (!noproxy)
349 	noproxy = HTList_new();
350     return add_hostname(noproxy, host, access, port, NO, -1);
351 }
352 
353 /*	HTNoProxy_addRegex
354 **	------------------
355 **	Registers a regular expression where URIs matching this expression
356 **      should go directly and not via a proxy.
357 **
358 */
HTNoProxy_addRegex(const char * regex,int regex_flags)359 PUBLIC BOOL HTNoProxy_addRegex (const char * regex, int regex_flags)
360 {
361     if (!noproxy)
362 	noproxy = HTList_new();
363 #ifdef HT_POSIX_REGEX
364     return add_hostname(noproxy, regex, NULL, 0, YES, regex_flags);
365 #else
366     return add_hostname(noproxy, regex, NULL, 0, NO, -1);
367 #endif
368 }
369 
370 /*	HTNoProxy_deleteAll
371 **	-------------------
372 **	Removes all registered no_proxy directives
373 */
HTNoProxy_deleteAll(void)374 PUBLIC BOOL HTNoProxy_deleteAll (void)
375 {
376     if (remove_AllHostnames(noproxy)) {
377 	HTList_delete(noproxy);
378 	noproxy = NULL;
379 	return YES;
380     }
381     return NO;
382 }
383 
384 /*      HTNProxy_noProxyIsOnlyProxy
385 **     `----------------------------
386 **      Returns the state of the noproxy_is_onlyproxy flag
387 */
HTProxy_NoProxyIsOnlyProxy(void)388 PUBLIC int HTProxy_NoProxyIsOnlyProxy (void)
389 {
390   return noproxy_is_onlyproxy;
391 }
392 
393 /*      HTNProxy_setNoProxyisOnlyProxy
394 **      --------------------------
395 **      Sets the state of the noproxy_is_onlyproxy flag
396 */
HTProxy_setNoProxyIsOnlyProxy(int value)397 PUBLIC void HTProxy_setNoProxyIsOnlyProxy (int value)
398 {
399   noproxy_is_onlyproxy = value;
400 }
401 
402 /*	HTProxy_find
403 **	------------
404 **	This function evaluates the lists of registered proxies and if
405 **	one is found for the actual access method and it is not registered
406 **	in the `noproxy' list, then a URL containing the host to be contacted
407 **	is returned to the caller. This string must be freed be the caller.
408 **
409 **	Returns: proxy	If OK (must be freed by caller)
410 **		 NULL	If no proxy is found or error
411 */
HTProxy_find(const char * url)412 PUBLIC char * HTProxy_find (const char * url)
413 {
414     char * access;
415     char * proxy = NULL;
416     int no_proxy_found = 0;
417 
418     if (!url || !proxies)
419 	return NULL;
420     access = HTParse(url, "", PARSE_ACCESS);
421 
422     /* First check if the host (if any) is registered in the noproxy list */
423     if (noproxy) {
424 	char *host = HTParse(url, "", PARSE_HOST);
425 	char *ptr;
426 	unsigned port=0;
427 	if ((ptr = strchr(host, ':')) != NULL) {
428 	    *ptr++ = '\0';				    /* Chop off port */
429 	    if (*ptr) port = (unsigned) atoi(ptr);
430 	}
431 	if (*host) {				   /* If we have a host name */
432 	    HTList *cur = noproxy;
433 	    HTHostList *pres;
434 	    while ((pres = (HTHostList *) HTList_nextObject(cur)) != NULL) {
435 #ifdef HT_POSIX_REGEX
436 		if (pres->regex) {
437 		    BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
438 		    if (match) {
439 			HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
440 			no_proxy_found = 1;
441 			break;
442 		    }
443 		} else
444 #endif
445 		if (!pres->access ||
446 		    (pres->access && !strcmp(pres->access, access))) {
447 		    if ((pres->port == 0) || (pres->port == port)) {
448 			char *np = pres->host+strlen(pres->host);
449 			char *hp = host+strlen(host);
450 			while (np>=pres->host && hp>=host && (*np--==*hp--));
451 			if (np==pres->host-1 && (hp==host-1 || *hp=='.')) {
452 			    HTTRACE(PROT_TRACE, "GetProxy.... No proxy directive found: `%s\'\n" _ pres->host);
453 			    no_proxy_found = 1;
454 			    break;
455 			}
456 		    }
457 		}
458 	    }
459 	}
460 	HT_FREE(host);
461     }
462 
463     if ((no_proxy_found && !noproxy_is_onlyproxy)
464         || (!no_proxy_found && noproxy_is_onlyproxy)) {
465       HT_FREE(access);
466       return NULL;
467     }
468 
469     /* Now check if we have a proxy registered for this access method */
470     {
471 	HTList *cur = proxies;
472 	HTProxy *pres;
473 	while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
474 #ifdef HT_POSIX_REGEX
475 	    if (pres->regex) {
476 		BOOL match = regexec(pres->regex, url, 0, NULL, 0) ? NO : YES;
477 		if (match) {
478 		    StrAllocCopy(proxy, pres->url);
479 		    HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
480 		    break;
481 		}
482 	    } else
483 #endif
484 	    if (!strcmp(pres->access, access)) {
485 		StrAllocCopy(proxy, pres->url);
486 		HTTRACE(PROT_TRACE, "GetProxy.... Found: `%s\'\n" _ pres->url);
487 		break;
488 	    }
489 	}
490     }
491     HT_FREE(access);
492     return proxy;
493 }
494 
495 
496 /*	HTGateway_find
497 **	--------------
498 **	This function evaluates the lists of registered gateways and if
499 **	one is found for the actual access method then it is returned
500 **
501 **	Returns: gateway If OK (must be freed by caller)
502 **		 NULL	 If no gateway is found or error
503 */
HTGateway_find(const char * url)504 PUBLIC char * HTGateway_find (const char * url)
505 {
506     char * access;
507     char * gateway = NULL;
508     if (!url || !gateways)
509 	return NULL;
510     access = HTParse(url, "", PARSE_ACCESS);
511 
512     /* Check if we have a gateway registered for this access method */
513     {
514 	HTList *cur = gateways;
515 	HTProxy *pres;
516 	while ((pres = (HTProxy *) HTList_nextObject(cur)) != NULL) {
517 	    if (!strcmp(pres->access, access)) {
518 		StrAllocCopy(gateway, pres->url);
519 		HTTRACE(PROT_TRACE, "GetGateway.. Found: `%s\'\n" _ pres->url);
520 		break;
521 	    }
522 	}
523     }
524     HT_FREE(access);
525     return gateway;
526 }
527 
528 
529 /*
530 **	This function maintains backwards compatibility with the old
531 **	environment variables and searches for the most common values:
532 **	http, ftp, news, wais, and gopher
533 */
HTProxy_getEnvVar(void)534 PUBLIC void HTProxy_getEnvVar (void)
535 {
536     char buf[80];
537     static const char *accesslist[] = {
538 	"http",
539 	"ftp",
540 	"news",
541 	"wais",
542 	"gopher",
543 	NULL
544     };
545     const char **access = accesslist;
546     HTTRACE(PROT_TRACE, "Proxy....... Looking for environment variables\n");
547     while (*access) {
548 	BOOL found = NO;
549 	char *gateway=NULL;
550 	char *proxy=NULL;
551 
552 	/* Search for proxy gateways */
553 	if (found == NO) {
554 	    strcpy(buf, *access);
555 	    strcat(buf, "_proxy");
556 	    if ((proxy = (char *) getenv(buf)) && *proxy) {
557 		HTProxy_add(*access, proxy);
558 		found = YES;
559 	    }
560 
561 	    /* Try the same with upper case */
562 	    if (found == NO) {
563 		char * up = buf;
564 		while ((*up = TOUPPER(*up))) up++;
565 		if ((proxy = (char *) getenv(buf)) && *proxy) {
566 		    HTProxy_add(*access, proxy);
567 		    found = YES;
568 		}
569 	    }
570 	}
571 
572 	/* As a last resort, search for gateway servers */
573 	if (found == NO) {
574 	    strcpy(buf, "WWW_");
575 	    strcat(buf, *access);
576 	    strcat(buf, "_GATEWAY");
577 	    if ((gateway = (char *) getenv(buf)) && *gateway) {
578 		HTGateway_add(*access, gateway);
579 		found = YES;
580 	    }
581 	}
582 	++access;
583     }
584 
585     /* Search for `noproxy' directive */
586     {
587 	char *noproxy = getenv("no_proxy");
588 	if (noproxy && *noproxy) {
589 	    char *str = NULL;
590 	    char *strptr;
591 	    char *name;
592 	    StrAllocCopy(str, noproxy);		 /* Get copy we can mutilate */
593 	    strptr = str;
594 	    while ((name = HTNextField(&strptr)) != NULL) {
595 		char *portstr = strchr(name, ':');
596 		unsigned port=0;
597 		if (portstr) {
598 		    *portstr++ = '\0';
599 		    if (*portstr) port = (unsigned) atoi(portstr);
600 		}
601 
602 		/* Register it for all access methods */
603 		HTNoProxy_add(name, NULL, port);
604 	    }
605 	    HT_FREE(str);
606 	}
607     }
608 }
609 
610