1 /*
2  * $LynxId: HTAccess.c,v 1.85 2019/08/24 00:27:06 tom Exp $
3  *
4  *		Access Manager					HTAccess.c
5  *		==============
6  *
7  *  Authors
8  *	TBL	Tim Berners-Lee timbl@info.cern.ch
9  *	JFG	Jean-Francois Groff jfg@dxcern.cern.ch
10  *	DD	Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
11  *	FM	Foteos Macrides macrides@sci.wfeb.edu
12  *	PDM	Danny Mayer mayer@ljo.dec.com
13  *
14  *  History
15  *	 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
16  *	26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
17  *	 6 Oct 92 Moved HTClientHost and logfile into here. TBL
18  *	17 Dec 92 Tn3270 added, bug fix. DD
19  *	 4 Feb 93 Access registration, Search escapes bad chars TBL
20  *		  PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
21  *	28 May 93 WAIS gateway explicit if no WAIS library linked in.
22  *	31 May 94 Added DIRECT_WAIS support for VMS. FM
23  *	27 Jan 95 Fixed proxy support to use NNTPSERVER for checking
24  *		  whether or not to use the proxy server. PDM
25  *	27 Jan 95 Ensured that proxy service will be overridden for files
26  *		  on the local host (because HTLoadFile() doesn't try ftp
27  *		  for those) and will substitute ftp for remote files. FM
28  *	28 Jan 95 Tweaked PDM's proxy override mods to handle port info
29  *		  for news and wais URL's. FM
30  *
31  *  Bugs
 *	This module assumes that the graphic object is hypertext, as it
33  *	needs to select it when it has been loaded.  A superclass needs to be
34  *	defined which accepts select and select_anchor.
35  */
36 
37 #ifdef VMS
38 #define DIRECT_WAIS
39 #endif /* VMS */
40 
41 #include <HTUtils.h>
42 #include <HTTP.h>
43 #include <HTAlert.h>
44 /*
45  *  Implements:
46  */
47 #include <HTAccess.h>
48 
49 /*
50  *  Uses:
51  */
52 #include <HTParse.h>
53 #include <HTML.h>		/* SCW */
54 
55 #ifndef NO_RULES
56 #include <HTRules.h>
57 #endif
58 
59 #include <HTList.h>
60 #include <HText.h>		/* See bugs above */
61 #include <HTCJK.h>
62 #include <UCMap.h>
63 #include <GridText.h>
64 
65 #include <LYGlobalDefs.h>
66 #include <LYexit.h>
67 #include <LYStrings.h>
68 #include <LYUtils.h>
69 #include <LYLeaks.h>
70 
/*
 *  These flags may be set to modify the operation of this module
 */
char *HTClientHost = NULL;	/* Name of remote login host if any */
FILE *HTlogfile = NULL;		/* File to which to output one-liners */
BOOL HTSecure = NO;		/* Disable access for telnet users? */
BOOL HTPermitRedir = NO;	/* Always allow redirection in getfile()? */

BOOL using_proxy = NO;		/* are we using a proxy gateway? */

/*
 *  To generate other things, play with these:
 */
HTFormat HTOutputFormat = NULL;
HTStream *HTOutputStream = NULL;	/* For non-interactive, set this */

/*
 * Created on the first HTRegisterProtocol() call; searched by name in
 * get_physical().
 */
static HTList *protocols = NULL;	/* List of registered protocol descriptors */

/*
 * Target of a redirection found by HTLoadDocument(), which frees it on entry
 * and refills it when the anchor's physical address is a "Location=" entry.
 */
char *use_this_url_instead = NULL;

/*
 * State saved by LYUCPushAssumed() and consumed by LYUCPopAssumed().  A
 * negative handle means nothing is currently pushed.
 */
static int pushed_assume_LYhndl = -1;	/* see LYUC* functions below - kw */
static char *pushed_assume_MIMEname = NULL;	/* previous UCAssume_MIMEcharset */
93 
94 #ifdef LY_FIND_LEAKS
free_protocols(void)95 static void free_protocols(void)
96 {
97     HTList_delete(protocols);
98     protocols = NULL;
99     FREE(pushed_assume_MIMEname);	/* shouldn't happen, just in case - kw */
100 }
101 #endif /* LY_FIND_LEAKS */
102 
103 /*	Register a Protocol.				HTRegisterProtocol()
104  *	--------------------
105  */
HTRegisterProtocol(HTProtocol * protocol)106 BOOL HTRegisterProtocol(HTProtocol * protocol)
107 {
108     if (!protocols) {
109 	protocols = HTList_new();
110 #ifdef LY_FIND_LEAKS
111 	atexit(free_protocols);
112 #endif
113     }
114     HTList_addObject(protocols, protocol);
115     return YES;
116 }
117 
118 /*	Register all known protocols.			HTAccessInit()
119  *	-----------------------------
120  *
121  *	Add to or subtract from this list if you add or remove protocol
122  *	modules.  This routine is called the first time the protocol list
123  *	is needed, unless any protocols are already registered, in which
124  *	case it is not called.	Therefore the application can override
125  *	this list.
126  *
127  *	Compiling with NO_INIT prevents all known protocols from being
128  *	forced in at link time.
129  */
130 #ifndef NO_INIT
131 #ifdef GLOBALREF_IS_MACRO
132 extern GLOBALREF (HTProtocol, HTTP);
133 extern GLOBALREF (HTProtocol, HTTPS);
134 extern GLOBALREF (HTProtocol, HTFile);
135 extern GLOBALREF (HTProtocol, HTTelnet);
136 extern GLOBALREF (HTProtocol, HTTn3270);
137 extern GLOBALREF (HTProtocol, HTRlogin);
138 
139 #ifndef DECNET
140 #ifndef DISABLE_FTP
141 extern GLOBALREF (HTProtocol, HTFTP);
142 #endif /* DISABLE_FTP */
143 #ifndef DISABLE_NEWS
144 extern GLOBALREF (HTProtocol, HTNews);
145 extern GLOBALREF (HTProtocol, HTNNTP);
146 extern GLOBALREF (HTProtocol, HTNewsPost);
147 extern GLOBALREF (HTProtocol, HTNewsReply);
148 extern GLOBALREF (HTProtocol, HTSNews);
149 extern GLOBALREF (HTProtocol, HTSNewsPost);
150 extern GLOBALREF (HTProtocol, HTSNewsReply);
151 #endif /* not DISABLE_NEWS */
152 #ifndef DISABLE_GOPHER
153 extern GLOBALREF (HTProtocol, HTGopher);
154 extern GLOBALREF (HTProtocol, HTCSO);
155 #endif /* not DISABLE_GOPHER */
156 #ifndef DISABLE_FINGER
157 extern GLOBALREF (HTProtocol, HTFinger);
158 #endif /* not DISABLE_FINGER */
159 #ifdef DIRECT_WAIS
160 extern GLOBALREF (HTProtocol, HTWAIS);
161 #endif /* DIRECT_WAIS */
162 #endif /* !DECNET */
163 #else
164 GLOBALREF HTProtocol HTTP, HTTPS, HTFile, HTTelnet, HTTn3270, HTRlogin;
165 
166 #ifndef DECNET
167 #ifndef DISABLE_FTP
168 GLOBALREF HTProtocol HTFTP;
169 #endif /* DISABLE_FTP */
170 #ifndef DISABLE_NEWS
171 GLOBALREF HTProtocol HTNews, HTNNTP, HTNewsPost, HTNewsReply;
172 GLOBALREF HTProtocol HTSNews, HTSNewsPost, HTSNewsReply;
173 #endif /* not DISABLE_NEWS */
174 #ifndef DISABLE_GOPHER
175 GLOBALREF HTProtocol HTGopher, HTCSO;
176 #endif /* not DISABLE_GOPHER */
177 #ifndef DISABLE_FINGER
178 GLOBALREF HTProtocol HTFinger;
179 #endif /* not DISABLE_FINGER */
180 #ifdef DIRECT_WAIS
181 GLOBALREF HTProtocol HTWAIS;
182 #endif /* DIRECT_WAIS */
183 #endif /* !DECNET */
184 #endif /* GLOBALREF_IS_MACRO */
185 
HTAccessInit(void)186 static void HTAccessInit(void)	/* Call me once */
187 {
188     HTRegisterProtocol(&HTTP);
189     HTRegisterProtocol(&HTTPS);
190     HTRegisterProtocol(&HTFile);
191     HTRegisterProtocol(&HTTelnet);
192     HTRegisterProtocol(&HTTn3270);
193     HTRegisterProtocol(&HTRlogin);
194 #ifndef DECNET
195 #ifndef DISABLE_FTP
196     HTRegisterProtocol(&HTFTP);
197 #endif /* DISABLE_FTP */
198 #ifndef DISABLE_NEWS
199     HTRegisterProtocol(&HTNews);
200     HTRegisterProtocol(&HTNNTP);
201     HTRegisterProtocol(&HTNewsPost);
202     HTRegisterProtocol(&HTNewsReply);
203     HTRegisterProtocol(&HTSNews);
204     HTRegisterProtocol(&HTSNewsPost);
205     HTRegisterProtocol(&HTSNewsReply);
206 #endif /* not DISABLE_NEWS */
207 #ifndef DISABLE_GOPHER
208     HTRegisterProtocol(&HTGopher);
209     HTRegisterProtocol(&HTCSO);
210 #endif /* not DISABLE_GOPHER */
211 #ifndef DISABLE_FINGER
212     HTRegisterProtocol(&HTFinger);
213 #endif /* not DISABLE_FINGER */
214 #ifdef DIRECT_WAIS
215     HTRegisterProtocol(&HTWAIS);
216 #endif /* DIRECT_WAIS */
217 #endif /* !DECNET */
218     LYRegisterLynxProtocols();
219 }
220 #endif /* !NO_INIT */
221 
222 /*	Check for proxy override.			override_proxy()
223  *	-------------------------
224  *
225  *	Check the no_proxy environment variable to get the list
226  *	of hosts for which proxy server is not consulted.
227  *
228  *	no_proxy is a comma- or space-separated list of machine
229  *	or domain names, with optional :port part.  If no :port
230  *	part is present, it applies to all ports on that domain.
231  *
232  *  Example:
233  *	    no_proxy="cern.ch,some.domain:8001"
234  *
235  *  Use "*" to override all proxy service:
236  *	     no_proxy="*"
237  */
override_proxy(const char * addr)238 BOOL override_proxy(const char *addr)
239 {
240     const char *no_proxy = getenv("no_proxy");
241     char *p = NULL;
242     char *at = NULL;
243     char *host = NULL;
244     char *Host = NULL;
245     char *acc_method = NULL;
246     int port = 0;
247     int h_len = 0;
248 
249     /*
250      * Check for global override.
251      */
252     if (no_proxy) {
253 	if (!strcmp(no_proxy, "*"))
254 	    return YES;
255     }
256 
257     /*
258      * Never proxy file:// URLs if they are on the local host.  HTLoadFile()
259      * will not attempt ftp for those if direct access fails.  We'll check that
260      * first, in case no_proxy hasn't been defined.  - FM
261      */
262     if (!addr)
263 	return NO;
264     if (!(host = HTParse(addr, "", PARSE_HOST)))
265 	return NO;
266     if (!*host) {
267 	FREE(host);
268 	return NO;
269     }
270     Host = (((at = StrChr(host, '@')) != NULL) ? (at + 1) : host);
271 
272     if ((acc_method = HTParse(addr, "", PARSE_ACCESS))) {
273 	if (!strcmp("file", acc_method) &&
274 	    (LYSameHostname(Host, "localhost") ||
275 	     LYSameHostname(Host, HTHostName()))) {
276 	    FREE(host);
277 	    FREE(acc_method);
278 	    return YES;
279 	}
280 	FREE(acc_method);
281     }
282 
283     if (!no_proxy) {
284 	FREE(host);
285 	return NO;
286     }
287 
288     if (NULL != (p = HTParsePort(Host, &port))) {	/* Port specified */
289 	*p = 0;			/* Chop off port */
290     } else {			/* Use default port */
291 	acc_method = HTParse(addr, "", PARSE_ACCESS);
292 	if (acc_method != NULL) {
293 	    /* *INDENT-OFF* */
294 	    if	    (!strcmp(acc_method, "http"))	port = 80;
295 	    else if (!strcmp(acc_method, "https"))	port = 443;
296 	    else if (!strcmp(acc_method, "ftp"))	port = 21;
297 #ifndef DISABLE_GOPHER
298 	    else if (!strcmp(acc_method, "gopher"))	port = 70;
299 #endif
300 	    else if (!strcmp(acc_method, "cso"))	port = 105;
301 #ifndef DISABLE_NEWS
302 	    else if (!strcmp(acc_method, "news"))	port = 119;
303 	    else if (!strcmp(acc_method, "nntp"))	port = 119;
304 	    else if (!strcmp(acc_method, "newspost"))	port = 119;
305 	    else if (!strcmp(acc_method, "newsreply"))	port = 119;
306 	    else if (!strcmp(acc_method, "snews"))	port = 563;
307 	    else if (!strcmp(acc_method, "snewspost"))	port = 563;
308 	    else if (!strcmp(acc_method, "snewsreply")) port = 563;
309 #endif
310 	    else if (!strcmp(acc_method, "wais"))	port = 210;
311 #ifndef DISABLE_FINGER
312 	    else if (!strcmp(acc_method, "finger"))	port = 79;
313 #endif
314 	    else if (!strcmp(acc_method, "telnet"))	port = 23;
315 	    else if (!strcmp(acc_method, "tn3270"))	port = 23;
316 	    else if (!strcmp(acc_method, "rlogin"))	port = 513;
317 	    /* *INDENT-ON* */
318 
319 	    FREE(acc_method);
320 	}
321     }
322     if (!port)
323 	port = 80;		/* Default */
324     h_len = (int) strlen(Host);
325 
326     while (*no_proxy) {
327 	const char *end;
328 	const char *colon = NULL;
329 	int templ_port = 0;
330 	int t_len;
331 	int brackets = 0;
332 
333 	while (*no_proxy && (WHITE(*no_proxy) || *no_proxy == ','))
334 	    no_proxy++;		/* Skip whitespace and separators */
335 
336 	end = no_proxy;
337 	while (*end && !WHITE(*end) && *end != ',') {	/* Find separator */
338 	    if (!brackets && (*end == ':'))
339 		colon = end;	/* Port number given */
340 	    else if (*end == '[')
341 		++brackets;
342 	    else if (*end == ']')
343 		--brackets;
344 	    end++;
345 	}
346 
347 	if (colon) {
348 	    /* unlike HTParsePort(), this may be followed by non-digits */
349 	    templ_port = atoi(colon + 1);
350 	    t_len = (int) (colon - no_proxy);
351 	} else {
352 	    t_len = (int) (end - no_proxy);
353 	}
354 
355 	if ((!templ_port || templ_port == port) &&
356 	    (t_len > 0 && t_len <= h_len &&
357 	     !strncasecomp(Host + h_len - t_len, no_proxy, t_len))) {
358 	    FREE(host);
359 	    return YES;
360 	}
361 #ifdef CJK_EX			/* ASATAKU PROXY HACK */
362 	if ((!templ_port || templ_port == port) &&
363 	    (t_len > 0 && t_len <= h_len &&
364 	     isdigit(UCH(*no_proxy)) &&
365 	     !StrNCmp(host, no_proxy, t_len))) {
366 	    FREE(host);
367 	    return YES;
368 	}
369 #endif /* ASATAKU PROXY HACK */
370 
371 	if (*end)
372 	    no_proxy = (end + 1);
373 	else
374 	    break;
375     }
376 
377     FREE(host);
378     return NO;
379 }
380 
381 /*	Find physical name and access protocol		get_physical()
382  *	--------------------------------------
383  *
384  *  On entry,
385  *	addr		must point to the fully qualified hypertext reference.
386  *	anchor		a parent anchor with whose address is addr
387  *
388  *  On exit,
389  *	returns		HT_NO_ACCESS		Error has occurred.
390  *			HT_OK			Success
391  */
get_physical(const char * addr,HTParentAnchor * anchor)392 static int get_physical(const char *addr,
393 			HTParentAnchor *anchor)
394 {
395     int result;
396     char *acc_method = NULL;	/* Name of access method */
397     char *physical = NULL;
398     char *Server_addr = NULL;
399     BOOL override_flag = NO;
400 
401     CTRACE((tfp, "get_physical %s\n", addr));
402 
403     /*
404      * Make sure the using_proxy variable is FALSE.
405      */
406     using_proxy = NO;
407 
408 #ifndef NO_RULES
409     if ((physical = HTTranslate(addr)) == 0) {
410 	if (redirecting_url) {
411 	    return HT_REDIRECTING;
412 	}
413 	return HT_FORBIDDEN;
414     }
415     if (anchor->isISMAPScript == TRUE) {
416 	StrAllocCat(physical, "?0,0");
417 	CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n"));
418     }
419     if (!StrNCmp(physical, "Proxied=", 8)) {
420 	HTAnchor_setPhysical(anchor, physical + 8);
421 	using_proxy = YES;
422     } else if (!StrNCmp(physical, "NoProxy=", 8)) {
423 	HTAnchor_setPhysical(anchor, physical + 8);
424 	override_flag = YES;
425     } else {
426 	HTAnchor_setPhysical(anchor, physical);
427     }
428     FREE(physical);		/* free our copy */
429 #else
430     if (anchor->isISMAPScript == TRUE) {
431 	StrAllocCopy(physical, addr);
432 	StrAllocCat(physical, "?0,0");
433 	CTRACE((tfp, "HTAccess: Appending '?0,0' coordinate pair.\n"));
434 	HTAnchor_setPhysical(anchor, physical);
435 	FREE(physical);		/* free our copy */
436     } else {
437 	HTAnchor_setPhysical(anchor, addr);
438     }
439 #endif /* NO_RULES */
440 
441     acc_method = HTParse(HTAnchor_physical(anchor), STR_FILE_URL, PARSE_ACCESS);
442 
443     /*
444      * Check whether gateway access has been set up for this.
445      *
446      * This function can be replaced by the rule system above.
447      *
448      * If the rule system has already determined that we should use a proxy, or
449      * that we shouldn't, ignore proxy-related settings, don't use no_proxy
450      * either.
451      */
452 #define USE_GATEWAYS
453 #ifdef USE_GATEWAYS
454 
455     if (!override_flag && !using_proxy) {	/* else ignore no_proxy env var */
456 	char *host = NULL;
457 	int port;
458 
459 	if (!strcasecomp(acc_method, "news")) {
460 	    /*
461 	     * News is different, so we need to check the name of the server,
462 	     * as well as the default port for selective exclusions.
463 	     */
464 	    if ((host = HTParse(addr, "", PARSE_HOST))) {
465 		if (HTParsePort(host, &port) == NULL) {
466 		    StrAllocCopy(Server_addr, "news://");
467 		    StrAllocCat(Server_addr, host);
468 		    StrAllocCat(Server_addr, ":119/");
469 		}
470 		FREE(host);
471 	    } else if (LYGetEnv("NNTPSERVER") != NULL) {
472 		StrAllocCopy(Server_addr, "news://");
473 		StrAllocCat(Server_addr, LYGetEnv("NNTPSERVER"));
474 		StrAllocCat(Server_addr, ":119/");
475 	    }
476 	} else if (!strcasecomp(acc_method, "wais")) {
477 	    /*
478 	     * Wais also needs checking of the default port for selective
479 	     * exclusions.
480 	     */
481 	    if ((host = HTParse(addr, "", PARSE_HOST))) {
482 		if (!(HTParsePort(host, &port))) {
483 		    StrAllocCopy(Server_addr, "wais://");
484 		    StrAllocCat(Server_addr, host);
485 		    StrAllocCat(Server_addr, ":210/");
486 		}
487 		FREE(host);
488 	    } else
489 		StrAllocCopy(Server_addr, addr);
490 	} else {
491 	    StrAllocCopy(Server_addr, addr);
492 	}
493 	override_flag = override_proxy(Server_addr);
494     }
495 
496     if (!override_flag && !using_proxy) {
497 	char *gateway_parameter = NULL, *gateway, *proxy;
498 
499 	/*
500 	 * Search for gateways.
501 	 */
502 	HTSprintf0(&gateway_parameter, "WWW_%s_GATEWAY", acc_method);
503 	gateway = LYGetEnv(gateway_parameter);	/* coerce for decstation */
504 
505 	/*
506 	 * Search for proxy servers.
507 	 */
508 	if (!strcmp(acc_method, "file"))
509 	    /*
510 	     * If we got to here, a file URL is for ftp on a remote host. - FM
511 	     */
512 	    strcpy(gateway_parameter, "ftp_proxy");
513 	else
514 	    sprintf(gateway_parameter, "%s_proxy", acc_method);
515 	proxy = LYGetEnv(gateway_parameter);
516 	FREE(gateway_parameter);
517 
518 	if (gateway)
519 	    CTRACE((tfp, "Gateway found: %s\n", gateway));
520 	if (proxy)
521 	    CTRACE((tfp, "proxy server found: %s\n", proxy));
522 
523 	/*
524 	 * Proxy servers have precedence over gateway servers.
525 	 */
526 	if (proxy) {
527 	    char *gatewayed = NULL;
528 
529 	    StrAllocCopy(gatewayed, proxy);
530 	    if (!StrNCmp(gatewayed, "http", 4)) {
531 		char *cp = strrchr(gatewayed, '/');
532 
533 		/* Append a slash to the proxy specification if it doesn't
534 		 * end in one but otherwise looks normal (starts with "http",
535 		 * has no '/' other than ones before the hostname). - kw */
536 		if (cp && (cp - gatewayed) <= 7)
537 		    LYAddHtmlSep(&gatewayed);
538 	    }
539 	    /*
540 	     * Ensure that the proxy server uses ftp for file URLs. - FM
541 	     */
542 	    if (!StrNCmp(addr, "file", 4)) {
543 		StrAllocCat(gatewayed, "ftp");
544 		StrAllocCat(gatewayed, (addr + 4));
545 	    } else
546 		StrAllocCat(gatewayed, addr);
547 	    using_proxy = YES;
548 	    if (anchor->isISMAPScript == TRUE)
549 		StrAllocCat(gatewayed, "?0,0");
550 	    HTAnchor_setPhysical(anchor, gatewayed);
551 	    FREE(gatewayed);
552 	    FREE(acc_method);
553 
554 	    acc_method = HTParse(HTAnchor_physical(anchor),
555 				 STR_HTTP_URL, PARSE_ACCESS);
556 
557 	} else if (gateway) {
558 	    char *path = HTParse(addr, "",
559 				 PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
560 
561 	    /* Chop leading / off to make host into part of path */
562 	    char *gatewayed = HTParse(path + 1, gateway, PARSE_ALL);
563 
564 	    FREE(path);
565 	    HTAnchor_setPhysical(anchor, gatewayed);
566 	    FREE(gatewayed);
567 	    FREE(acc_method);
568 
569 	    acc_method = HTParse(HTAnchor_physical(anchor),
570 				 STR_HTTP_URL, PARSE_ACCESS);
571 	}
572     }
573     FREE(Server_addr);
574 #endif /* use gateways */
575 
576     /*
577      * Search registered protocols to find suitable one.
578      */
579     result = HT_NO_ACCESS;
580     {
581 	int i, n;
582 
583 #ifndef NO_INIT
584 	if (!protocols)
585 	    HTAccessInit();
586 #endif
587 	n = HTList_count(protocols);
588 	for (i = 0; i < n; i++) {
589 	    HTProtocol *p = (HTProtocol *) HTList_objectAt(protocols, i);
590 
591 	    if (!strcmp(p->name, acc_method)) {
592 		HTAnchor_setProtocol(anchor, p);
593 		FREE(acc_method);
594 		result = HT_OK;
595 		break;
596 	    }
597 	}
598     }
599 
600     FREE(acc_method);
601     return result;
602 }
603 
604 /*
605  * Temporarily set the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec
606  * used for charset "assuming" to the values implied by a HTParentAnchor's
607  * UCStages, after saving the current values for later restoration.  - kw @@@
608  * These functions may not really belong here, but where else?  I want the
609  * "pop" to occur as soon as possible after loading has finished.  - kw @@@
610  */
LYUCPushAssumed(HTParentAnchor * anchor)611 void LYUCPushAssumed(HTParentAnchor *anchor)
612 {
613     int anchor_LYhndl = -1;
614     LYUCcharset *anchor_UCI = NULL;
615 
616     if (anchor) {
617 	anchor_LYhndl = HTAnchor_getUCLYhndl(anchor, UCT_STAGE_PARSER);
618 	if (anchor_LYhndl >= 0)
619 	    anchor_UCI = HTAnchor_getUCInfoStage(anchor,
620 						 UCT_STAGE_PARSER);
621 	if (anchor_UCI && anchor_UCI->MIMEname) {
622 	    pushed_assume_MIMEname = UCAssume_MIMEcharset;
623 	    UCAssume_MIMEcharset = NULL;
624 	    if (HTCJK == JAPANESE)
625 		StrAllocCopy(UCAssume_MIMEcharset, pushed_assume_MIMEname);
626 	    else
627 		StrAllocCopy(UCAssume_MIMEcharset, anchor_UCI->MIMEname);
628 	    pushed_assume_LYhndl = anchor_LYhndl;
629 	    /* some diagnostics */
630 	    if (UCLYhndl_for_unspec != anchor_LYhndl)
631 		CTRACE((tfp,
632 			"LYUCPushAssumed: UCLYhndl_for_unspec changed %d -> %d\n",
633 			UCLYhndl_for_unspec,
634 			anchor_LYhndl));
635 	    UCLYhndl_for_unspec = anchor_LYhndl;
636 	    return;
637 	}
638     }
639     pushed_assume_LYhndl = -1;
640     FREE(pushed_assume_MIMEname);
641 }
642 
643 /*
644  * Restore the int UCLYhndl_for_unspec and string UCLYhndl_for_unspec used for
645  * charset "assuming" from the values saved by LYUCPushAssumed, if any.  - kw
646  */
LYUCPopAssumed(void)647 int LYUCPopAssumed(void)
648 {
649     if (pushed_assume_LYhndl >= 0) {
650 	/* some diagnostics */
651 	if (UCLYhndl_for_unspec != pushed_assume_LYhndl)
652 	    CTRACE((tfp,
653 		    "LYUCPopAssumed: UCLYhndl_for_unspec changed %d -> %d\n",
654 		    UCLYhndl_for_unspec,
655 		    pushed_assume_LYhndl));
656 	UCLYhndl_for_unspec = pushed_assume_LYhndl;
657 	pushed_assume_LYhndl = -1;
658 	FREE(UCAssume_MIMEcharset);
659 	UCAssume_MIMEcharset = pushed_assume_MIMEname;
660 	pushed_assume_MIMEname = NULL;
661 	return UCLYhndl_for_unspec;
662     }
663     return -1;
664 }
665 
666 /*	Load a document					HTLoad()
667  *	---------------
668  *
669  *	This is an internal routine, which has an address AND a matching
670  *	anchor.  (The public routines are called with one OR the other.)
671  *
672  *  On entry,
673  *	addr		must point to the fully qualified hypertext reference.
674  *	anchor		a parent anchor with whose address is addr
675  *
676  *  On exit,
677  *	returns		<0		Error has occurred.
678  *			HT_LOADED	Success
679  *			HT_NO_DATA	Success, but no document loaded.
680  *					(telnet session started etc)
681  */
HTLoad(const char * addr,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)682 static int HTLoad(const char *addr,
683 		  HTParentAnchor *anchor,
684 		  HTFormat format_out,
685 		  HTStream *sink)
686 {
687     HTProtocol *p;
688     int status = get_physical(addr, anchor);
689 
690     if (reloading) {
691 	FREE(anchor->charset);
692 	FREE(anchor->UCStages);
693     }
694 
695     if (status == HT_FORBIDDEN) {
696 	/* prevent crash if telnet or similar was forbidden by rule. - kw */
697 	LYFixCursesOn("show alert:");
698 	status = HTLoadError(sink, 500, gettext("Access forbidden by rule"));
699     } else if (status == HT_REDIRECTING) {
700 	;			/* fake redirection by rule, to redirecting_url */
701     } else if (status >= 0) {
702 	/* prevent crash if telnet or similar mapped or proxied by rule. - kw */
703 	LYFixCursesOnForAccess(addr, HTAnchor_physical(anchor));
704 	p = (HTProtocol *) HTAnchor_protocol(anchor);
705 	anchor->parent->underway = TRUE;	/* Hack to deal with caching */
706 	status = p->load(HTAnchor_physical(anchor),
707 			 anchor, format_out, sink);
708 	anchor->parent->underway = FALSE;
709 	LYUCPopAssumed();
710     }
711     return status;
712 }
713 
714 /*	Get a save stream for a document		HTSaveStream()
715  *	--------------------------------
716  */
HTSaveStream(HTParentAnchor * anchor)717 HTStream *HTSaveStream(HTParentAnchor *anchor)
718 {
719     HTProtocol *p = (HTProtocol *) HTAnchor_protocol(anchor);
720 
721     if (!p)
722 	return NULL;
723 
724     return p->saveStream(anchor);
725 }
726 
727 int redirection_limit = 10;
728 int redirection_attempts = 0;	/* counter in HTLoadDocument */
729 
too_many_redirections(void)730 static BOOL too_many_redirections(void)
731 {
732     if (redirection_attempts > redirection_limit) {
733 	char *msg = NULL;
734 
735 	HTSprintf0(&msg, TOO_MANY_REDIRECTIONS, redirection_limit);
736 	free(msg);
737 	redirection_attempts = 0;
738 	return TRUE;
739     }
740     return FALSE;
741 }
742 
743 /*	Load a document - with logging etc		HTLoadDocument()
744  *	----------------------------------
745  *
 *	- Checks for documents already loaded
747  *	- Logs the access
748  *	- Allows stdin filter option
749  *	- Trace output and error messages
750  *
751  *  On Entry,
752  *	  anchor	    is the node_anchor for the document
753  *	  full_address	    The address of the document to be accessed.
754  *	  filter	    if YES, treat stdin as HTML
755  *
756  *  On Exit,
757  *	  returns    YES     Success in opening document
758  *		     NO      Failure
759  */
HTLoadDocument(const char * full_address,HTParentAnchor * anchor,HTFormat format_out,HTStream * sink)760 static BOOL HTLoadDocument(const char *full_address,	/* may include #fragment */
761 			   HTParentAnchor *anchor,
762 			   HTFormat format_out,
763 			   HTStream *sink)
764 {
765     int status;
766     HText *text;
767     const char *address_to_load = full_address;
768     char *cp;
769     BOOL ForcingNoCache = LYforce_no_cache;
770 
771     CTRACE((tfp, "HTAccess: loading document %s\n", NonNull(address_to_load)));
772     if (isEmpty(address_to_load))
773 	return NO;
774 
775     /*
776      * Free use_this_url_instead and reset permanent_redirection if not done
777      * elsewhere.  - FM
778      */
779     FREE(use_this_url_instead);
780     permanent_redirection = FALSE;
781 
782     if (too_many_redirections()) {
783 	return NO;
784     }
785 
786     /*
787      * If this is marked as an internal link but we don't have the document
788      * loaded any more, and we haven't explicitly flagged that we want to
789      * reload with LYforce_no_cache, then something has disappeared from the
790      * cache when we expected it to be still there.  The user probably doesn't
791      * expect a new network access.  So if we have POST data and safe is not
792      * set in the anchor, ask for confirmation, and fail if not granted.  The
793      * exception are LYNXIMGMAP documents, for which we defer to LYLoadIMGmap
794      * for prompting if necessary.  - kw
795      */
796     text = (HText *) HTAnchor_document(anchor);
797     if (LYinternal_flag && !text && !LYforce_no_cache &&
798 	anchor->post_data && !anchor->safe &&
799 	!isLYNXIMGMAP(full_address) &&
800 	HTConfirm(gettext("Document with POST content not found in cache.  Resubmit?"))
801 	!= TRUE) {
802 	return NO;
803     }
804 
805     /*
806      * If we don't have POST content, check whether this is a previous
807      * redirecting URL, and keep re-checking until we get to the final
808      * destination or redirection limit.  If we do have POST content, we didn't
809      * allow permanent redirection, and an interactive user will be deciding
810      * whether to keep redirecting.  - FM
811      */
812     if (!anchor->post_data) {
813 	while ((cp = HTAnchor_physical(anchor)) != NULL &&
814 	       !StrNCmp(cp, "Location=", 9)) {
815 	    DocAddress NewDoc;
816 
817 	    CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
818 		    anchor->address));
819 	    CTRACE((tfp, "HTAccess: Redirecting to '%s'\n", cp + 9));
820 
821 	    /*
822 	     * Don't exceed the redirection_attempts limit.  - FM
823 	     */
824 	    ++redirection_attempts;
825 	    if (too_many_redirections()) {
826 		FREE(use_this_url_instead);
827 		return NO;
828 	    }
829 
830 	    /*
831 	     * Set up the redirection. - FM
832 	     */
833 	    StrAllocCopy(use_this_url_instead, cp + 9);
834 	    NewDoc.address = use_this_url_instead;
835 	    NewDoc.post_data = NULL;
836 	    NewDoc.post_content_type = NULL;
837 	    NewDoc.bookmark = anchor->bookmark;
838 	    NewDoc.isHEAD = anchor->isHEAD;
839 	    NewDoc.safe = anchor->safe;
840 	    anchor = HTAnchor_findAddress(&NewDoc);
841 	}
842     }
843     /*
844      * If we had previous redirection, go back and check out that the URL under
845      * the current restrictions.  - FM
846      */
847     if (use_this_url_instead) {
848 	FREE(redirecting_url);
849 	return (NO);
850     }
851 
852     /*
853      * See if we can use an already loaded document.
854      */
855     text = (HText *) HTAnchor_document(anchor);
856     if (text && !LYforce_no_cache) {
857 	/*
858 	 * We have a cached rendition of the target document.  Check if it's OK
859 	 * to re-use it.  We consider it OK if:
860 	 * (1) the anchor does not have the no_cache element set, or
861 	 * (2) we've overridden it, e.g., because we are acting on a PREV_DOC
862 	 * command or a link in the History Page and it's not a reply from a
863 	 * POST with the LYresubmit_posts flag set, or
864 	 * (3) we are repositioning within the currently loaded document based
865 	 * on the target anchor's address (URL_Reference).
866 	 *
867 	 * If track_internal_links is false, HText_AreDifferent() is
868 	 * used to determine whether (3) applies.  If the target address
869 	 * differs from that of the current document only by a fragment and the
870 	 * target address has an appended fragment, repositioning without
871 	 * reloading is always assumed.  Note that HText_AreDifferent()
872 	 * currently always returns TRUE if the target has a LYNXIMGMAP URL, so
873 	 * that an internally generated pseudo-document will normally not be
874 	 * re-used unless condition (2) applies.  (Condition (1) cannot apply
875 	 * since in LYMap.c, no_cache is always set in the anchor object).
876 	 * This doesn't guarantee that the resource from which the MAP element
877 	 * is taken will be read again (reloaded) when the list of links for a
878 	 * client-side image map is regenerated, when in some cases it should
879 	 * (e.g., user requested RELOAD, or HTTP response with no-cache header
880 	 * and we are not overriding).
881 	 *
882 	 * If track_internal_links is true, a target address that
883 	 * points to the same URL as the current document may still result in
884 	 * reloading, depending on whether the original URL-Reference was given
885 	 * as an internal link in the context of the previously loaded
886 	 * document.  HText_AreDifferent() is not used here for testing whether
887 	 * we are just repositioning.  For an internal link, the potential
888 	 * callers of this function from mainloop() down will either avoid
889 	 * making the call (and do the repositioning differently) or set
890 	 * LYinternal_flag (or LYoverride_no_cache).  Note that (a) LYNXIMGMAP
891 	 * pseudo-documents and (b) The "List Page" document are treated
892 	 * logically as being part of the document on which they are based, for
893 	 * the purpose of whether to treat a link as internal, but the logic
894 	 * for this (by setting LYinternal_flag as necessary) is implemented
895 	 * elsewhere.  There is a specific test for LYNXIMGMAP here so that the
896 	 * generated pseudo-document will not be re-used unless
897 	 * LYoverride_no_cache is set.  The same caveat as above applies w.r.t.
898 	 * reloading of the underlying resource.
899 	 *
900 	 * We also should be checking other aspects of cache regulation (e.g.,
901 	 * based on an If-Modified-Since check, etc.) but the code for doing
902 	 * those other things isn't available yet.
903 	 */
904 	if ((reloading != REAL_RELOAD) &&
905 	    (LYoverride_no_cache ||
906 	     ((!track_internal_links &&
907 	       (!HText_hasNoCacheSet(text) ||
908 		!HText_AreDifferent(anchor, full_address))) ||
909 	      (track_internal_links &&
910 	       (((LYinternal_flag || !HText_hasNoCacheSet(text)) &&
911 		 !isLYNXIMGMAP(full_address))))))) {
912 	    CTRACE((tfp, "HTAccess: Document already in memory.\n"));
913 	    HText_select(text);
914 
915 #ifdef DIRED_SUPPORT
916 	    if (HTAnchor_format(anchor) == WWW_DIRED)
917 		lynx_edit_mode = TRUE;
918 #endif
919 	    redirection_attempts = 0;
920 	    return YES;
921 	} else {
922 	    ForcingNoCache = YES;
923 	    BStrFree(anchor->post_data);
924 	    CTRACE((tfp, "HTAccess: Auto-reloading document.\n"));
925 	}
926     }
927 
928     if (HText_HaveUserChangedForms(text)) {
929 	/*
930 	 * Issue a warning.  User forms content will be lost.
931 	 * Will not restore changed forms, currently.
932 	 */
933 	HTAlert(RELOADING_FORM);
934     }
935 
936     /*
937      * Get the document from the net.  If we are auto-reloading, the mutable
938      * anchor elements from the previous rendition should be freed in
939      * conjunction with loading of the new rendition.  - FM
940      */
941     LYforce_no_cache = NO;	/* reset after each time through */
942     if (ForcingNoCache) {
943 	FREE(anchor->title);	/* ??? */
944     }
945     status = HTLoad(address_to_load, anchor, format_out, sink);
946     CTRACE((tfp, "HTAccess:  status=%d\n", status));
947 
948     /*
949      * RECOVERY:  if the loading failed, and we had a cached HText copy, and no
950      * new HText created - use a previous copy, issue a warning.
951      */
952     if (text && status < 0 && (HText *) HTAnchor_document(anchor) == text) {
953 	HTAlert(gettext("Loading failed, use a previous copy."));
954 	CTRACE((tfp, "HTAccess: Loading failed, use a previous copy.\n"));
955 	HText_select(text);
956 
957 #ifdef DIRED_SUPPORT
958 	if (HTAnchor_format(anchor) == WWW_DIRED)
959 	    lynx_edit_mode = TRUE;
960 #endif
961 	redirection_attempts = 0;
962 	return YES;
963     }
964 
965     /*
966      * Log the access if necessary.
967      */
968     if (HTlogfile) {
969 	time_t theTime;
970 
971 	time(&theTime);
972 	fprintf(HTlogfile, "%24.24s %s %s %s\n",
973 		ctime(&theTime),
974 		HTClientHost ? HTClientHost : "local",
975 		status < 0 ? "FAIL" : "GET",
976 		full_address);
977 	fflush(HTlogfile);	/* Actually update it on disk */
978 	CTRACE((tfp, "Log: %24.24s %s %s %s\n",
979 		ctime(&theTime),
980 		HTClientHost ? HTClientHost : "local",
981 		status < 0 ? "FAIL" : "GET",
982 		full_address));
983     }
984 
985     /*
986      * Check out what we received from the net.
987      */
988     if (status == HT_REDIRECTING) {
989 	/* Exported from HTMIME.c, of all places.  */
990 	/* NO!! - FM */
991 	/*
992 	 * Doing this via HTMIME.c meant that the redirection cover page was
993 	 * already loaded before we learned that we want a different URL.
994 	 * Also, changing anchor->address, as Lynx was doing, meant we could
995 	 * never again access its hash table entry, creating an insolvable
996 	 * memory leak.  Instead, if we had a 301 status and set
997 	 * permanent_redirection, we'll load the new URL in anchor->physical,
998 	 * preceded by a token, which we can check to make replacements on
999 	 * subsequent access attempts.  We'll check recursively, and retrieve
1000 	 * the final URL if we had multiple redirections to it.  If we just
1001 	 * went to HTLoad now, as Lou originally had this, we couldn't do
1002 	 * Lynx's security checks and alternate handling of some URL types.
1003 	 * So, instead, we'll go all the way back to the top of getfile in
1004 	 * LYGetFile.c when the status is HT_REDIRECTING.  This may seem
1005 	 * bizarre, but it works like a charm!  - FM
1006 	 *
1007 	 * Actually, the location header for redirections is now again picked
1008 	 * up in HTMIME.c.  But that's an internal matter between HTTP.c and
1009 	 * HTMIME.c, is still under control of HTLoadHTTP for http URLs, is
1010 	 * done in a way that doesn't load the redirection response's body
1011 	 * (except when wanted as an error fallback), and thus need not concern
1012 	 * us here.  - kw 1999-12-02
1013 	 */
1014 	CTRACE((tfp, "HTAccess: '%s' is a redirection URL.\n",
1015 		address_to_load));
1016 	CTRACE((tfp, "HTAccess: Redirecting to '%s'\n",
1017 		redirecting_url));
1018 	/*
1019 	 * Prevent circular references.
1020 	 */
1021 	if (strcmp(address_to_load, redirecting_url)) {		/* if different */
1022 	    /*
1023 	     * Load token and redirecting url into anchor->physical if we had
1024 	     * 301 Permanent redirection.  HTTP.c does not allow this if we
1025 	     * have POST content.  - FM
1026 	     */
1027 	    if (permanent_redirection) {
1028 		StrAllocCopy(anchor->physical, "Location=");
1029 		StrAllocCat(anchor->physical, redirecting_url);
1030 	    }
1031 
1032 	    /*
1033 	     * Set up flags before return to getfile.  - FM
1034 	     */
1035 	    StrAllocCopy(use_this_url_instead, redirecting_url);
1036 	    if (ForcingNoCache)
1037 		LYforce_no_cache = YES;
1038 	    ++redirection_attempts;
1039 	    FREE(redirecting_url);
1040 	    permanent_redirection = FALSE;
1041 	    return (NO);
1042 	}
1043 	++redirection_attempts;
1044 	FREE(redirecting_url);
1045 	permanent_redirection = FALSE;
1046 	return (YES);
1047     }
1048 
1049     /*
1050      * We did not receive a redirecting URL.  - FM
1051      */
1052     redirection_attempts = 0;
1053     FREE(redirecting_url);
1054     permanent_redirection = FALSE;
1055 
1056     if (status == HT_LOADED) {
1057 	CTRACE((tfp, "HTAccess: `%s' has been accessed.\n",
1058 		full_address));
1059 	return YES;
1060     }
1061     if (status == HT_PARTIAL_CONTENT) {
1062 	HTAlert(gettext("Loading incomplete."));
1063 	CTRACE((tfp, "HTAccess: `%s' has been accessed, partial content.\n",
1064 		full_address));
1065 	return YES;
1066     }
1067 
1068     if (status == HT_NO_DATA) {
1069 	CTRACE((tfp, "HTAccess: `%s' has been accessed, No data left.\n",
1070 		full_address));
1071 	return NO;
1072     }
1073 
1074     if (status == HT_NOT_LOADED) {
1075 	CTRACE((tfp, "HTAccess: `%s' has been accessed, No data loaded.\n",
1076 		full_address));
1077 	return NO;
1078     }
1079 
1080     if (status == HT_INTERRUPTED) {
1081 	CTRACE((tfp,
1082 		"HTAccess: `%s' has been accessed, transfer interrupted.\n",
1083 		full_address));
1084 	return NO;
1085     }
1086 
1087     if (status > 0) {
1088 	/*
1089 	 * If you get this, then please find which routine is returning a
1090 	 * positive unrecognized error code!
1091 	 */
1092 	fprintf(stderr,
1093 		gettext("**** HTAccess: socket or file number returned by obsolete load routine!\n"));
1094 	fprintf(stderr,
1095 		gettext("**** HTAccess: Internal software error.  Please mail lynx-dev@nongnu.org!\n"));
1096 	fprintf(stderr, gettext("**** HTAccess: Status returned was: %d\n"), status);
1097 	exit_immediately(EXIT_FAILURE);
1098     }
1099 
1100     /* Failure in accessing a document */
1101     cp = NULL;
1102     StrAllocCopy(cp, gettext("Can't Access"));
1103     StrAllocCat(cp, " `");
1104     StrAllocCat(cp, full_address);
1105     StrAllocCat(cp, "'");
1106     _HTProgress(cp);
1107     FREE(cp);
1108 
1109     CTRACE((tfp, "HTAccess: Can't access `%s'\n", full_address));
1110     HTLoadError(sink, 500, gettext("Unable to access document."));
1111     return NO;
1112 }				/* HTLoadDocument */
1113 
1114 /*	Load a document from absolute name.		HTLoadAbsolute()
1115  *	-----------------------------------
1116  *
1117  *  On Entry,
1118  *	  addr	   The absolute address of the document to be accessed.
1119  *	  filter   if YES, treat document as HTML
1120  *
1121  *  On Exit,
1122  *	  returns    YES     Success in opening document
1123  *		     NO      Failure
1124  */
HTLoadAbsolute(const DocAddress * docaddr)1125 BOOL HTLoadAbsolute(const DocAddress *docaddr)
1126 {
1127     BOOL result;
1128     HTParentAnchor *anchor = HTAnchor_findAddress(docaddr);
1129 
1130     result = HTLoadDocument(docaddr->address,
1131 			    anchor,
1132 			    (HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
1133 			    HTOutputStream);
1134     if (!result) {
1135 	HTAnchor_delete(anchor->parent);
1136     }
1137     return result;
1138 }
1139 
1140 #ifdef NOT_USED_CODE
1141 /*	Load a document from absolute name to stream.	HTLoadToStream()
1142  *	---------------------------------------------
1143  *
1144  *  On Entry,
1145  *	  addr	   The absolute address of the document to be accessed.
1146  *	  sink	   if non-NULL, send data down this stream
1147  *
1148  *  On Exit,
1149  *	  returns    YES     Success in opening document
1150  *		     NO      Failure
1151  */
HTLoadToStream(const char * addr,BOOL filter,HTStream * sink)1152 BOOL HTLoadToStream(const char *addr,
1153 		    BOOL filter,
1154 		    HTStream *sink)
1155 {
1156     return HTLoadDocument(addr,
1157 			  HTAnchor_findSimpleAddress(addr),
1158 			  (HTOutputFormat ? HTOutputFormat : WWW_PRESENT),
1159 			  sink);
1160 }
1161 #endif /* NOT_USED_CODE */
1162 
1163 /*	Load a document from relative name.		HTLoadRelative()
1164  *	-----------------------------------
1165  *
1166  *  On Entry,
1167  *	  relative_name     The relative address of the document
1168  *			    to be accessed.
1169  *
1170  *  On Exit,
1171  *	  returns    YES     Success in opening document
1172  *		     NO      Failure
1173  */
HTLoadRelative(const char * relative_name,HTParentAnchor * here)1174 BOOL HTLoadRelative(const char *relative_name,
1175 		    HTParentAnchor *here)
1176 {
1177     DocAddress full_address;
1178     BOOL result;
1179     char *mycopy = NULL;
1180     char *stripped = NULL;
1181 
1182     full_address.address = NULL;
1183     full_address.post_data = NULL;
1184     full_address.post_content_type = NULL;
1185     full_address.bookmark = NULL;
1186     full_address.isHEAD = FALSE;
1187     full_address.safe = FALSE;
1188 
1189     StrAllocCopy(mycopy, relative_name);
1190 
1191     stripped = HTStrip(mycopy);
1192     full_address.address =
1193 	HTParse(stripped,
1194 		here->address,
1195 		PARSE_ALL_WITHOUT_ANCHOR);
1196     result = HTLoadAbsolute(&full_address);
1197     /*
1198      * If we got redirection, result will be NO, but use_this_url_instead will
1199      * be set.  The calling routine should check both and do whatever is
1200      * appropriate.  - FM
1201      */
1202     FREE(full_address.address);
1203     FREE(mycopy);		/* Memory leak fixed 10/7/92 -- JFG */
1204     return result;
1205 }
1206 
1207 /*	Load if necessary, and select an anchor.	HTLoadAnchor()
1208  *	----------------------------------------
1209  *
1210  *  On Entry,
1211  *	  destination		    The child or parent anchor to be loaded.
1212  *
1213  *  On Exit,
1214  *	  returns    YES     Success
1215  *		     NO      Failure
1216  */
HTLoadAnchor(HTAnchor * destination)1217 BOOL HTLoadAnchor(HTAnchor * destination)
1218 {
1219     HTParentAnchor *parent;
1220     BOOL loaded = NO;
1221 
1222     if (!destination)
1223 	return NO;		/* No link */
1224 
1225     parent = HTAnchor_parent(destination);
1226 
1227     if (HTAnchor_document(parent) == NULL) {	/* If not already loaded */
1228 	/* TBL 921202 */
1229 	BOOL result;
1230 
1231 	result = HTLoadDocument(parent->address,
1232 				parent,
1233 				HTOutputFormat ?
1234 				HTOutputFormat : WWW_PRESENT,
1235 				HTOutputStream);
1236 	if (!result)
1237 	    return NO;
1238 	loaded = YES;
1239     } {
1240 	HText *text = (HText *) HTAnchor_document(parent);
1241 
1242 	if ((destination != (HTAnchor *) parent) &&
1243 	    (destination != (HTAnchor *) (parent->parent))) {
1244 	    /* If child anchor */
1245 	    HText_selectAnchor(text,	/* Double display? @@ */
1246 			       (HTChildAnchor *) destination);
1247 	} else {
1248 	    if (!loaded)
1249 		HText_select(text);
1250 	}
1251     }
1252     return YES;
1253 
1254 }				/* HTLoadAnchor */
1255 
1256 /*	Search.						HTSearch()
1257  *	-------
1258  *
1259  *	Performs a keyword search on word given by the user.  Adds the
1260  *	keyword to the end of the current address and attempts to open
1261  *	the new address.
1262  *
1263  *  On Entry,
1264  *	 *keywords	space-separated keyword list or similar search list
 *	here		is the anchor the search is to be done on.
1266  */
/*
 * Map a nibble to its upper-case hexadecimal digit.
 *
 *  On Entry,
 *	  i	     Value whose low four bits select the digit.
 *
 *  On Exit,
 *	  returns    One of '0'..'9' or 'A'..'F'.
 */
static char hex(int i)
{
    static const char hexchars[] = "0123456789ABCDEF";

    /*
     * Mask to the low nibble so that no argument, however large or
     * negative, can index outside the sixteen digits.
     */
    return hexchars[(unsigned) i & 0x0FU];
}
1273 
HTSearch(const char * keywords,HTParentAnchor * here)1274 BOOL HTSearch(const char *keywords,
1275 	      HTParentAnchor *here)
1276 {
1277 #define acceptable \
1278 "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
1279 
1280     char *q, *u;
1281     const char *p, *s, *e;	/* Pointers into keywords */
1282     char *address = NULL;
1283     BOOL result;
1284     char *escaped = typecallocn(char, (strlen(keywords) * 3) + 1);
1285     static const BOOL isAcceptable[96] =
1286     /* *INDENT-OFF* */
1287     /*	 0 1 2 3 4 5 6 7 8 9 A B C D E F */
1288     {	 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,	/* 2x	!"#$%&'()*+,-./  */
1289 	 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?  */
1290 	 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 4x  @ABCDEFGHIJKLMNO  */
1291 	 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,	/* 5X  PQRSTUVWXYZ[\]^_  */
1292 	 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 6x  `abcdefghijklmno  */
1293 	 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };	/* 7X  pqrstuvwxyz{\}~	DEL */
1294     /* *INDENT-ON* */
1295 
1296     if (escaped == NULL)
1297 	outofmem(__FILE__, "HTSearch");
1298 
1299     if (here->isIndexAction == NULL) {
1300 	free(escaped);
1301 	return FALSE;
1302     }
1303     StrAllocCopy(address, here->isIndexAction);
1304 
1305     /*
1306      * Convert spaces to + and hex escape unacceptable characters.
1307      */
1308     for (s = keywords; *s && WHITE(*s); s++)	/* Scan */
1309 	;			/* Skip white space */
1310     for (e = s + strlen(s); e > s && WHITE(*(e - 1)); e--)	/* Scan */
1311 	;			/* Skip trailers */
1312     for (q = escaped, p = s; p < e; p++) {	/* Scan stripped field */
1313 	unsigned char c = UCH(TOASCII(*p));
1314 
1315 	if (WHITE(*p)) {
1316 	    *q++ = '+';
1317 	} else if (IS_CJK_TTY) {
1318 	    *q++ = *p;
1319 	} else if (c >= 32 && c <= UCH(127) && isAcceptable[c - 32]) {
1320 	    *q++ = *p;		/* 930706 TBL for MVS bug */
1321 	} else {
1322 	    *q++ = '%';
1323 	    *q++ = hex((int) (c >> 4));
1324 	    *q++ = hex((int) (c & 15));
1325 	}
1326     }				/* Loop over string */
1327     *q = '\0';			/* Terminate escaped string */
1328     u = StrChr(address, '?');	/* Find old search string */
1329     if (u != NULL)
1330 	*u = '\0';		/* Chop old search off */
1331 
1332     StrAllocCat(address, "?");
1333     StrAllocCat(address, escaped);
1334     FREE(escaped);
1335     result = HTLoadRelative(address, here);
1336     FREE(address);
1337 
1338     /*
1339      * If we got redirection, result will be NO, but use_this_url_instead will
1340      * be set.  The calling routine should check both and do whatever is
1341      * appropriate.  Only an http server (not a gopher or wais server) could
1342      * return redirection.  Lynx will go all the way back to its mainloop() and
1343      * subject a redirecting URL to all of its security and restrictions
1344      * checks.  - FM
1345      */
1346     return result;
1347 }
1348 
1349 /*	Search Given Indexname.			HTSearchAbsolute()
1350  *	-----------------------
1351  *
1352  *	Performs a keyword search on word given by the user.  Adds the
1353  *	keyword to the end of the current address and attempts to open
1354  *	the new address.
1355  *
1356  *  On Entry,
1357  *	*keywords	space-separated keyword list or similar search list
1358  *	*indexname	is name of object search is to be done on.
1359  */
HTSearchAbsolute(const char * keywords,char * indexname)1360 BOOL HTSearchAbsolute(const char *keywords,
1361 		      char *indexname)
1362 {
1363     DocAddress abs_doc;
1364     HTParentAnchor *anchor;
1365 
1366     abs_doc.address = indexname;
1367     abs_doc.post_data = NULL;
1368     abs_doc.post_content_type = NULL;
1369     abs_doc.bookmark = NULL;
1370     abs_doc.isHEAD = FALSE;
1371     abs_doc.safe = FALSE;
1372 
1373     anchor = HTAnchor_findAddress(&abs_doc);
1374     return HTSearch(keywords, anchor);
1375 }
1376 
1377 #ifdef NOT_USED_CODE
1378 /*	Generate the anchor for the home page.		HTHomeAnchor()
1379  *	--------------------------------------
1380  *
1381  *	As it involves file access, this should only be done once
1382  *	when the program first runs.
1383  *	This is a default algorithm -- browser don't HAVE to use this.
1384  *	But consistency between browsers is STRONGLY recommended!
1385  *
1386  *  Priority order is:
1387  *		1	WWW_HOME environment variable (logical name, etc)
1388  *		2	~/WWW/default.html
1389  *		3	/usr/local/bin/default.html
1390  *		4	http://www.w3.org/default.html
1391  */
HTHomeAnchor(void)1392 HTParentAnchor *HTHomeAnchor(void)
1393 {
1394     char *my_home_document = NULL;
1395     char *home = LYGetEnv(LOGICAL_DEFAULT);
1396     char *ref;
1397     HTParentAnchor *anchor;
1398 
1399     if (home) {
1400 	StrAllocCopy(my_home_document, home);
1401 #define MAX_FILE_NAME 1024	/* @@@ */
1402     } else if (HTClientHost) {	/* Telnet server */
1403 	/*
1404 	 * Someone telnets in, they get a special home.
1405 	 */
1406 	FILE *fp = fopen(REMOTE_POINTER, "r");
1407 	char *status;
1408 
1409 	if (fp) {
1410 	    my_home_document = typecallocn(char, MAX_FILE_NAME);
1411 
1412 	    if (my_home_document == NULL)
1413 		outofmem(__FILE__, "HTHomeAnchor");
1414 	    status = fgets(my_home_document, MAX_FILE_NAME, fp);
1415 	    if (!status) {
1416 		FREE(my_home_document);
1417 	    }
1418 	    fclose(fp);
1419 	}
1420 	if (my_home_document == NULL)
1421 	    StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1422     }
1423 #ifdef UNIX
1424     if (my_home_document == NULL) {
1425 	FILE *fp = NULL;
1426 	char *home = LYGetEnv("HOME");
1427 
1428 	if (home != 0) {
1429 	    HTSprintf0(&my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1430 	    fp = fopen(my_home_document, "r");
1431 	}
1432 
1433 	if (!fp) {
1434 	    StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1435 	    fp = fopen(my_home_document, "r");
1436 	}
1437 	if (fp) {
1438 	    fclose(fp);
1439 	} else {
1440 	    CTRACE((tfp, "HTBrowse: No local home document ~/%s or %s\n",
1441 		    PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE));
1442 	    FREE(my_home_document);
1443 	}
1444     }
1445 #endif /* UNIX */
1446     ref = HTParse((my_home_document ?
1447 		   my_home_document : (HTClientHost ?
1448 				       REMOTE_ADDRESS : LAST_RESORT)),
1449 		  STR_FILE_URL,
1450 		  PARSE_ALL_WITHOUT_ANCHOR);
1451     if (my_home_document) {
1452 	CTRACE((tfp, "HTAccess: Using custom home page %s i.e., address %s\n",
1453 		my_home_document, ref));
1454 	FREE(my_home_document);
1455     }
1456     anchor = HTAnchor_findSimpleAddress(ref);
1457     FREE(ref);
1458     return anchor;
1459 }
1460 #endif /* NOT_USED_CODE */
1461