1 /*
2  * $LynxId: HTParse.c,v 1.98 2021/07/27 21:29:49 tom Exp $
3  *
4  *		Parse HyperText Document Address		HTParse.c
5  *		================================
6  */
7 
8 #include <HTUtils.h>
9 #include <HTParse.h>
10 
11 #include <LYUtils.h>
12 #include <LYLeaks.h>
13 #include <LYStrings.h>
14 #include <LYCharUtils.h>
15 #include <LYGlobalDefs.h>
16 
17 #ifdef HAVE_ALLOCA_H
18 #include <alloca.h>
19 #else
20 #ifdef __MINGW32__
21 #include <malloc.h>
22 #endif /* __MINGW32__ */
23 #endif
24 
25 #ifdef USE_IDN2
26 #include <idn2.h>
27 #define FreeIdna(out)             idn2_free(out)
28 #elif defined(USE_IDNA)
29 #include <idna.h>
30 #include <idn-free.h>
31 #define FreeIdna(out)             idn_free(out)
32 #define IDN2_OK                   IDNA_SUCCESS
33 #endif
34 
35 #define HEX_ESCAPE '%'
36 
/*
 * The pieces of a URL as found by scan().  Every non-null member points into
 * the string that was passed to scan() (which scan() cuts up by overwriting
 * delimiters with zeros); nothing here is separately allocated.
 */
struct struct_parts {
    char *access;		/* scheme: text before the first ':' */
    char *host;			/* text after "//", up to the next '/' or '?' */
    char *absolute;		/* path text following its leading '/' */
    char *relative;		/* remaining text when there is no leading '/' */
    char *search;		/* set only for "//host?query" (no '/' after the
				 * host); treated normally as part of path */
    char *anchor;		/* text after the first '#' */
};
45 
46 #if 0				/* for debugging */
/*
 * Debugging aid: when TRACE is set, write every member of 'parts' through
 * CTRACE, labelled with 'name' and with this file's name and 'line'.  Null
 * members are passed through NONNULL() before printing.
 */
static void show_parts(const char *name, struct struct_parts *parts, int line)
{
    if (TRACE) {
	CTRACE((tfp, "struct_parts(%s) %s@%d\n", name, __FILE__, line));
	CTRACE((tfp, "   access   '%s'\n", NONNULL(parts->access)));
	CTRACE((tfp, "   host     '%s'\n", NONNULL(parts->host)));
	CTRACE((tfp, "   absolute '%s'\n", NONNULL(parts->absolute)));
	CTRACE((tfp, "   relative '%s'\n", NONNULL(parts->relative)));
	CTRACE((tfp, "   search   '%s'\n", NONNULL(parts->search)));
	CTRACE((tfp, "   anchor   '%s'\n", NONNULL(parts->anchor)));
    }
}
59 #define SHOW_PARTS(name) show_parts(#name, &name, __LINE__)
60 #else
61 #define SHOW_PARTS(name)	/* nothing */
62 #endif
63 
/*	Strip white space off a string.				HTStrip()
 *	-------------------------------
 *
 * On entry,
 *	s	points to a writable, zero terminated string.
 * On exit,
 *	Return value points to first non-white character, or to the
 *	terminating zero if there is none.
 *	All trailing white space is OVERWRITTEN with zero.
 *
 * Only blank, tab and newline count as white space (see SPACE below).
 */
char *HTStrip(char *s)
{
#define SPACE(c) ((c == ' ') || (c == '\t') || (c == '\n'))
    char *end = s;

    while (*end != '\0') {	/* Find end of string */
	end++;
    }

    /*
     * Walk back from the end, but never step to (or compare with) an address
     * before the start of the buffer: that would be undefined for an empty or
     * all-white string.
     */
    while (end > s && SPACE(end[-1])) {
	*--end = '\0';		/* Zap trailing blanks */
    }

    while (SPACE(*s))
	s++;			/* Strip leading blanks */
    return s;
}
89 
90 /*	Scan a filename for its constituents.			scan()
91  *	-------------------------------------
92  *
93  * On entry,
94  *	name	points to a document name which may be incomplete.
95  * On exit,
96  *	absolute or relative may be nonzero (but not both).
97  *	host, anchor and access may be nonzero if they were specified.
98  *	Any which are nonzero point to zero terminated strings.
99  */
scan(char * name,struct struct_parts * parts)100 static void scan(char *name,
101 		 struct struct_parts *parts)
102 {
103     char *after_access;
104     char *p;
105 
106     parts->access = NULL;
107     parts->host = NULL;
108     parts->absolute = NULL;
109     parts->relative = NULL;
110     parts->search = NULL;	/* normally not used - kw */
111     parts->anchor = NULL;
112 
113     /*
114      * Scan left-to-right for a scheme (access).
115      */
116     after_access = name;
117     for (p = name; *p; p++) {
118 	if (*p == ':') {
119 	    *p = '\0';
120 	    parts->access = name;	/* Access name has been specified */
121 	    after_access = (p + 1);
122 	    break;
123 	}
124 	if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
125 	    break;
126     }
127 
128     /*
129      * Scan left-to-right for a fragment (anchor).
130      */
131     for (p = after_access; *p; p++) {
132 	if (*p == '#') {
133 	    parts->anchor = (p + 1);
134 	    *p = '\0';		/* terminate the rest */
135 	    break;		/* leave things after first # alone - kw */
136 	}
137     }
138 
139     /*
140      * Scan left-to-right for a host or absolute path.
141      */
142     p = after_access;
143     if (*p == '/') {
144 	if (p[1] == '/') {
145 	    parts->host = (p + 2);	/* host has been specified    */
146 	    *p = '\0';		/* Terminate access           */
147 	    p = StrChr(parts->host, '/');	/* look for end of host name if any */
148 	    if (p != NULL) {
149 		*p = '\0';	/* Terminate host */
150 		parts->absolute = (p + 1);	/* Root has been found */
151 	    } else {
152 		p = StrChr(parts->host, '?');
153 		if (p != NULL) {
154 		    *p = '\0';	/* Terminate host */
155 		    parts->search = (p + 1);
156 		}
157 	    }
158 	} else {
159 	    parts->absolute = (p + 1);	/* Root found but no host */
160 	}
161     } else {
162 	parts->relative = (*after_access) ?
163 	    after_access : NULL;	/* NULL for "" */
164     }
165 
166     /*
167      * Check schemes that commonly have unescaped hashes.
168      */
169     if (parts->access && parts->anchor &&
170     /* optimize */ StrChr("lnsdLNSD", *parts->access) != NULL) {
171 	if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) ||
172 	    !strcasecomp(parts->access, "nntp") ||
173 	    !strcasecomp(parts->access, "snews") ||
174 	    !strcasecomp(parts->access, "news") ||
175 	    !strcasecomp(parts->access, "data")) {
176 	    /*
177 	     * Access specified but no host and not a lynxcgi URL, so the
178 	     * anchor may not really be one, e.g., news:j462#36487@foo.bar, or
179 	     * it's an nntp or snews URL, or news URL with a host.  Restore the
180 	     * '#' in the address.
181 	     */
182 	    /* but only if we have found a path component of which this will
183 	     * become part. - kw  */
184 	    if (parts->relative || parts->absolute) {
185 		*(parts->anchor - 1) = '#';
186 		parts->anchor = NULL;
187 	    }
188 	}
189     }
190 }				/*scan */
191 
/*
 * Scratch-buffer allocation used by HTParse() and HTParseAnchor().
 *
 * If HAVE_ALLOCA is defined and LY_FIND_LEAKS is not, the buffer comes from
 * alloca() and LYalloca_free() expands to an empty block.  Otherwise the
 * buffer comes from malloc() and LYalloca_free() calls free().
 */
#if defined(HAVE_ALLOCA) && !defined(LY_FIND_LEAKS)
#define LYalloca(x)        alloca((size_t)(x))
#define LYalloca_free(x)   {}
#else
#define LYalloca(x)        malloc((size_t)(x))
#define LYalloca_free(x)   free((void *)(x))
#endif
199 
/*
 * Like StrChr(), except that the result is never null: if 'ch' does not occur
 * in 'string', point to the string's terminating zero instead.
 */
static char *strchr_or_end(char *string, int ch)
{
    char *found = StrChr(string, ch);

    return (found != 0) ? found : (string + strlen(string));
}
209 
210 /*
211  * Given a host specification that may end with a port number, e.g.,
212  *	foobar:123
213  * point to the ':' which begins the ":port" to make it simple to handle the
214  * substring.
215  *
216  * If no port is found (or a syntax error), return null.
217  */
HTParsePort(char * host,int * portp)218 char *HTParsePort(char *host, int *portp)
219 {
220     int brackets = 0;
221     char *result = NULL;
222 
223     *portp = 0;
224     if (host != NULL) {
225 	while (*host != '\0' && result == 0) {
226 	    switch (*host++) {
227 	    case ':':
228 		if (brackets == 0 && isdigit(UCH(*host))) {
229 		    char *next = NULL;
230 
231 		    *portp = (int) strtol(host, &next, 10);
232 		    if (next != 0 && next != host && *next == '\0') {
233 			result = (host - 1);
234 			CTRACE((tfp, "HTParsePort %d\n", *portp));
235 		    }
236 		}
237 		break;
238 	    case '[':		/* for ipv6 */
239 		++brackets;
240 		break;
241 	    case ']':		/* for ipv6 */
242 		--brackets;
243 		break;
244 	    }
245 	}
246     }
247     return result;
248 }
249 
250 #if defined(USE_IDNA) || defined(USE_IDN2)
/*
 * Return the value (0..15) of the hexadecimal digit 'ch', in either case, or
 * -1 if 'ch' is not a hexadecimal digit.
 */
static int hex_decode(int ch)
{
    if ('0' <= ch && ch <= '9')
	return ch - '0';
    if ('a' <= ch && ch <= 'f')
	return 10 + (ch - 'a');
    if ('A' <= ch && ch <= 'F')
	return 10 + (ch - 'A');
    return -1;
}
263 
/*
 * Convert in-place the given hostname to IDNA form.  That requires up to 64
 * characters, and we've allowed for that, with MIN_PARSE.
 *
 * The text up to the first RFC_3986_GEN_DELIMS character is copied to a
 * scratch buffer with %xx escapes decoded, and handed to the IDN library.
 * On success the converted name, followed by whatever began at that
 * delimiter, is written back over 'host'.
 *
 * 'host' is left unchanged if an escape is malformed, if the conversion
 * fails, or if the scratch buffers cannot be allocated.
 *
 * NOTE(review): the strcpy/strcat back into 'host' are not bounded here; the
 * caller is relied upon to have left enough room.
 */
static void convert_to_idna(char *host)
{
    size_t length = strlen(host);
    char *endhost = host + length;
    char *buffer = malloc(length + 1);	/* decoded hostname */
    char *params = malloc(length + 1);	/* text from the first delimiter on */
    char *output = NULL;	/* allocated by the IDN library */
    char *src, *dst;
    int code;			/* TRUE/FALSE while decoding, then library status */
    int hi, lo;

    if (buffer != NULL && params != NULL) {
	code = TRUE;
	*params = '\0';
	for (dst = buffer, src = host; src < endhost; ++dst) {
	    int ch = *src++;

	    if (RFC_3986_GEN_DELIMS(ch)) {
		/* hostname ends here: keep the delimiter and the rest */
		strcpy(params, src - 1);
		*dst = '\0';
		break;
	    } else if (ch == HEX_ESCAPE) {
		/* need two more characters, both of them hex digits */
		if ((src + 1) < endhost
		    && (hi = hex_decode(src[0])) >= 0
		    && (lo = hex_decode(src[1])) >= 0) {

		    *dst = (char) ((hi << 4) | lo);
		    src += 2;
		} else {
		    CTRACE((tfp, "convert_to_idna: `%s' is malformed\n", host));
		    code = FALSE;
		    break;
		}
	    } else {
		*dst = (char) ch;
	    }
	}
	if (code) {
	    *dst = '\0';
#ifdef USE_IDN2
#if (!defined(IDN2_VERSION_NUMBER) || IDN2_VERSION_NUMBER < 0x02000003)
	    /*
	     * Older libidn2 mishandles STD3, stripping underscores.
	     */
	    if (strchr(buffer, '_') != NULL) {
		code = -1;	/* not IDN2_OK: host is left as it was */
	    } else
#endif
		switch (LYidnaMode) {
		case LYidna2003:
		    code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
		    break;
		case LYidna2008:
		    /* IDNA2008 rules without the TR46 amendments */
		    code = idn2_to_ascii_8z(buffer, &output, 0);
		    break;
		case LYidnaTR46:
		    code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
					    | IDN2_NFC_INPUT);
		    break;
		case LYidnaCompat:
		    /* IDNA2008 */
		    code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
					    | IDN2_NFC_INPUT);
		    if (code == IDN2_DISALLOWED) {
			/* IDNA2003 - compatible */
			code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
		    }
		    break;
		}
#else
	    code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES);
#endif
	    if (code == IDN2_OK) {
		CTRACE((tfp, "convert_to_idna: `%s' -> `%s': OK\n", buffer, output));
		/* replace the hostname, then put back what followed it */
		strcpy(host, output);
		strcat(host, params);
	    } else {
		CTRACE((tfp, "convert_to_idna: `%s': %s\n",
			buffer,
			idna_strerror((Idna_rc) code)));
	    }
	    if (output)
		FreeIdna(output);
	}
    }
    /* either of these may be null if its malloc failed */
    free(buffer);
    free(params);
}
357 #define MIN_PARSE 80
358 #else
359 #define MIN_PARSE 8
360 #endif
361 
362 /*	Parse a Name relative to another name.			HTParse()
363  *	--------------------------------------
364  *
365  *	This returns those parts of a name which are given (and requested)
366  *	substituting bits from the related name where necessary.
367  *
368  *	Originally based on RFC 1808, some details in RFC 3986 are used.
369  *
370  * On entry,
371  *	aName		A filename given
372  *	relatedName	A name relative to which aName is to be parsed
373  *	wanted		A mask for the bits which are wanted.
374  *
375  * On exit,
376  *     returns         A pointer to a malloc'd string which MUST BE FREED
377  */
HTParse(const char * aName,const char * relatedName,int wanted)378 char *HTParse(const char *aName,
379 	      const char *relatedName,
380 	      int wanted)
381 {
382     char *result = NULL;
383     char *tail = NULL;		/* a pointer to the end of the 'result' string */
384     char *return_value = NULL;
385     size_t len, len1, len2;
386     size_t need;
387     char *name = NULL;
388     char *rel = NULL;
389     char *p, *q;
390     char *acc_method;
391     struct struct_parts given, related;
392 
393     CTRACE((tfp, "HTParse: aName:`%s'\n", aName));
394     CTRACE((tfp, "   relatedName:`%s'\n", relatedName));
395 
396     if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) {	/* if detail wanted... */
397 	if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY))
398 	    == (PARSE_STRICTPATH | PARSE_QUERY))	/* if strictpath AND query */
399 	    wanted |= PARSE_PATH;	/* then treat as if PARSE_PATH wanted */
400 	if (wanted & PARSE_PATH)	/* if PARSE_PATH wanted */
401 	    wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY);	/* ignore details */
402     }
403 /* *INDENT-OFF* */
404     CTRACE((tfp, "   want:%s%s%s%s%s%s%s\n",
405 	    wanted & PARSE_PUNCTUATION ? " punc"   : "",
406 	    wanted & PARSE_ANCHOR      ? " anchor" : "",
407 	    wanted & PARSE_PATH        ? " path"   : "",
408 	    wanted & PARSE_HOST        ? " host"   : "",
409 	    wanted & PARSE_ACCESS      ? " access" : "",
410 	    wanted & PARSE_STRICTPATH  ? " PATH"   : "",
411 	    wanted & PARSE_QUERY       ? " QUERY"  : ""));
412 /* *INDENT-ON* */
413 
414     /*
415      * Allocate the temporary string. Optimized.
416      */
417     len1 = strlen(aName) + 1;
418     len2 = strlen(relatedName) + 1;
419     len = len1 + len2 + MIN_PARSE;	/* Lots of space: more than enough */
420 
421     need = (len * 2 + len1 + len2);
422     if (need > (size_t) max_uri_size ||
423 	(int) need < (int) len1 ||
424 	(int) need < (int) len2)
425 	return StrAllocCopy(return_value, "");
426 
427     result = tail = (char *) LYalloca(need);
428     if (result == NULL) {
429 	outofmem(__FILE__, "HTParse");
430     }
431     *result = '\0';
432     name = result + len;
433     rel = name + len1;
434 
435     /*
436      * Make working copy of the input string to cut up.
437      */
438     MemCpy(name, aName, len1);
439 
440     /*
441      * Cut up the string into URL fields.
442      */
443     scan(name, &given);
444     SHOW_PARTS(given);
445 
446     /*
447      * Now related string.
448      */
449     if ((given.access && given.host && given.absolute) || !*relatedName) {
450 	/*
451 	 * Inherit nothing!
452 	 */
453 	related.access = NULL;
454 	related.host = NULL;
455 	related.absolute = NULL;
456 	related.relative = NULL;
457 	related.search = NULL;
458 	related.anchor = NULL;
459     } else {
460 	MemCpy(rel, relatedName, len2);
461 	scan(rel, &related);
462     }
463     SHOW_PARTS(related);
464 
465     /*
466      * Handle the scheme (access) field.
467      */
468     if (given.access && given.host && !given.relative && !given.absolute) {
469 	if (!strcmp(given.access, "http") ||
470 	    !strcmp(given.access, "https") ||
471 	    !strcmp(given.access, "ftp")) {
472 
473 	    /*
474 	     * Assume root.
475 	     */
476 	    given.absolute = empty_string;
477 	}
478     }
479     acc_method = given.access ? given.access : related.access;
480     if (wanted & PARSE_ACCESS) {
481 	if (acc_method) {
482 	    strcpy(tail, acc_method);
483 	    tail += strlen(tail);
484 	    if (wanted & PARSE_PUNCTUATION) {
485 		*tail++ = ':';
486 		*tail = '\0';
487 	    }
488 	}
489     }
490 
491     /*
492      * If different schemes, inherit nothing.
493      *
494      * We'll try complying with RFC 1808 and the Fielding draft, and inherit
495      * nothing if both schemes are given, rather than only when they differ,
496      * except for file URLs - FM
497      *
498      * After trying it for a while, it's still premature, IHMO, to go along
499      * with it, so this is back to inheriting for identical schemes whether or
500      * not they are "file".  If you want to try it again yourself, uncomment
501      * the strcasecomp() below.  - FM
502      */
503     if ((given.access && related.access) &&
504 	(			/* strcasecomp(given.access, "file") || */
505 	    strcmp(given.access, related.access))) {
506 	related.host = NULL;
507 	related.absolute = NULL;
508 	related.relative = NULL;
509 	related.search = NULL;
510 	related.anchor = NULL;
511     }
512 
513     /*
514      * Handle the host field.
515      */
516     if (wanted & PARSE_HOST) {
517 	if (given.host || related.host) {
518 	    if (wanted & PARSE_PUNCTUATION) {
519 		*tail++ = '/';
520 		*tail++ = '/';
521 	    }
522 	    strcpy(tail, given.host ? given.host : related.host);
523 	    /*
524 	     * Ignore default port numbers, and trailing dots on FQDNs, which
525 	     * will only cause identical addresses to look different.  (related
526 	     * is already a clean url).
527 	     */
528 	    {
529 		char *p2, *h;
530 		int portnumber;
531 		int gen_delims = 0;
532 
533 		if ((p2 = HTSkipToAt(result, &gen_delims)) != NULL
534 		    && gen_delims == 0) {
535 		    tail = (p2 + 1);
536 		}
537 		p2 = HTParsePort(result, &portnumber);
538 		if (p2 != NULL && acc_method != NULL) {
539 		    /*
540 		     * Port specified.
541 		     */
542 #define ACC_METHOD(a,b) (!strcmp(acc_method, a) && (portnumber == b))
543 		    if (ACC_METHOD("http", 80) ||
544 			ACC_METHOD("https", 443) ||
545 			ACC_METHOD("gopher", 70) ||
546 			ACC_METHOD("ftp", 21) ||
547 			ACC_METHOD("wais", 210) ||
548 			ACC_METHOD("nntp", 119) ||
549 			ACC_METHOD("news", 119) ||
550 			ACC_METHOD("newspost", 119) ||
551 			ACC_METHOD("newsreply", 119) ||
552 			ACC_METHOD("snews", 563) ||
553 			ACC_METHOD("snewspost", 563) ||
554 			ACC_METHOD("snewsreply", 563) ||
555 			ACC_METHOD("finger", 79) ||
556 			ACC_METHOD("telnet", 23) ||
557 			ACC_METHOD("tn3270", 23) ||
558 			ACC_METHOD("rlogin", 513) ||
559 			ACC_METHOD("cso", 105))
560 			*p2 = '\0';	/* It is the default: ignore it */
561 		}
562 		if (p2 == NULL) {
563 		    int len3 = (int) strlen(tail);
564 
565 		    if (len3 > 0) {
566 			h = tail + len3 - 1;	/* last char of hostname */
567 			if (*h == '.')
568 			    *h = '\0';	/* chop final . */
569 		    }
570 		} else if (p2 != result) {
571 		    h = p2;
572 		    h--;	/* End of hostname */
573 		    if (*h == '.') {
574 			/*
575 			 * Slide p2 over h.
576 			 */
577 			while (*p2 != '\0')
578 			    *h++ = *p2++;
579 			*h = '\0';	/* terminate */
580 		    }
581 		}
582 	    }
583 #if defined(USE_IDNA) || defined(USE_IDN2)
584 	    /*
585 	     * Depending on locale-support, we could have a literal UTF-8
586 	     * string as a host name, or a URL-encoded form of that.
587 	     */
588 	    convert_to_idna(tail);
589 #endif
590 	}
591     }
592 
593     /*
594      * Trim any blanks from the result so far - there's no excuse for blanks
595      * in a hostname.  Also update the tail here.
596      */
597     tail = LYRemoveBlanks(result);
598 
599     /*
600      * If host in given or related was ended directly with a '?' (no slash),
601      * fake the search part into absolute.  This is the only case search is
602      * returned from scan.  A host must have been present.  this restores the
603      * '?' at which the host part had been truncated in scan, we have to do
604      * this after host part handling is done.  - kw
605      */
606     if (given.search && *(given.search - 1) == '\0') {
607 	given.absolute = given.search - 1;
608 	given.absolute[0] = '?';
609     } else if (related.search && !related.absolute &&
610 	       *(related.search - 1) == '\0') {
611 	related.absolute = related.search - 1;
612 	related.absolute[0] = '?';
613     }
614 
615     /*
616      * If different hosts, inherit no path.
617      */
618     if (given.host && related.host)
619 	if (strcmp(given.host, related.host) != 0) {
620 	    related.absolute = NULL;
621 	    related.relative = NULL;
622 	    related.anchor = NULL;
623 	}
624 
625     /*
626      * Handle the path.
627      */
628     if (wanted & (PARSE_PATH | PARSE_STRICTPATH | PARSE_QUERY)) {
629 	int want_detail = (wanted & (PARSE_STRICTPATH | PARSE_QUERY));
630 
631 	if (acc_method && !given.absolute && given.relative) {
632 	    /*
633 	     * Treat all given nntp or snews paths, or given paths for news
634 	     * URLs with a host, as absolute.
635 	     */
636 	    switch (*acc_method) {
637 	    case 'N':
638 	    case 'n':
639 		if (!strcasecomp(acc_method, "nntp") ||
640 		    (!strcasecomp(acc_method, "news") &&
641 		     !strncasecomp(result, "news://", 7))) {
642 		    given.absolute = given.relative;
643 		    given.relative = NULL;
644 		}
645 		break;
646 	    case 'S':
647 	    case 's':
648 		if (!strcasecomp(acc_method, "snews")) {
649 		    given.absolute = given.relative;
650 		    given.relative = NULL;
651 		}
652 		break;
653 	    }
654 	}
655 
656 	if (given.absolute) {	/* All is given */
657 	    char *base = tail;
658 
659 	    if (wanted & PARSE_PUNCTUATION)
660 		*tail++ = '/';
661 	    strcpy(tail, given.absolute);
662 	    HTSimplify(base, TRUE);
663 	    CTRACE((tfp, "HTParse: (ABS)\n"));
664 	} else if (related.absolute) {	/* Adopt path not name */
665 	    char *base = tail;
666 
667 	    *tail++ = '/';
668 	    strcpy(tail, related.absolute);
669 	    if (given.relative) {
670 		/* RFC 1808 part 4 step 5 (if URL path is empty) */
671 		/* a) if given has params, add/replace that */
672 		if (given.relative[0] == ';') {
673 		    strcpy(strchr_or_end(tail, ';'), given.relative);
674 		}
675 		/* b) if given has query, add/replace that */
676 		else if (given.relative[0] == '?') {
677 		    strcpy(strchr_or_end(tail, '?'), given.relative);
678 		}
679 		/* otherwise fall through to RFC 1808 part 4 step 6 */
680 		else {
681 		    p = StrChr(tail, '?');	/* Search part? */
682 		    if (p == NULL)
683 			p = (tail + strlen(tail) - 1);
684 		    for (; *p != '/'; p--) ;	/* last / */
685 		    p[1] = '\0';	/* Remove filename */
686 		    strcat(p, given.relative);	/* Add given one */
687 		}
688 		HTSimplify(base, FALSE);
689 		if (*base == '\0')
690 		    strcpy(base, "/");
691 	    } else {
692 		HTSimplify(base, TRUE);
693 	    }
694 	    if (base[0] == '/' && base[1] == '/') {
695 		char *pz;
696 
697 		for (pz = base; (pz[0] = pz[1]) != '\0'; ++pz) ;
698 	    }
699 	    CTRACE((tfp, "HTParse: (Related-ABS)\n"));
700 	} else if (given.relative) {
701 	    strcpy(tail, given.relative);	/* what we've got */
702 	    HTSimplify(tail, FALSE);
703 	    CTRACE((tfp, "HTParse: (REL)\n"));
704 	} else if (related.relative) {
705 	    strcpy(tail, related.relative);
706 	    HTSimplify(tail, FALSE);
707 	    CTRACE((tfp, "HTParse: (Related-REL)\n"));
708 	} else {		/* No inheritance */
709 	    if (!isLYNXCGI(aName) &&
710 		!isLYNXEXEC(aName) &&
711 		!isLYNXPROG(aName)) {
712 		*tail++ = '/';
713 		*tail = '\0';
714 	    } else {
715 		HTSimplify(tail, FALSE);
716 	    }
717 	    if (!strcmp(result, "news:/"))
718 		result[5] = '*';
719 	    CTRACE((tfp, "HTParse: (No inheritance)\n"));
720 	}
721 	if (want_detail) {
722 	    p = StrChr(tail, '?');	/* Search part? */
723 	    if (p) {
724 		if (PARSE_STRICTPATH) {
725 		    *p = '\0';
726 		} else {
727 		    if (!(wanted & PARSE_PUNCTUATION))
728 			p++;
729 		    do {
730 			*tail++ = *p;
731 		    } while (*p++);
732 		}
733 	    } else {
734 		if (wanted & PARSE_QUERY)
735 		    *tail = '\0';
736 	    }
737 	}
738     }
739 
740     /*
741      * Handle the fragment (anchor).  Never inherit.
742      */
743     if (wanted & PARSE_ANCHOR) {
744 	if (given.anchor && *given.anchor) {
745 	    tail += strlen(tail);
746 	    if (wanted & PARSE_PUNCTUATION)
747 		*tail++ = '#';
748 	    strcpy(tail, given.anchor);
749 	}
750     }
751 
752     /*
753      * If there are any blanks remaining in the string, escape them as needed.
754      * See the discussion in LYLegitimizeHREF() for example.
755      */
756     if ((p = StrChr(result, ' ')) != 0) {
757 	switch (is_url(result)) {
758 	case UNKNOWN_URL_TYPE:
759 	    CTRACE((tfp, "HTParse:      ignore:`%s'\n", result));
760 	    break;
761 	case LYNXEXEC_URL_TYPE:
762 	case LYNXPROG_URL_TYPE:
763 	case LYNXCGI_URL_TYPE:
764 	case LYNXPRINT_URL_TYPE:
765 	case LYNXHIST_URL_TYPE:
766 	case LYNXDOWNLOAD_URL_TYPE:
767 	case LYNXKEYMAP_URL_TYPE:
768 	case LYNXIMGMAP_URL_TYPE:
769 	case LYNXCOOKIE_URL_TYPE:
770 	case LYNXCACHE_URL_TYPE:
771 	case LYNXDIRED_URL_TYPE:
772 	case LYNXOPTIONS_URL_TYPE:
773 	case LYNXCFG_URL_TYPE:
774 	case LYNXCOMPILE_OPTS_URL_TYPE:
775 	case LYNXMESSAGES_URL_TYPE:
776 	    CTRACE((tfp, "HTParse:      spaces:`%s'\n", result));
777 	    break;
778 	case NOT_A_URL_TYPE:
779 	default:
780 	    CTRACE((tfp, "HTParse:      encode:`%s'\n", result));
781 	    do {
782 		q = p + strlen(p) + 2;
783 
784 		while (q != p + 1) {
785 		    q[0] = q[-2];
786 		    --q;
787 		}
788 		p[0] = HEX_ESCAPE;
789 		p[1] = '2';
790 		p[2] = '0';
791 	    } while ((p = StrChr(result, ' ')) != 0);
792 	    break;
793 	}
794     }
795     CTRACE((tfp, "HTParse:      result:`%s'\n", result));
796 
797     StrAllocCopy(return_value, result);
798     LYalloca_free(result);
799 
800     /* FIXME: could be optimized using HTParse() internals */
801     if (*relatedName &&
802 	((wanted & PARSE_ALL_WITHOUT_ANCHOR) == PARSE_ALL_WITHOUT_ANCHOR)) {
803 	/*
804 	 * Check whether to fill in localhost.  - FM
805 	 */
806 	LYFillLocalFileURL(&return_value, relatedName);
807 	CTRACE((tfp, "pass LYFillLocalFile:`%s'\n", return_value));
808     }
809 
810     return return_value;	/* exactly the right length */
811 }
812 
813 /*	HTParseAnchor(), fast HTParse() specialization
814  *	----------------------------------------------
815  *
816  * On exit,
817  *	returns		A pointer within input string (probably to its end '\0')
818  */
HTParseAnchor(const char * aName)819 const char *HTParseAnchor(const char *aName)
820 {
821     const char *p = aName;
822 
823     for (; *p && *p != '#'; p++) {
824 	;
825     }
826     if (*p == '#') {
827 	/* the safe way based on HTParse() -
828 	 * keeping in mind scan() peculiarities on schemes:
829 	 */
830 	struct struct_parts given;
831 	size_t need = ((unsigned) ((p - aName) + (int) strlen(p) + 1));
832 	char *name;
833 
834 	if (need > (size_t) max_uri_size) {
835 	    p += strlen(p);
836 	} else {
837 	    name = (char *) LYalloca(need);
838 
839 	    if (name == NULL) {
840 		outofmem(__FILE__, "HTParseAnchor");
841 	    }
842 	    strcpy(name, aName);
843 	    scan(name, &given);
844 	    LYalloca_free(name);
845 
846 	    p++;		/*next to '#' */
847 	    if (given.anchor == NULL) {
848 		for (; *p; p++)	/*scroll to end '\0' */
849 		    ;
850 	    }
851 	}
852     }
853     return p;
854 }
855 
/*	Simplify a filename.				HTSimplify()
 *	--------------------
 *
 *  A unix-style file is allowed to contain the sequence xxx/../ which may
 *  be replaced by "" , and the sequence "/./" which may be replaced by "/".
 *  Simplification helps us recognize duplicate filenames.
 *
 *  RFC 3986 section 5.2.4 says to do this whether or not the path was relative.
 *
 * On entry,
 *	filename	points to a writable, zero terminated path.
 *	absolute	if false, a leading path separator is stepped over
 *			before simplifying.
 * On exit,
 *	filename	has been rewritten in place (it can only get shorter).
 *			Only the part before the first '?' or '#' is examined
 *			for dot-segments.
 */
void HTSimplify(char *filename, BOOL absolute)
{
#define MY_FMT "HTParse HTSimplify\t(%s)"
#ifdef NO_LYNX_TRACE
#define debug_at(at)		/* nothing */
#define atln		"?"
#else
    const char *atln;		/* label of the rule which matched, for traces */

#define debug_at(at)	atln = at
#endif
    char *mark;			/* first character which may be changed */
    char *p;
    size_t limit;		/* characters left to examine, from 'filename' */

    CTRACE2(TRACE_HTPARSE,
	    (tfp, MY_FMT " %s\n",
	     filename,
	     absolute ? "ABS" : "REL"));

    if (LYIsPathSep(*filename) && !absolute)
	++filename;
    mark = filename;
    limit = strlen(filename);

    /* stop examining at the query or fragment, if there is one */
    for (p = filename; *p; ++p) {
	if (*p == '?' || *p == '#') {
	    limit = (size_t) (p - filename);
	    break;
	}
    }
    while ((limit != 0) && (*filename != '\0')) {
	size_t trim = 0;	/* number of characters to delete at 'filename' */
	size_t skip = 0;	/* nonzero: store '/' in the first slot refilled */
	size_t last = 0;	/* nonzero: also delete the preceding segment */

	debug_at("?");
	p = filename;
	/* the labels 2A..2D presumably follow RFC 3986 section 5.2.4 step 2 */
	if (limit >= 2 && !memcmp(p, "./", 2)) {	/* 2A */
	    debug_at("2A");
	    trim = 2;
	} else if (limit >= 3 && !memcmp(p, "../", 3)) {
	    debug_at("2A2");
	    trim = 3;
	} else if (limit >= 3 && !memcmp(p, "/./", 3)) {	/* 2B */
	    debug_at("2B");
	    trim = 2;
	    skip = 1;
	} else if (limit == 2 && !memcmp(p, "/.", 2)) {
	    debug_at("2B2");
	    trim = 1;
	    skip = 1;
	} else if (limit >= 4 && !memcmp(p, "/../", 4)) {	/* 2C */
	    debug_at("2C");
	    trim = 3;
	    skip = 1;
	    last = 1;
	} else if (limit == 3 && !memcmp(p, "/..", 3)) {
	    debug_at("2C2");
	    trim = 2;
	    skip = 1;
	    last = 1;
	} else if (limit == 2 && !memcmp(p, "..", 2)) {		/* 2D */
	    debug_at("2D");
	    trim = 2;
	} else if (limit == 1 && !memcmp(p, ".", 1)) {
	    debug_at("2D2");
	    trim = 1;
	}
	if (trim) {
	    CTRACE2(TRACE_HTPARSE,
		    (tfp, MY_FMT " trim %lu/%lu (%.*s) '%.*s' @%s\n",
		     mark, (unsigned long) trim, (unsigned long) limit,
		     (int) trim, p + skip, (int) limit, p, atln));
	}
	if (last) {
	    char *prior = filename;

	    /*
	     * Back up to the '/' which begins the preceding segment, but not
	     * beyond 'mark'.
	     */
	    if (prior != mark) {
		--prior;
		while (prior != mark && *prior != '/') {
		    --prior;
		}
	    }
	    if (prior != filename) {
		/* widen the deletion to include that segment */
		trim += (size_t) (filename - prior);
		limit += (size_t) (filename - prior);
		filename = prior;
		CTRACE2(TRACE_HTPARSE,
			(tfp, MY_FMT " TRIM %lu/%lu (%.*s)\n",
			 mark, (unsigned long) trim, (unsigned long) limit,
			 (int) trim, filename + skip));
	    }
	}
	if (trim) {
	    limit -= trim;
	    /*
	     * Close the gap: move the rest of the string, including its
	     * terminating zero, 'trim' places to the left.
	     */
	    for (p = filename;; ++p) {
		if ((p[0] = p[trim]) == '\0') {
		    break;
		}
		if (skip) {
		    /* only the first character moved is replaced */
		    p[0] = '/';
		    skip = 0;
		}
	    }
	    CTRACE2(TRACE_HTPARSE,
		    (tfp, MY_FMT " loop %lu\n", mark, (unsigned long) limit));
	} else {
	    /*
	     * Nothing to delete here: step over this segment, stopping at
	     * the next '/' or when 'limit' runs out.
	     */
	    if (*filename == '/') {
		++filename;
		--limit;
	    }
	    while ((limit != 0) && (*filename != '/')) {
		++filename;
		--limit;
	    }
	}
    }
    CTRACE2(TRACE_HTPARSE, (tfp, MY_FMT " done\n", mark));
#undef MY_FMT
}
986 
987 /*	Make Relative Name.					HTRelative()
988  *	-------------------
989  *
990  * This function creates and returns a string which gives an expression of
991  * one address as related to another.  Where there is no relation, an absolute
992  * address is returned.
993  *
994  *  On entry,
995  *	Both names must be absolute, fully qualified names of nodes
996  *	(no anchor bits)
997  *
998  *  On exit,
999  *	The return result points to a newly allocated name which, if
1000  *	parsed by HTParse relative to relatedName, will yield aName.
1001  *	The caller is responsible for freeing the resulting name later.
1002  *
1003  */
HTRelative(const char * aName,const char * relatedName)1004 char *HTRelative(const char *aName,
1005 		 const char *relatedName)
1006 {
1007     char *result = NULL;
1008     const char *p = aName;
1009     const char *q = relatedName;
1010     const char *after_access = NULL;
1011     const char *path = NULL;
1012     const char *last_slash = NULL;
1013     int slashes = 0;
1014 
1015     for (; *p; p++, q++) {	/* Find extent of match */
1016 	if (*p != *q)
1017 	    break;
1018 	if (*p == ':')
1019 	    after_access = p + 1;
1020 	if (*p == '/') {
1021 	    last_slash = p;
1022 	    slashes++;
1023 	    if (slashes == 3)
1024 		path = p;
1025 	}
1026     }
1027 
1028     /* q, p point to the first non-matching character or zero */
1029 
1030     if (!after_access) {	/* Different access */
1031 	StrAllocCopy(result, aName);
1032     } else if (slashes < 3) {	/* Different nodes */
1033 	StrAllocCopy(result, after_access);
1034     } else if (slashes == 3) {	/* Same node, different path */
1035 	StrAllocCopy(result, path);
1036     } else {			/* Some path in common */
1037 	unsigned levels = 0;
1038 
1039 	for (; *q && (*q != '#'); q++)
1040 	    if (*q == '/')
1041 		levels++;
1042 	result = typecallocn(char, 3 * levels + strlen(last_slash) + 1);
1043 
1044 	if (result == NULL)
1045 	    outofmem(__FILE__, "HTRelative");
1046 
1047 	result[0] = '\0';
1048 	for (; levels; levels--)
1049 	    strcat(result, "../");
1050 	strcat(result, last_slash + 1);
1051     }
1052     CTRACE((tfp,
1053 	    "HTparse: `%s' expressed relative to\n   `%s' is\n   `%s'.\n",
1054 	    aName, relatedName, result));
1055     return result;
1056 }
1057 
/*
 * Allocate a zero-filled char buffer holding as many bytes as lie between
 * "base" and "next" (two pointers into the same string, next >= base) plus
 * "extra" more.  Every argument is parenthesized so that expressions such
 * as "s + 1" may be passed, and the size is computed as size_t rather than
 * being narrowed to int.
 */
#define AlloCopy(next,base,extra) \
	typecallocn(char, ((size_t) ((next) - (base)) + (size_t) (extra)))
1060 
1061 /*	Escape undesirable characters using %			HTEscape()
1062  *	-------------------------------------
1063  *
1064  *	This function takes a pointer to a string in which
1065  *	some characters may be unacceptable unescaped.
1066  *	It returns a string which has these characters
1067  *	represented by a '%' character followed by two hex digits.
1068  *
1069  *	Unlike HTUnEscape(), this routine returns a calloc'd string.
1070  */
1071 /* *INDENT-OFF* */
/*
 * Permission bits for the 96 codes 0x20..0x7F, indexed by (code - 32):
 *	bit 0 (value 1)	xalpha	-- see HTFile.h
 *	bit 1 (value 2)	xpalpha	-- as xalpha but with plus.
 *	bit 2 (value 4)	path	-- as xpalphas but with /
 * A zero entry means the character is never acceptable unescaped.
 */
static const unsigned char isAcceptable[96] =
{
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0,	/*  !"#$%&' */
    /* 0x28 */ 0, 0, 7, 6, 0, 7, 7, 4,	/* ()*+,-./ */
    /* 0x30 */ 7, 7, 7, 7, 7, 7, 7, 7,	/* 01234567 */
    /* 0x38 */ 7, 7, 0, 0, 0, 0, 0, 0,	/* 89:;<=>? */
    /* 0x40 */ 7, 7, 7, 7, 7, 7, 7, 7,	/* @ABCDEFG */
    /* 0x48 */ 7, 7, 7, 7, 7, 7, 7, 7,	/* HIJKLMNO */
    /* 0x50 */ 7, 7, 7, 7, 7, 7, 7, 7,	/* PQRSTUVW */
    /* 0x58 */ 7, 7, 7, 0, 0, 0, 0, 7,	/* XYZ[\]^_ */
    /* 0x60 */ 0, 7, 7, 7, 7, 7, 7, 7,	/* `abcdefg */
    /* 0x68 */ 7, 7, 7, 7, 7, 7, 7, 7,	/* hijklmno */
    /* 0x70 */ 7, 7, 7, 7, 7, 7, 7, 7,	/* pqrstuvw */
    /* 0x78 */ 7, 7, 7, 0, 0, 0, 0, 0	/* xyz{|}~ DEL */
};
1085 /* *INDENT-ON* */
1086 
/* Digits used to write a byte as two upper-case hexadecimal characters. */
static const char *hex = "0123456789ABCDEF";

/*
 * Non-zero when the ASCII code "a" lies in 32..127 and isAcceptable[] grants
 * it at least one of the bits in "mask".
 *
 * "mask" is NOT a macro parameter: it is taken from the scope in which the
 * macro is expanded.  The argument is evaluated more than once, so it must
 * not have side effects; it is parenthesized so that any expression may be
 * passed.
 */
#define ACCEPTABLE(a)	((a) >= 32 && (a) < 128 && ((isAcceptable[(a) - 32]) & mask))
1090 
HTEscape(const char * str,unsigned mask)1091 char *HTEscape(const char *str,
1092 	       unsigned mask)
1093 {
1094     const char *p;
1095     char *q;
1096     char *result;
1097     size_t unacceptable = 0;
1098 
1099     for (p = str; *p; p++)
1100 	if (!ACCEPTABLE(UCH(TOASCII(*p))))
1101 	    unacceptable++;
1102     result = AlloCopy(p, str, (unacceptable * 2) + 1);
1103 
1104     if (result == NULL)
1105 	outofmem(__FILE__, "HTEscape");
1106 
1107     for (q = result, p = str; *p; p++) {
1108 	unsigned char a = UCH(TOASCII(*p));
1109 
1110 	if (!ACCEPTABLE(a)) {
1111 	    *q++ = HEX_ESCAPE;	/* Means hex coming */
1112 	    *q++ = hex[a >> 4];
1113 	    *q++ = hex[a & 15];
1114 	} else
1115 	    *q++ = *p;
1116     }
1117     *q = '\0';			/* Terminate */
1118     return result;
1119 }
1120 
1121 /*	Escape unsafe characters using %			HTEscapeUnsafe()
1122  *	--------------------------------
1123  *
1124  *	This function takes a pointer to a string in which
1125  *	some characters that may be unsafe are unescaped.
1126  *	It returns a string which has these characters
1127  *	represented by a '%' character followed by two hex digits.
1128  *
1129  *	Unlike HTUnEscape(), this routine returns a calloc'd string.
1130  */
1131 #define UNSAFE(ch) (((ch) <= 32) || ((ch) >= 127))
1132 
HTEscapeUnsafe(const char * str)1133 char *HTEscapeUnsafe(const char *str)
1134 {
1135     const char *p;
1136     char *q;
1137     char *result;
1138     size_t unacceptable = 0;
1139 
1140     for (p = str; *p; p++)
1141 	if (UNSAFE(UCH(TOASCII(*p))))
1142 	    unacceptable++;
1143     result = AlloCopy(p, str, (unacceptable * 2) + 1);
1144 
1145     if (result == NULL)
1146 	outofmem(__FILE__, "HTEscapeUnsafe");
1147 
1148     for (q = result, p = str; *p; p++) {
1149 	unsigned char a = UCH(TOASCII(*p));
1150 
1151 	if (UNSAFE(a)) {
1152 	    *q++ = HEX_ESCAPE;	/* Means hex coming */
1153 	    *q++ = hex[a >> 4];
1154 	    *q++ = hex[a & 15];
1155 	} else
1156 	    *q++ = *p;
1157     }
1158     *q = '\0';			/* Terminate */
1159     return result;
1160 }
1161 
1162 /*	Escape undesirable characters using % but space to +.	HTEscapeSP()
1163  *	-----------------------------------------------------
1164  *
1165  *	This function takes a pointer to a string in which
1166  *	some characters may be unacceptable unescaped.
1167  *	It returns a string which has these characters
1168  *	represented by a '%' character followed by two hex digits,
1169  *	except that spaces are converted to '+' instead of %20.
1170  *
1171  *	Unlike HTUnEscape(), this routine returns a calloced string.
1172  */
HTEscapeSP(const char * str,unsigned mask)1173 char *HTEscapeSP(const char *str,
1174 		 unsigned mask)
1175 {
1176     const char *p;
1177     char *q;
1178     char *result;
1179     size_t unacceptable = 0;
1180 
1181     for (p = str; *p; p++)
1182 	if (!(*p == ' ' || ACCEPTABLE(UCH(TOASCII(*p)))))
1183 	    unacceptable++;
1184     result = AlloCopy(p, str, (unacceptable * 2) + 1);
1185 
1186     if (result == NULL)
1187 	outofmem(__FILE__, "HTEscape");
1188 
1189     for (q = result, p = str; *p; p++) {
1190 	unsigned char a = UCH(TOASCII(*p));
1191 
1192 	if (a == 32) {
1193 	    *q++ = '+';
1194 	} else if (!ACCEPTABLE(a)) {
1195 	    *q++ = HEX_ESCAPE;	/* Means hex coming */
1196 	    *q++ = hex[a >> 4];
1197 	    *q++ = hex[a & 15];
1198 	} else {
1199 	    *q++ = *p;
1200 	}
1201     }
1202     *q = '\0';			/* Terminate */
1203     return result;
1204 }
1205 
1206 /*	Decode %xx escaped characters.				HTUnEscape()
1207  *	------------------------------
1208  *
1209  *	This function takes a pointer to a string in which some
1210  *	characters may have been encoded in %xy form, where xy is
1211  *	the ASCII hex code for character 16x+y.
1212  *	The string is converted in place, as it will never grow.
1213  */
/*
 * Value of one hexadecimal digit.  Callers are expected to have checked the
 * character with isxdigit(); anything that is neither '0'..'9' nor 'A'..'F'
 * is computed as if it were a lower-case letter.
 */
static char from_hex(int c)
{
    int value;

    if (c >= '0' && c <= '9')
	value = c - '0';
    else if (c >= 'A' && c <= 'F')
	value = c - 'A' + 10;
    else
	value = c - 'a' + 10;	/* accept small letters just in case */

    return (char) value;
}
1220 
HTUnEscape(char * str)1221 char *HTUnEscape(char *str)
1222 {
1223     char *p = str;
1224     char *q = str;
1225 
1226     if (!(p && *p))
1227 	return str;
1228 
1229     while (*p != '\0') {
1230 	if (*p == HEX_ESCAPE &&
1231 	/*
1232 	 * Tests shouldn't be needed, but better safe than sorry.
1233 	 */
1234 	    p[1] && p[2] &&
1235 	    isxdigit(UCH(p[1])) &&
1236 	    isxdigit(UCH(p[2]))) {
1237 	    p++;
1238 	    if (*p)
1239 		*q = (char) (from_hex(*p++) * 16);
1240 	    if (*p) {
1241 		/*
1242 		 * Careful! FROMASCII() may evaluate its arg more than once!
1243 		 */
1244 		/* S/390 -- gil -- 0221 */
1245 		*q = (char) (*q + from_hex(*p++));
1246 	    }
1247 	    *q = FROMASCII(*q);
1248 	    q++;
1249 	} else {
1250 	    *q++ = *p++;
1251 	}
1252     }
1253 
1254     *q = '\0';
1255     return str;
1256 
1257 }				/* HTUnEscape */
1258 
1259 /*	Decode some %xx escaped characters.		      HTUnEscapeSome()
1260  *	-----------------------------------			Klaus Weide
1261  *							    (kweide@tezcat.com)
1262  *	This function takes a pointer to a string in which some
1263  *	characters may have been encoded in %xy form, where xy is
1264  *	the ASCII hex code for character 16x+y, and a pointer to
1265  *	a second string containing one or more characters which
1266  *	should be unescaped if escaped in the first string.
1267  *	The first string is converted in place, as it will never grow.
1268  */
HTUnEscapeSome(char * str,const char * do_trans)1269 char *HTUnEscapeSome(char *str,
1270 		     const char *do_trans)
1271 {
1272     char *p = str;
1273     char *q = str;
1274     char testcode;
1275 
1276     if (p == NULL || *p == '\0' || do_trans == NULL || *do_trans == '\0')
1277 	return str;
1278 
1279     while (*p != '\0') {
1280 	if (*p == HEX_ESCAPE &&
1281 	    p[1] && p[2] &&	/* tests shouldn't be needed, but.. */
1282 	    isxdigit(UCH(p[1])) &&
1283 	    isxdigit(UCH(p[2])) &&
1284 	    (testcode = (char) FROMASCII(from_hex(p[1]) * 16 +
1285 					 from_hex(p[2]))) &&	/* %00 no good */
1286 	    StrChr(do_trans, testcode)) {	/* it's one of the ones we want */
1287 	    *q++ = testcode;
1288 	    p += 3;
1289 	} else {
1290 	    *q++ = *p++;
1291 	}
1292     }
1293 
1294     *q = '\0';
1295     return str;
1296 
1297 }				/* HTUnEscapeSome */
1298 /* *INDENT-OFF* */
/*
 * Flags for the 96 codes 0x20..0x7F, indexed by (code - 32), consulted when
 * a string is turned into an RFC 822 word:
 *	bit 0 (value 1)	-- need "quoting"
 *	bit 1 (value 2)	-- need \escape if quoted
 */
static const unsigned char crfc[96] =
{
    /* 0x20 */ 1, 0, 3, 0, 0, 0, 0, 0,	/*  !"#$%&' */
    /* 0x28 */ 1, 1, 0, 0, 1, 0, 1, 0,	/* ()*+,-./ */
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0,	/* 01234567 */
    /* 0x38 */ 0, 0, 1, 1, 1, 0, 1, 0,	/* 89:;<=>? */
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0,	/* @ABCDEFG */
    /* 0x48 */ 0, 0, 0, 0, 0, 0, 0, 0,	/* HIJKLMNO */
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0,	/* PQRSTUVW */
    /* 0x58 */ 0, 0, 0, 1, 2, 1, 0, 0,	/* XYZ[\]^_ */
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0,	/* `abcdefg */
    /* 0x68 */ 0, 0, 0, 0, 0, 0, 0, 0,	/* hijklmno */
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0,	/* pqrstuvw */
    /* 0x78 */ 0, 0, 0, 0, 0, 0, 0, 3	/* xyz{|}~ DEL */
};
1311 /* *INDENT-ON* */
1312 
/*
 * ASCII codes, written as octal escapes so that their values do not depend
 * on the character set of the host.
 */
#define ASCII_TAB	'\011'	/* horizontal tab */
#define ASCII_LF	'\012'	/* line feed */
#define ASCII_CR	'\015'	/* carriage return */
#define ASCII_SPC	'\040'	/* space */
#define ASCII_BAK	'\134'	/* backslash */
1318 
1319 /*
1320  *  Turn a string which is not a RFC 822 token into a quoted-string. - KW
1321  *  The "quoted" parameter tells whether we need the beginning/ending quote
1322  *  marks.  If not, the caller will provide them -TD
1323  */
HTMake822Word(char ** str,int quoted)1324 void HTMake822Word(char **str,
1325 		   int quoted)
1326 {
1327     const char *p;
1328     char *q;
1329     char *result;
1330     unsigned char a;
1331     unsigned added = 0;
1332 
1333     if (isEmpty(*str)) {
1334 	StrAllocCopy(*str, quoted ? "\"\"" : "");
1335 	return;
1336     }
1337     for (p = *str; *p; p++) {
1338 	a = UCH(TOASCII(*p));	/* S/390 -- gil -- 0240 */
1339 	if (a < 32 || a >= 128 ||
1340 	    ((crfc[a - 32]) & 1)) {
1341 	    if (!added)
1342 		added = 2;
1343 	    if (a >= 160 || a == '\t')
1344 		continue;
1345 	    if (a == '\r' || a == '\n')
1346 		added += 2;
1347 	    else if ((a & 127) < 32 || ((crfc[a - 32]) & 2))
1348 		added++;
1349 	}
1350     }
1351     if (!added)
1352 	return;
1353     result = AlloCopy(p, *str, added + 1);
1354     if (result == NULL)
1355 	outofmem(__FILE__, "HTMake822Word");
1356 
1357     q = result;
1358     if (quoted)
1359 	*q++ = '"';
1360     /*
1361      * Having converted the character to ASCII, we can't use symbolic
1362      * escape codes, since they're in the host character set, which
1363      * is not necessarily ASCII.  Thus we use octal escape codes instead.
1364      * -- gil (Paul Gilmartin) <pg@sweng.stortek.com>
1365      */
1366     /* S/390 -- gil -- 0268 */
1367     for (p = *str; *p; p++) {
1368 	a = UCH(TOASCII(*p));
1369 	if ((a != ASCII_TAB) &&
1370 	    ((a & 127) < ASCII_SPC ||
1371 	     (a < 128 && ((crfc[a - 32]) & 2))))
1372 	    *q++ = ASCII_BAK;
1373 	*q++ = *p;
1374 	if (a == ASCII_LF ||
1375 	    (a == ASCII_CR && (TOASCII(*(p + 1)) != ASCII_LF)))
1376 	    *q++ = ' ';
1377     }
1378     if (quoted)
1379 	*q++ = '"';
1380     *q = '\0';			/* Terminate */
1381     FREE(*str);
1382     *str = result;
1383 }
1384