1 /* HTML parser */
2 
3 #ifdef HAVE_CONFIG_H
4 #include "config.h"
5 #endif
6 
7 #ifndef _GNU_SOURCE
8 #define _GNU_SOURCE /* strcasestr() */
9 #endif
10 
11 #include <stdarg.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include "elinks.h"
17 
18 #include "bfu/listmenu.h"
19 #include "bfu/menu.h"
20 #include "bookmarks/bookmarks.h"
21 #include "config/options.h"
22 #include "config/kbdbind.h"
23 #include "document/html/frames.h"
24 #include "document/html/parser/link.h"
25 #include "document/html/parser/parse.h"
26 #include "document/html/parser/stack.h"
27 #include "document/html/parser.h"
28 #include "document/html/renderer.h"
29 #include "globhist/globhist.h"
30 #include "mime/mime.h"
31 #include "protocol/uri.h"
32 #include "util/conv.h"
33 #include "util/error.h"
34 #include "util/memdebug.h"
35 #include "util/memory.h"
36 #include "util/string.h"
37 
38 /* Unsafe macros */
39 #include "document/html/internal.h"
40 
41 
42 void
html_a(struct html_context * html_context,unsigned char * a,unsigned char * xxx3,unsigned char * xxx4,unsigned char ** xxx5)43 html_a(struct html_context *html_context, unsigned char *a,
44        unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
45 {
46 	unsigned char *href;
47 
48 	href = get_url_val(a, "href", html_context->options);
49 	if (href) {
50 		unsigned char *target;
51 
52 		mem_free_set(&format.link,
53 			     join_urls(html_context->base_href,
54 				       trim_chars(href, ' ', 0)));
55 
56 		mem_free(href);
57 
58 		target = get_target(html_context->options, a);
59 		if (target) {
60 			mem_free_set(&format.target, target);
61 		} else {
62 			mem_free_set(&format.target, stracpy(html_context->base_target));
63 		}
64 
65 		if (0) {
66 			; /* Shut up compiler */
67 #ifdef CONFIG_GLOBHIST
68 		} else if (get_global_history_item(format.link)) {
69 			format.style.fg = format.vlink;
70 			html_top.pseudo_class &= ~ELEMENT_LINK;
71 			html_top.pseudo_class |= ELEMENT_VISITED;
72 #endif
73 #ifdef CONFIG_BOOKMARKS
74 		} else if (get_bookmark(format.link)) {
75 			format.style.fg = format.bookmark_link;
76 			html_top.pseudo_class &= ~ELEMENT_VISITED;
77 			/* XXX: Really set ELEMENT_LINK? --pasky */
78 			html_top.pseudo_class |= ELEMENT_LINK;
79 #endif
80 		} else {
81 			format.style.fg = format.clink;
82 			html_top.pseudo_class &= ~ELEMENT_VISITED;
83 			html_top.pseudo_class |= ELEMENT_LINK;
84 		}
85 
86 		mem_free_set(&format.title,
87 		             get_attr_val(a, "title", html_context->options));
88 
89 		html_focusable(html_context, a);
90 
91 	} else {
92 		kill_html_stack_item(html_context, &html_top);
93 	}
94 
95 	set_fragment_identifier(html_context, a, "name");
96 }
97 
/* Build a newly allocated copy of @label that is at most @max_len bytes
 * long. A label that is too long keeps its beginning and its end; the
 * dropped middle part is replaced by a single asterisk ('*').
 *
 * @max_len < 0   yields NULL.
 * @max_len == 0  means "no limit": a plain copy of @label is returned.
 *
 * NULL is also returned when the allocation fails.
 *
 * Example:
 * truncate_label("some_string", 5) => "so*ng" */
static unsigned char *
truncate_label(unsigned char *label, int max_len)
{
	int label_len = strlen(label);
	int head_len;
	int tail_len;
	unsigned char *result;

	if (max_len < 0)
		return NULL;

	if (!max_len || max_len >= label_len)
		return stracpy(label);

	/* One byte is reserved for the asterisk. The head takes half of
	 * @max_len (rounded down) and the tail whatever is left, so for an
	 * even @max_len the tail is one byte shorter than the head. */
	head_len = max_len / 2;
	tail_len = max_len - head_len - 1;

	result = mem_alloc(max_len + 1);
	if (!result)
		return NULL;

	/* Either part may be empty; a zero-length copy changes nothing. */
	memcpy(result, label, head_len);
	result[head_len] = '*';
	memcpy(result + head_len + 1, label + label_len - tail_len, tail_len);
	result[max_len] = '\0';

	return result;
}
142 
/* Derive a label from the file name part of an image URL, so that an image
 * can be displayed as e.g. [foo.gif].
 *
 * The file name is what lies between the last directory separator and the
 * first '?' of @src (or the end of @src). It is passed through
 * truncate_label() with @max_len. Returns an allocated string, or NULL when
 * @src is NULL, an allocation fails, or truncate_label() returns NULL. */
static unsigned char *
get_image_filename_from_src(int max_len, unsigned char *src)
{
	unsigned char *name_start;
	unsigned char *name_end;
	unsigned char *filename;
	unsigned char *text;

	if (!src)
		return NULL;

	/* Anything from the first '?' on is not part of the file name. */
	name_end = src + strcspn(src, "?");

	/* Walk back to just after the last directory separator. */
	name_start = name_end;
	while (name_start > src && !dir_sep(name_start[-1]))
		name_start--;

	filename = memacpy(name_start, (int) (name_end - name_start));
	if (!filename)
		return NULL;

	/* XXX: Due to a compatibility alias (added: 2004-12-15 in
	 * 0.10pre3.CVS for document.browse.images.file_tags) @max_len
	 * can be negative here; truncate_label() then returns NULL. */
	text = truncate_label(filename, max_len);
	mem_free(filename);

	return text;
}
174 
175 
176 /* Returns an allocated string containing formatted @label. */
177 static unsigned char *
get_image_label(int max_len,unsigned char * label)178 get_image_label(int max_len, unsigned char *label)
179 {
180 	unsigned char *formatted_label;
181 
182 	if (!label) return NULL;
183 
184 	formatted_label = truncate_label(label, max_len);
185 	mem_free(label);
186 
187 	return formatted_label;
188 }
189 
190 static void
put_image_label(unsigned char * a,unsigned char * label,struct html_context * html_context)191 put_image_label(unsigned char *a, unsigned char *label,
192                 struct html_context *html_context)
193 {
194 	color_T fg;
195 
196 	/* This is not 100% appropriate for <img>, but well, accepting
197 	 * accesskey and tabindex near <img> is just our little
198 	 * extension to the standard. After all, it makes sense. */
199 	html_focusable(html_context, a);
200 
201 	fg = format.style.fg;
202 	format.style.fg = format.image_link;
203 	put_chrs(html_context, label, strlen(label));
204 	format.style.fg = fg;
205 }
206 
207 static void
html_img_do(unsigned char * a,unsigned char * object_src,struct html_context * html_context)208 html_img_do(unsigned char *a, unsigned char *object_src,
209             struct html_context *html_context)
210 {
211 	int ismap, usemap = 0;
212 	int add_brackets = 0;
213 	unsigned char *src = NULL;
214 	unsigned char *label = NULL;
215 	unsigned char *usemap_attr;
216 	struct document_options *options = html_context->options;
217 	int display_style = options->image_link.display_style;
218 
219 	/* Note about display_style:
220 	 * 0     means always display IMG
221 	 * 1     means always display filename
222 	 * 2     means display alt/title attribute if possible, IMG if not
223 	 * 3     means display alt/title attribute if possible, filename if not */
224 
225 	usemap_attr = get_attr_val(a, "usemap", options);
226 	if (usemap_attr) {
227 		unsigned char *joined_urls = join_urls(html_context->base_href,
228 						       usemap_attr);
229 		unsigned char *map_url;
230 
231 		mem_free(usemap_attr);
232 		if (!joined_urls) return;
233 		map_url = straconcat("MAP@", joined_urls, NULL);
234 		mem_free(joined_urls);
235 		if (!map_url) return;
236 
237 		html_stack_dup(html_context, ELEMENT_KILLABLE);
238 		mem_free_set(&format.link, map_url);
239 		format.form = NULL;
240 		format.style.attr |= AT_BOLD;
241 		usemap = 1;
242  	}
243 
244 	ismap = format.link
245 	        && has_attr(a, "ismap", options)
246 	        && !usemap;
247 
248 	if (display_style == 2 || display_style == 3) {
249 		label = get_attr_val(a, "alt", options);
250 		if (!label)
251 			label = get_attr_val(a, "title", options);
252 
253 		/* Little hack to preserve rendering of [   ], in directory listings,
254 		 * but we still want to drop extra spaces in alt or title attribute
255 		 * to limit display width on certain websites. --Zas */
256 		if (label && strlen(label) > 5) clr_spaces(label);
257 	}
258 
259 	src = null_or_stracpy(object_src);
260 	if (!src) src = get_url_val(a, "src", options);
261 	if (!src) src = get_url_val(a, "dynsrc", options);
262 
263 	/* If we have no label yet (no title or alt), so
264 	 * just use default ones, or image filename. */
265 	if (!label || !*label) {
266 		mem_free_set(&label, NULL);
267 		/* Do we want to display images with no alt/title and with no
268 		 * link on them ?
269 		 * If not, just exit now. */
270 		if (!options->images && !format.link) {
271 			mem_free_if(src);
272 			if (usemap) kill_html_stack_item(html_context, &html_top);
273 			return;
274 		}
275 
276 		add_brackets = 1;
277 
278 		if (usemap) {
279 			label = stracpy("USEMAP");
280 		} else if (ismap) {
281 			label = stracpy("ISMAP");
282 		} else {
283 			if (display_style == 3)
284 				label = get_image_filename_from_src(options->image_link.filename_maxlen, src);
285 		}
286 
287 	} else {
288 		label = get_image_label(options->image_link.label_maxlen, label);
289 	}
290 
291 	if (!label || !*label) {
292 		mem_free_set(&label, NULL);
293 		add_brackets = 1;
294 		if (display_style == 1)
295 			label = get_image_filename_from_src(options->image_link.filename_maxlen, src);
296 		if (!label || !*label)
297 			mem_free_set(&label, stracpy("IMG"));
298 	}
299 
300 	mem_free_set(&format.image, NULL);
301 	mem_free_set(&format.title, NULL);
302 
303 	if (label) {
304 		int img_link_tag = options->image_link.tagging;
305 
306 		if (img_link_tag && (img_link_tag == 2 || add_brackets)) {
307 			unsigned char *img_link_prefix = options->image_link.prefix;
308 			unsigned char *img_link_suffix = options->image_link.suffix;
309 			unsigned char *new_label = straconcat(img_link_prefix, label, img_link_suffix, NULL);
310 
311 			if (new_label) mem_free_set(&label, new_label);
312 		}
313 
314 		if (!options->image_link.show_any_as_links) {
315 			put_image_label(a, label, html_context);
316 
317 		} else {
318 			if (src) {
319 				format.image = join_urls(html_context->base_href, src);
320 			}
321 
322 			format.title = get_attr_val(a, "title", options);
323 
324 			if (ismap) {
325 				unsigned char *new_link;
326 
327 				html_stack_dup(html_context, ELEMENT_KILLABLE);
328 				new_link = straconcat(format.link, "?0,0", NULL);
329 				if (new_link)
330 					mem_free_set(&format.link, new_link);
331 			}
332 
333 			put_image_label(a, label, html_context);
334 
335 			if (ismap) kill_html_stack_item(html_context, &html_top);
336 			mem_free_set(&format.image, NULL);
337 			mem_free_set(&format.title, NULL);
338 		}
339 
340 		mem_free(label);
341 	}
342 
343 	mem_free_if(src);
344 	if (usemap) kill_html_stack_item(html_context, &html_top);
345 }
346 
/* Handler for the <img> element. It delegates to html_img_do() without an
 * overriding source URL, so the URL is looked up in the tag's own
 * attributes. @xxx3, @xxx4 and @xxx5 are not used. */
void
html_img(struct html_context *html_context, unsigned char *a,
         unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
{
	unsigned char *no_object_src = NULL;

	html_img_do(a, no_object_src, html_context);
}
353 
354 void
put_link_line(unsigned char * prefix,unsigned char * linkname,unsigned char * link,unsigned char * target,struct html_context * html_context)355 put_link_line(unsigned char *prefix, unsigned char *linkname,
356 	      unsigned char *link, unsigned char *target,
357 	      struct html_context *html_context)
358 {
359 	html_context->has_link_lines = 1;
360 	html_stack_dup(html_context, ELEMENT_KILLABLE);
361 	ln_break(html_context, 1);
362 	mem_free_set(&format.link, NULL);
363 	mem_free_set(&format.target, NULL);
364 	mem_free_set(&format.title, NULL);
365 	format.form = NULL;
366 	put_chrs(html_context, prefix, strlen(prefix));
367 	format.link = join_urls(html_context->base_href, link);
368 	format.target = stracpy(target);
369 	format.style.fg = format.clink;
370 	put_chrs(html_context, linkname, strlen(linkname));
371 	ln_break(html_context, 1);
372 	kill_html_stack_item(html_context, &html_top);
373 }
374 
375 
376 void
html_applet(struct html_context * html_context,unsigned char * a,unsigned char * xxx3,unsigned char * xxx4,unsigned char ** xxx5)377 html_applet(struct html_context *html_context, unsigned char *a,
378             unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
379 {
380 	unsigned char *code, *alt;
381 
382 	code = get_url_val(a, "code", html_context->options);
383 	if (!code) return;
384 
385 	alt = get_attr_val(a, "alt", html_context->options);
386 
387 	html_focusable(html_context, a);
388 
389 	if (alt && *alt) {
390 		put_link_line("Applet: ", alt, code,
391 			      html_context->options->framename, html_context);
392 	} else {
393 		put_link_line("", "Applet", code,
394 			      html_context->options->framename, html_context);
395 	}
396 
397 	mem_free_if(alt);
398 	mem_free(code);
399 }
400 
401 static void
html_iframe_do(unsigned char * a,unsigned char * object_src,struct html_context * html_context)402 html_iframe_do(unsigned char *a, unsigned char *object_src,
403                struct html_context *html_context)
404 {
405 	unsigned char *name, *url = NULL;
406 
407 	url = null_or_stracpy(object_src);
408 	if (!url) url = get_url_val(a, "src", html_context->options);
409 	if (!url) return;
410 
411 	name = get_attr_val(a, "name", html_context->options);
412 	if (!name) name = get_attr_val(a, "id", html_context->options);
413 	if (!name) name = stracpy("");
414 	if (!name) {
415 		mem_free(url);
416 		return;
417 	}
418 
419 	html_focusable(html_context, a);
420 
421 	if (*name) {
422 		put_link_line("IFrame: ", name, url,
423 			      html_context->options->framename, html_context);
424 	} else {
425 		put_link_line("", "IFrame", url,
426 			      html_context->options->framename, html_context);
427 	}
428 
429 	mem_free(name);
430 	mem_free(url);
431 }
432 
/* Handler for the <iframe> element. It delegates to html_iframe_do()
 * without an overriding URL, so the frame URL comes from the tag's "src"
 * attribute. @xxx3, @xxx4 and @xxx5 are not used. */
void
html_iframe(struct html_context *html_context, unsigned char *a,
            unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
{
	unsigned char *no_object_src = NULL;

	html_iframe_do(a, no_object_src, html_context);
}
439 
440 void
html_object(struct html_context * html_context,unsigned char * a,unsigned char * xxx3,unsigned char * xxx4,unsigned char ** xxx5)441 html_object(struct html_context *html_context, unsigned char *a,
442             unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
443 {
444 	unsigned char *type, *url;
445 
446 	/* This is just some dirty wrapper. We emulate various things through
447 	 * this, which is anyway in the spirit of <object> element, unifying
448 	 * <img> and <iframe> etc. */
449 
450 	url = get_url_val(a, "data", html_context->options);
451 	if (!url) url = get_url_val(a, "codebase", html_context->options);
452 	if (!url) return;
453 
454 	type = get_attr_val(a, "type", html_context->options);
455 	if (!type) { mem_free(url); return; }
456 
457 	if (!c_strncasecmp(type, "text/", 5)) {
458 		/* We will just emulate <iframe>. */
459 		html_iframe_do(a, url, html_context);
460 		html_skip(html_context, a);
461 
462 	} else if (!c_strncasecmp(type, "image/", 6)) {
463 		/* <img> emulation. */
464 		/* TODO: Use the enclosed text as 'alt' attribute. */
465 		html_img_do(a, url, html_context);
466 	} else {
467 		unsigned char *name;
468 
469 		name = get_attr_val(a, "standby", html_context->options);
470 
471 		html_focusable(html_context, a);
472 
473 		if (name && *name) {
474 			put_link_line("Object: ", name, url,
475 			              html_context->options->framename,
476 				      html_context);
477 		} else {
478 			put_link_line("Object: ", type, url,
479 			              html_context->options->framename,
480 			              html_context);
481 		}
482 
483 		mem_free_if(name);
484 	}
485 
486 	mem_free(type);
487 	mem_free(url);
488 }
489 
490 void
html_embed(struct html_context * html_context,unsigned char * a,unsigned char * xxx3,unsigned char * xxx4,unsigned char ** xxx5)491 html_embed(struct html_context *html_context, unsigned char *a,
492            unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
493 {
494 	unsigned char *type, *extension;
495 	unsigned char *object_src;
496 
497 	/* This is just some dirty wrapper. We emulate various things through
498 	 * this, which is anyway in the spirit of <object> element, unifying
499 	 * <img> and <iframe> etc. */
500 
501 	object_src = get_url_val(a, "src", html_context->options);
502 	if (!object_src || !*object_src) {
503 		mem_free_set(&object_src, NULL);
504 		return;
505 	}
506 
507 	/* If there is no extension we want to get the default mime/type
508 	 * anyway? */
509 	extension = strrchr(object_src, '.');
510 	if (!extension) extension = object_src;
511 
512 	type = get_extension_content_type(extension);
513 	if (type && !c_strncasecmp(type, "image/", 6)) {
514 		html_img_do(a, object_src, html_context);
515 	} else {
516 		/* We will just emulate <iframe>. */
517 		html_iframe_do(a, object_src, html_context);
518 	}
519 
520 	mem_free_if(type);
521 	mem_free_set(&object_src, NULL);
522 }
523 
524 
525 
526 /* Link types:
527 
528 Alternate
529 	Designates substitute versions for the document in which the link
530 	occurs. When used together with the lang attribute, it implies a
531 	translated version of the document. When used together with the
532 	media attribute, it implies a version designed for a different
533 	medium (or media).
534 
535 Stylesheet
536 	Refers to an external style sheet. See the section on external style
537 	sheets for details. This is used together with the link type
538 	"Alternate" for user-selectable alternate style sheets.
539 
540 Start
541 	Refers to the first document in a collection of documents. This link
542 	type tells search engines which document is considered by the author
543 	to be the starting point of the collection.
544 
545 Next
546 	Refers to the next document in a linear sequence of documents. User
547 	agents may choose to preload the "next" document, to reduce the
548 	perceived load time.
549 
550 Prev
551 	Refers to the previous document in an ordered series of documents.
552 	Some user agents also support the synonym "Previous".
553 
554 Contents
555 	Refers to a document serving as a table of contents.
556 	Some user agents also support the synonym ToC (from "Table of Contents").
557 
558 Index
559 	Refers to a document providing an index for the current document.
560 
561 Glossary
562 	Refers to a document providing a glossary of terms that pertain to the
563 	current document.
564 
565 Copyright
566 	Refers to a copyright statement for the current document.
567 
568 Chapter
569         Refers to a document serving as a chapter in a collection of documents.
570 
571 Section
572 	Refers to a document serving as a section in a collection of documents.
573 
574 Subsection
575 	Refers to a document serving as a subsection in a collection of
576 	documents.
577 
578 Appendix
579 	Refers to a document serving as an appendix in a collection of
580 	documents.
581 
Help
	Refers to a document offering help (more information, links to other
	sources of information, etc.)
585 
586 Bookmark
587 	Refers to a bookmark. A bookmark is a link to a key entry point
588 	within an extended document. The title attribute may be used, for
589 	example, to label the bookmark. Note that several bookmarks may be
590 	defined in each document.
591 
592 Some more were added, like top. --Zas */
593 
/* Kind of relationship expressed by a <link> element, as recognized by
 * html_link_parse(). LT_UNKNOWN is zero, so a zero-filled struct hlink
 * starts out with an unknown type. */
enum hlink_type {
	LT_UNKNOWN = 0,			/* not recognized */

	/* Navigation within a collection of documents. */
	LT_START = 1,
	LT_PARENT = 2,
	LT_NEXT = 3,
	LT_PREV = 4,
	LT_CONTENTS = 5,
	LT_INDEX = 6,
	LT_GLOSSARY = 7,
	LT_CHAPTER = 8,
	LT_SECTION = 9,
	LT_SUBSECTION = 10,
	LT_APPENDIX = 11,

	/* Auxiliary documents and meta information. */
	LT_HELP = 12,
	LT_SEARCH = 13,
	LT_BOOKMARK = 14,
	LT_COPYRIGHT = 15,
	LT_AUTHOR = 16,
	LT_ICON = 17,

	/* Alternate versions and style sheets. */
	LT_ALTERNATE = 18,
	LT_ALTERNATE_LANG = 19,
	LT_ALTERNATE_MEDIA = 20,
	LT_ALTERNATE_STYLESHEET = 21,
	LT_STYLESHEET = 22,
};
619 
/* Which attribute of a <link> element supplied the relationship name. */
enum hlink_direction {
	LD_UNKNOWN = 0,	/* neither "rel" nor "rev" was present */
	LD_REV = 1,	/* name taken from the "rev" attribute */
	LD_REL = 2,	/* name taken from the "rel" attribute */
};
625 
626 struct hlink {
627 	enum hlink_type type;
628 	enum hlink_direction direction;
629 	unsigned char *content_type;
630 	unsigned char *media;
631 	unsigned char *href;
632 	unsigned char *hreflang;
633 	unsigned char *title;
634 	unsigned char *lang;
635 	unsigned char *name;
636 /* Not implemented yet.
637 	unsigned char *charset;
638 	unsigned char *target;
639 	unsigned char *id;
640 	unsigned char *class;
641 	unsigned char *dir;
642 */
643 };
644 
645 struct lt_default_name {
646 	enum hlink_type type;
647 	unsigned char *str;
648 };
649 
650 /* TODO: i18n */
651 /* XXX: Keep the (really really ;) default name first */
652 static struct lt_default_name lt_names[] = {
653 	{ LT_START, "start" },
654 	{ LT_START, "top" },
655 	{ LT_START, "home" },
656 	{ LT_PARENT, "parent" },
657 	{ LT_PARENT, "up" },
658 	{ LT_NEXT, "next" },
659 	{ LT_PREV, "previous" },
660 	{ LT_PREV, "prev" },
661 	{ LT_CONTENTS, "contents" },
662 	{ LT_CONTENTS, "toc" },
663 	{ LT_INDEX, "index" },
664 	{ LT_GLOSSARY, "glossary" },
665 	{ LT_CHAPTER, "chapter" },
666 	{ LT_SECTION, "section" },
667 	{ LT_SUBSECTION, "subsection" },
668 	{ LT_SUBSECTION, "child" },
669 	{ LT_SUBSECTION, "sibling" },
670 	{ LT_APPENDIX, "appendix" },
671 	{ LT_HELP, "help" },
672 	{ LT_SEARCH, "search" },
673 	{ LT_BOOKMARK, "bookmark" },
674 	{ LT_ALTERNATE_LANG, "alt. language" },
675 	{ LT_ALTERNATE_MEDIA, "alt. media" },
676 	{ LT_ALTERNATE_STYLESHEET, "alt. stylesheet" },
677 	{ LT_STYLESHEET, "stylesheet" },
678 	{ LT_ALTERNATE, "alternate" },
679 	{ LT_COPYRIGHT, "copyright" },
680 	{ LT_AUTHOR, "author" },
681 	{ LT_AUTHOR, "made" },
682 	{ LT_AUTHOR, "owner" },
683 	{ LT_ICON, "icon" },
684 	{ LT_UNKNOWN, NULL }
685 };
686 
687 /* Search for default name for this link according to its type. */
688 static unsigned char *
get_lt_default_name(struct hlink * link)689 get_lt_default_name(struct hlink *link)
690 {
691 	struct lt_default_name *entry = lt_names;
692 
693 	assert(link);
694 
695 	while (entry && entry->str) {
696 		if (entry->type == link->type) return entry->str;
697 		entry++;
698 	}
699 
700 	return "unknown";
701 }
702 
703 static void
html_link_clear(struct hlink * link)704 html_link_clear(struct hlink *link)
705 {
706 	assert(link);
707 
708 	mem_free_if(link->content_type);
709 	mem_free_if(link->media);
710 	mem_free_if(link->href);
711 	mem_free_if(link->hreflang);
712 	mem_free_if(link->title);
713 	mem_free_if(link->lang);
714 	mem_free_if(link->name);
715 
716 	memset(link, 0, sizeof(*link));
717 }
718 
719 /* Parse a link and return results in @link.
720  * It tries to identify known types. */
721 static int
html_link_parse(struct html_context * html_context,unsigned char * a,struct hlink * link)722 html_link_parse(struct html_context *html_context, unsigned char *a,
723                 struct hlink *link)
724 {
725 	int i;
726 
727 	assert(a && link);
728 	memset(link, 0, sizeof(*link));
729 
730 	link->href = get_url_val(a, "href", html_context->options);
731 	if (!link->href) return 0;
732 
733 	link->lang = get_attr_val(a, "lang", html_context->options);
734 	link->hreflang = get_attr_val(a, "hreflang", html_context->options);
735 	link->title = get_attr_val(a, "title", html_context->options);
736 	link->content_type = get_attr_val(a, "type", html_context->options);
737 	link->media = get_attr_val(a, "media", html_context->options);
738 
739 	link->name = get_attr_val(a, "rel", html_context->options);
740 	if (link->name) {
741 		link->direction = LD_REL;
742 	} else {
743 		link->name = get_attr_val(a, "rev", html_context->options);
744 		if (link->name) link->direction = LD_REV;
745 	}
746 
747 	if (!link->name) return 1;
748 
749 	/* TODO: fastfind */
750 	for (i = 0; lt_names[i].str; i++)
751 		if (!c_strcasecmp(link->name, lt_names[i].str)) {
752 			link->type = lt_names[i].type;
753 			return 1;
754 		}
755 
756 	if (c_strcasestr(link->name, "icon") ||
757 	   (link->content_type && c_strcasestr(link->content_type, "icon"))) {
758 		link->type = LT_ICON;
759 
760 	} else if (c_strcasestr(link->name, "alternate")) {
761 		link->type = LT_ALTERNATE;
762 		if (link->lang)
763 			link->type = LT_ALTERNATE_LANG;
764 		else if (c_strcasestr(link->name, "stylesheet") ||
765 			 (link->content_type && c_strcasestr(link->content_type, "css")))
766 			link->type = LT_ALTERNATE_STYLESHEET;
767 		else if (link->media)
768 			link->type = LT_ALTERNATE_MEDIA;
769 
770 	} else if (link->content_type && c_strcasestr(link->content_type, "css")) {
771 		link->type = LT_STYLESHEET;
772 	}
773 
774 	return 1;
775 }
776 
777 void
html_link(struct html_context * html_context,unsigned char * a,unsigned char * xxx3,unsigned char * xxx4,unsigned char ** xxx5)778 html_link(struct html_context *html_context, unsigned char *a,
779           unsigned char *xxx3, unsigned char *xxx4, unsigned char **xxx5)
780 {
781 	int link_display = html_context->options->meta_link_display;
782 	unsigned char *name;
783 	struct hlink link;
784 	struct string text;
785 	int name_neq_title = 0;
786 	int first = 1;
787 
788 #ifndef CONFIG_CSS
789 	if (!link_display) return;
790 #endif
791 	if (!html_link_parse(html_context, a, &link)) return;
792 	if (!link.href) goto free_and_return;
793 
794 #ifdef CONFIG_CSS
795 	if (link.type == LT_STYLESHEET) {
796 		int len = strlen(link.href);
797 
798 		import_css_stylesheet(&html_context->css_styles,
799 				      html_context->base_href, link.href, len);
800 	}
801 
802 	if (!link_display) goto free_and_return;
803 #endif
804 
805 	/* Ignore few annoying links.. */
806 	if (link_display < 5 &&
807 	    (link.type == LT_ICON ||
808 	     link.type == LT_AUTHOR ||
809 	     link.type == LT_STYLESHEET ||
810 	     link.type == LT_ALTERNATE_STYLESHEET)) goto free_and_return;
811 
812 	if (!link.name || link.type != LT_UNKNOWN)
813 		/* Give preference to our default names for known types. */
814 		name = get_lt_default_name(&link);
815 	else
816 		name = link.name;
817 
818 	if (!name) goto free_and_return;
819 	if (!init_string(&text)) goto free_and_return;
820 
821 	html_focusable(html_context, a);
822 
823 	if (link.title) {
824 		add_to_string(&text, link.title);
825 		name_neq_title = strcmp(link.title, name);
826 	} else
827 		add_to_string(&text, name);
828 
829 	if (link_display == 1) goto put_link_line;	/* Only title */
830 
831 #define APPEND(what) do { \
832 		add_to_string(&text, first ? " (" : ", "); \
833 		add_to_string(&text, (what)); \
834 		first = 0; \
835 	} while (0)
836 
837 	if (name_neq_title) {
838 		APPEND(name);
839 	}
840 
841 	if (link_display >= 3 && link.hreflang) {
842 		APPEND(link.hreflang);
843 	}
844 
845 	if (link_display >= 4 && link.content_type) {
846 		APPEND(link.content_type);
847 	}
848 
849 	if (link.lang && link.type == LT_ALTERNATE_LANG &&
850 	    (link_display < 3 || (link.hreflang &&
851 				  c_strcasecmp(link.hreflang, link.lang)))) {
852 		APPEND(link.lang);
853 	}
854 
855 	if (link.media) {
856 		APPEND(link.media);
857 	}
858 
859 #undef APPEND
860 
861 	if (!first) add_char_to_string(&text, ')');
862 
863 put_link_line:
864 	{
865 		unsigned char *prefix = (link.direction == LD_REL)
866 					? "Link: " : "Reverse link: ";
867 		unsigned char *link_name = (text.length)
868 					   ? text.source : name;
869 
870 		put_link_line(prefix, link_name, link.href,
871 			      html_context->base_target, html_context);
872 
873 		if (text.source) done_string(&text);
874 	}
875 
876 free_and_return:
877 	html_link_clear(&link);
878 }
879