1 /*
2 * File: html.cc
3 *
4 * Copyright (C) 2005-2007 Jorge Arellano Cid <jcid@dillo.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
10 */
11
12 /*
13 * Dillo HTML parsing routines
14 */
15
16 /*-----------------------------------------------------------------------------
17 * Includes
18 *---------------------------------------------------------------------------*/
19 #include <ctype.h> /* for isspace */
20 #include <string.h> /* for memcpy and memmove */
21 #include <stdlib.h>
22 #include <stdio.h> /* for sprintf */
23 #include <errno.h>
24
25 #include "bw.h" /* for BrowserWindow */
26 #include "msg.h"
27 #include "binaryconst.h"
28 #include "colors.h"
29 #include "utf8.hh"
30
31 #include "misc.h"
32 #include "uicmd.hh"
33 #include "history.h"
34 #include "menu.hh"
35 #include "prefs.h"
36 #include "capi.h"
37 #include "html.hh"
38 #include "html_common.hh"
39 #include "form.hh"
40 #include "table.hh"
41
42 #include "dw/textblock.hh"
43 #include "dw/bullet.hh"
44 #include "dw/listitem.hh"
45 #include "dw/image.hh"
46 #include "dw/ruler.hh"
47
48 /*-----------------------------------------------------------------------------
49 * Defines
50 *---------------------------------------------------------------------------*/
51
/* Define to 1 to ignore white space immediately after an open tag,
 * and immediately before a close tag. It is 0 (disabled) here. */
#define SGML_SPCDEL 0

/* Tab width, in columns.
 * NOTE(review): no use of this macro is visible in this part of the file;
 * presumably it controls tab expansion -- confirm where it is referenced. */
#define TAB_SIZE 8
57
58 /*-----------------------------------------------------------------------------
59 * Name spaces
60 *---------------------------------------------------------------------------*/
61 using namespace lout;
62 using namespace dw;
63 using namespace dw::core;
64 using namespace dw::core::ui;
65 using namespace dw::core::style;
66
67 /*-----------------------------------------------------------------------------
68 * Typedefs
69 *---------------------------------------------------------------------------*/
class DilloHtml;

/* Handler signatures stored in the parsing table: the "open" and "content"
 * slots receive the tag text and its size, the "close" slot only the
 * parser object. */
typedef void (*TagOpenFunct) (DilloHtml *html, const char *tag, int tagsize);
typedef void (*TagCloseFunct) (DilloHtml *html);

/* Tag-parsing states (values 0..6, in declaration order). */
enum DilloHtmlTagParsingState {
   SEEK_ATTR_START,
   MATCH_ATTR_NAME,
   SEEK_TOKEN_START,
   SEEK_VALUE_START,
   SKIP_VALUE,
   GET_VALUE,
   FINISHED
};

/* Tag-parsing option bits; each flag is a distinct bit so they can be
 * OR-ed together. */
enum DilloHtmlTagParsingFlags {
   HTML_LeftTrim      = 1 << 0,
   HTML_RightTrim     = 1 << 1,
   HTML_ParseEntities = 1 << 2
};
89
90
/*
 * Exported function with C linkage.
 * a_Html_text() is defined further down in this file; declaring it inside
 * extern "C" gives the definition C linkage as well.
 */
extern "C" {
void *a_Html_text(const char *type, void *P, CA_Callback_t *Call,void **Data);
}
97
98 /*-----------------------------------------------------------------------------
99 * Forward declarations
100 *---------------------------------------------------------------------------*/
/* Prototypes for file-local helpers that are called in this file before
 * their definitions appear. */
static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof);
static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
                            const DilloUrl *requester, DilloImage *image);
static void Html_callback(int Op, CacheClient_t *Client);
static void Html_tag_cleanup_at_close(DilloHtml *html, int TagIdx);
106
107 /*-----------------------------------------------------------------------------
108 * Local Data
109 *---------------------------------------------------------------------------*/
110 /* Parsing table structure */
111 typedef struct {
112 const char *name; /* element name */
113 unsigned char Flags; /* flags (explained near the table data) */
114 char EndTag; /* Is it Required, Optional or Forbidden */
115 uchar_t TagLevel; /* Used to heuristically parse bad HTML */
116 TagOpenFunct open; /* Open function */
117 TagOpenFunct content; /* Content function */
118 TagCloseFunct close; /* Close function */
119 } TagInfo;
120 extern const TagInfo Tags[];
121
122 /*-----------------------------------------------------------------------------
123 *-----------------------------------------------------------------------------
124 * Main Code
125 *-----------------------------------------------------------------------------
126 *---------------------------------------------------------------------------*/
127
128 /*
129 * Collect HTML error strings.
130 */
bugMessage(const char * format,...)131 void DilloHtml::bugMessage(const char *format, ... )
132 {
133 va_list argp;
134
135 if (bw->num_page_bugs)
136 dStr_append_c(bw->page_bugs, '\n');
137 dStr_sprintfa(bw->page_bugs,
138 "HTML warning: line %d, ",
139 getCurrLineNumber());
140 va_start(argp, format);
141 dStr_vsprintfa(bw->page_bugs, format, argp);
142 va_end(argp);
143 a_UIcmd_set_bug_prog(bw, ++bw->num_page_bugs);
144 }
145
146 /*
147 * Wrapper for a_Url_new that adds an error detection message.
148 * If use_base_url is TRUE, it uses base_url. Otherwise it uses html->base_url.
149 */
a_Html_url_new(DilloHtml * html,const char * url_str,const char * base_url,int use_base_url)150 DilloUrl *a_Html_url_new(DilloHtml *html,
151 const char *url_str, const char *base_url,
152 int use_base_url)
153 {
154 DilloUrl *url;
155 int n_ic, n_ic_spc;
156
157 url = a_Url_new(url_str,
158 (use_base_url) ? base_url : URL_STR_(html->base_url));
159 if ((n_ic = URL_ILLEGAL_CHARS(url)) != 0) {
160 const char *suffix = (n_ic) > 1 ? "s" : "";
161 n_ic_spc = URL_ILLEGAL_CHARS_SPC(url);
162 if (n_ic == n_ic_spc) {
163 BUG_MSG("URL has %d illegal space%s ('%s').", n_ic, suffix, url_str);
164 } else if (n_ic_spc == 0) {
165 BUG_MSG("URL has %d illegal byte%s in {00-1F, 7F-FF} range ('%s').",
166 n_ic, suffix, url_str);
167 } else {
168 BUG_MSG("URL has %d illegal byte%s: "
169 "%d space%s and %d in {00-1F, 7F-FF} range ('%s').",
170 n_ic, suffix,
171 n_ic_spc, n_ic_spc > 1 ? "s" : "", n_ic-n_ic_spc, url_str);
172 }
173 }
174 return url;
175 }
176
177 /*
178 * Set callback function and callback data for the "html/text" MIME type.
179 */
a_Html_text(const char * Type,void * P,CA_Callback_t * Call,void ** Data)180 void *a_Html_text(const char *Type, void *P, CA_Callback_t *Call, void **Data)
181 {
182 DilloWeb *web = (DilloWeb*)P;
183 DilloHtml *html = new DilloHtml(web->bw, web->url, Type);
184
185 *Data = (void*)html;
186 *Call = (CA_Callback_t)Html_callback;
187
188 return (void*)html->dw;
189 }
190
Html_free(void * data)191 static void Html_free(void *data)
192 {
193 delete ((DilloHtml*)data);
194 }
195
196 /*
197 * Used by the "Load images" page menuitem.
198 */
a_Html_load_images(void * v_html,DilloUrl * pattern)199 void a_Html_load_images(void *v_html, DilloUrl *pattern)
200 {
201 DilloHtml *html = (DilloHtml*)v_html;
202
203 html->loadImages(pattern);
204 }
205
206 /*
207 * Search for form
208 */
Html_contains_form(DilloHtml * html,void * v_form)209 static bool Html_contains_form(DilloHtml *html, void *v_form)
210 {
211 for (int i = 0; i < html->forms->size(); i++) {
212 if (html->forms->get(i) == v_form) {
213 return true;
214 }
215 }
216 return false;
217 }
218
219 /*
220 * Used by the "Submit form" form menuitem.
221 */
a_Html_form_submit(void * v_html,void * v_form)222 void a_Html_form_submit(void *v_html, void *v_form)
223 {
224 DilloHtml *html = (DilloHtml*)v_html;
225
226 if (Html_contains_form(html, v_form)) {
227 /* it's still valid */
228 a_Html_form_submit2(v_form);
229 }
230 }
231
232 /*
233 * Used by the "Reset form" form menuitem.
234 */
a_Html_form_reset(void * v_html,void * v_form)235 void a_Html_form_reset(void *v_html, void *v_form)
236 {
237 DilloHtml *html = (DilloHtml*)v_html;
238
239 if (Html_contains_form(html, v_form)) {
240 /* it's still valid */
241 a_Html_form_reset2(v_form);
242 }
243 }
244
245 /*
246 * Used by the "Show/Hide hiddens" form menuitem.
247 */
a_Html_form_display_hiddens(void * v_html,void * v_form,bool_t display)248 void a_Html_form_display_hiddens(void *v_html, void *v_form, bool_t display)
249 {
250 DilloHtml *html = (DilloHtml*)v_html;
251
252 if (Html_contains_form(html, v_form)) {
253 /* it's still valid */
254 a_Html_form_display_hiddens2(v_form, (display != 0));
255 }
256 }
257
258 /*
259 * Set the URL data for image maps.
260 */
Html_set_link_coordinates(DilloHtml * html,int link,int x,int y)261 static void Html_set_link_coordinates(DilloHtml *html, int link, int x, int y)
262 {
263 char data[64];
264
265 if (x != -1) {
266 snprintf(data, 64, "?%d,%d", x, y);
267 a_Url_set_ismap_coords(html->links->get(link), data);
268 }
269 }
270
271 /*
272 * Create a new link, set it as the url's parent
273 * and return the index.
274 */
Html_set_new_link(DilloHtml * html,DilloUrl ** url)275 static int Html_set_new_link(DilloHtml *html, DilloUrl **url)
276 {
277 int nl = html->links->size();
278 html->links->increase();
279 html->links->set(nl, (*url) ? *url : NULL);
280 return nl;
281 }
282
283 /*
284 * Evaluates the ALIGN attribute (left|center|right|justify) and
285 * sets the style at the top of the stack.
286 */
a_Html_tag_set_align_attr(DilloHtml * html,const char * tag,int tagsize)287 void a_Html_tag_set_align_attr(DilloHtml *html, const char *tag, int tagsize)
288 {
289 const char *align;
290
291 if ((align = a_Html_get_attr(html, tag, tagsize, "align"))) {
292 TextAlignType textAlignType = TEXT_ALIGN_LEFT;
293
294 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
295 BUG_MSG("The align attribute is obsolete in HTML5.");
296
297 if (dStrAsciiCasecmp (align, "left") == 0)
298 textAlignType = TEXT_ALIGN_LEFT;
299 else if (dStrAsciiCasecmp (align, "right") == 0)
300 textAlignType = TEXT_ALIGN_RIGHT;
301 else if (dStrAsciiCasecmp (align, "center") == 0)
302 textAlignType = TEXT_ALIGN_CENTER;
303 else if (dStrAsciiCasecmp (align, "justify") == 0)
304 textAlignType = TEXT_ALIGN_JUSTIFY;
305 #if 0
306 else if (dStrAsciiCasecmp (align, "char") == 0) {
307 /* TODO: Actually not supported for <p> etc. */
308 v.textAlign = TEXT_ALIGN_STRING;
309 if ((charattr = a_Html_get_attr(html, tag, tagsize, "char"))) {
310 if (charattr[0] == 0)
311 /* TODO: ALIGN=" ", and even ALIGN="&32;" will reult in
312 * an empty string (don't know whether the latter is
313 * correct, has to be clarified with the specs), so
314 * that for empty strings, " " is assumed. */
315 style_attrs.textAlignChar = ' ';
316 else
317 style_attrs.textAlignChar = charattr[0];
318 } else
319 /* TODO: Examine LANG attr of <html>. */
320 style_attrs.textAlignChar = '.';
321 }
322 #endif
323 html->styleEngine->setNonCssHint(CSS_PROPERTY_TEXT_ALIGN, CSS_TYPE_ENUM,
324 textAlignType);
325 }
326 }
327
328 /*
329 * Evaluates the VALIGN attribute (top|bottom|middle|baseline) and
330 * sets the style in style_attrs. Returns true when set.
331 */
a_Html_tag_set_valign_attr(DilloHtml * html,const char * tag,int tagsize)332 bool a_Html_tag_set_valign_attr(DilloHtml *html, const char *tag, int tagsize)
333 {
334 const char *attr;
335 VAlignType valign;
336
337 if ((attr = a_Html_get_attr(html, tag, tagsize, "valign"))) {
338 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
339 BUG_MSG("The valign attribute is obsolete in HTML5.");
340
341 if (dStrAsciiCasecmp (attr, "top") == 0)
342 valign = VALIGN_TOP;
343 else if (dStrAsciiCasecmp (attr, "bottom") == 0)
344 valign = VALIGN_BOTTOM;
345 else if (dStrAsciiCasecmp (attr, "baseline") == 0)
346 valign = VALIGN_BASELINE;
347 else
348 valign = VALIGN_MIDDLE;
349
350 html->styleEngine->setNonCssHint (CSS_PROPERTY_VERTICAL_ALIGN,
351 CSS_TYPE_ENUM, valign);
352 return true;
353 } else
354 return false;
355 }
356
357
358 /*
359 * Create and add a new Textblock to the current Textblock
360 */
Html_add_textblock(DilloHtml * html,int space)361 static void Html_add_textblock(DilloHtml *html, int space)
362 {
363 Textblock *textblock = new Textblock (prefs.limit_text_width);
364
365 HT2TB(html)->addParbreak (space, html->wordStyle ());
366 HT2TB(html)->addWidget (textblock, html->style ());
367 HT2TB(html)->addParbreak (space, html->wordStyle ());
368 S_TOP(html)->textblock = html->dw = textblock;
369 S_TOP(html)->hand_over_break = true;
370 }
371
372 /*
373 * Create and initialize a new DilloHtml class
374 */
DilloHtml(BrowserWindow * p_bw,const DilloUrl * url,const char * content_type)375 DilloHtml::DilloHtml(BrowserWindow *p_bw, const DilloUrl *url,
376 const char *content_type)
377 {
378 /* Init main variables */
379 bw = p_bw;
380 page_url = a_Url_dup(url);
381 base_url = a_Url_dup(url);
382 dw = NULL;
383
384 /* Init event receiver */
385 linkReceiver.html = this;
386 HT2LT(this)->connectLink (&linkReceiver);
387
388 a_Bw_add_doc(p_bw, this);
389
390 /* Init for-parsing variables */
391 Start_Buf = NULL;
392 Start_Ofs = 0;
393
394 _MSG("DilloHtml(): content type: %s\n", content_type);
395 this->content_type = dStrdup(content_type);
396
397 /* get charset */
398 a_Misc_parse_content_type(content_type, NULL, NULL, &charset);
399
400 stop_parser = false;
401
402 CurrOfs = OldOfs = 0;
403 OldLine = 1;
404
405 DocType = DT_NONE; /* assume Tag Soup 0.0! :-) */
406 DocTypeVersion = 0.0f;
407
408 styleEngine = new StyleEngine (HT2LT (this), page_url, base_url);
409
410 cssUrls = new misc::SimpleVector <DilloUrl*> (1);
411
412 stack = new misc::SimpleVector <DilloHtmlState> (16);
413 stack->increase();
414 stack->getRef(0)->parse_mode = DILLO_HTML_PARSE_MODE_INIT;
415 stack->getRef(0)->table_mode = DILLO_HTML_TABLE_MODE_NONE;
416 stack->getRef(0)->table_border_mode = DILLO_HTML_TABLE_BORDER_SEPARATE;
417 stack->getRef(0)->cell_text_align_set = false;
418 stack->getRef(0)->display_none = false;
419 stack->getRef(0)->list_type = HTML_LIST_NONE;
420 stack->getRef(0)->list_number = 0;
421 stack->getRef(0)->tag_idx = -1; /* MUST not be used */
422 stack->getRef(0)->textblock = NULL;
423 stack->getRef(0)->table = NULL;
424 stack->getRef(0)->ref_list_item = NULL;
425 stack->getRef(0)->hand_over_break = false;
426
427 InFlags = IN_NONE;
428
429 Stash = dStr_new("");
430 StashSpace = false;
431
432 pre_column = 0;
433 PreFirstChar = false;
434 PrevWasCR = false;
435 InVisitedLink = false;
436 ReqTagClose = false;
437 TagSoup = true;
438 loadCssFromStash = false;
439
440 Num_HTML = Num_HEAD = Num_BODY = Num_TITLE = 0;
441
442 attr_data = dStr_sized_new(1024);
443
444 non_css_link_color = -1;
445 non_css_visited_color = -1;
446 visited_color = -1;
447
448 /* Init page-handling variables */
449 forms = new misc::SimpleVector <DilloHtmlForm*> (1);
450 inputs_outside_form = new misc::SimpleVector <DilloHtmlInput*> (1);
451 links = new misc::SimpleVector <DilloUrl*> (64);
452 images = new misc::SimpleVector <DilloHtmlImage*> (16);
453
454 /* Initialize the main widget */
455 initDw();
456 /* Hook destructor to the dw delete call */
457 dw->setDeleteCallback(Html_free, this);
458 }
459
460 /*
461 * Miscellaneous initializations for Dw
462 */
initDw()463 void DilloHtml::initDw()
464 {
465 dReturn_if_fail (dw == NULL);
466
467 /* Create the main widget */
468 dw = stack->getRef(0)->textblock = new Textblock (prefs.limit_text_width);
469
470 bw->num_page_bugs = 0;
471 dStr_truncate(bw->page_bugs, 0);
472 }
473
474 /*
475 * Free memory used by the DilloHtml class.
476 */
~DilloHtml()477 DilloHtml::~DilloHtml()
478 {
479 _MSG("::~DilloHtml(this=%p)\n", this);
480
481 freeParseData();
482
483 a_Bw_remove_doc(bw, this);
484
485 a_Url_free(page_url);
486 a_Url_free(base_url);
487
488 for (int i = 0; i < cssUrls->size(); i++)
489 a_Url_free(cssUrls->get(i));
490 delete (cssUrls);
491
492 for (int i = 0; i < forms->size(); i++)
493 a_Html_form_delete (forms->get(i));
494 delete(forms);
495
496 for (int i = 0; i < inputs_outside_form->size(); i++)
497 a_Html_input_delete(inputs_outside_form->get(i));
498 delete(inputs_outside_form);
499
500 for (int i = 0; i < links->size(); i++)
501 a_Url_free(links->get(i));
502 delete (links);
503
504 for (int i = 0; i < images->size(); i++) {
505 DilloHtmlImage *img = images->get(i);
506 a_Url_free(img->url);
507 a_Image_unref(img->image);
508 dFree(img);
509 }
510 delete (images);
511
512 delete styleEngine;
513 }
514
515 /*
516 * Process the newly arrived html and put it into the page structure.
517 * (This function is called by Html_callback whenever there's new data)
518 */
write(char * Buf,int BufSize,int Eof)519 void DilloHtml::write(char *Buf, int BufSize, int Eof)
520 {
521 int token_start;
522 char *buf = Buf + Start_Ofs;
523 int bufsize = BufSize - Start_Ofs;
524
525 _MSG("DilloHtml::write BufSize=%d Start_Ofs=%d\n", BufSize, Start_Ofs);
526 #if 0
527 char *aux = dStrndup(Buf, BufSize);
528 MSG(" {%s}\n", aux);
529 dFree(aux);
530 #endif
531
532 /* Update Start_Buf. It may be used after the parser is stopped */
533 Start_Buf = Buf;
534
535 dReturn_if (dw == NULL);
536 dReturn_if (stop_parser == true);
537
538 token_start = Html_write_raw(this, buf, bufsize, Eof);
539 Start_Ofs += token_start;
540 }
541
542 /*
543 * Return the line number of the tag/word being processed by the parser.
544 * Also update the offsets.
545 */
getCurrLineNumber()546 int DilloHtml::getCurrLineNumber()
547 {
548 int i, ofs, line;
549 const char *p = Start_Buf;
550
551 dReturn_val_if_fail(p != NULL, -1);
552 /* Disable line counting for META hack. Buffers differ. */
553 dReturn_val_if((InFlags & IN_META_HACK), -1);
554
555 ofs = CurrOfs;
556 line = OldLine;
557 for (i = OldOfs; i < ofs; ++i)
558 if (p[i] == '\n' || (p[i] == '\r' && p[i+1] != '\n'))
559 ++line;
560 OldOfs = CurrOfs;
561 OldLine = line;
562 return line;
563 }
564
565 /*
566 * Free parsing data.
567 */
freeParseData()568 void DilloHtml::freeParseData()
569 {
570 delete(stack);
571
572 dStr_free(Stash, TRUE);
573 dStr_free(attr_data, TRUE);
574 dFree(content_type);
575 dFree(charset);
576 }
577
578 /*
579 * Finish parsing a HTML page. Close the parser and close the client.
580 * The class is not deleted here, it remains until the widget is destroyed.
581 */
finishParsing(int ClientKey)582 void DilloHtml::finishParsing(int ClientKey)
583 {
584 int si;
585
586 dReturn_if (stop_parser == true);
587
588 /* flag we've already parsed up to the last byte */
589 InFlags |= IN_EOF;
590
591 /* force the close of elements left open (TODO: not for XHTML) */
592 while ((si = stack->size() - 1)) {
593 if (stack->getRef(si)->tag_idx != -1) {
594 Html_tag_cleanup_at_close(this, stack->getRef(si)->tag_idx);
595 }
596 }
597
598 /* Nothing left to do with the parser. Clear all flags, except EOF. */
599 InFlags = IN_EOF;
600
601 /* Remove this client from our active list */
602 a_Bw_close_client(bw, ClientKey);
603 }
604
605 /*
606 * Allocate and insert form information.
607 */
formNew(DilloHtmlMethod method,const DilloUrl * action,DilloHtmlEnc enc,const char * charset)608 int DilloHtml::formNew(DilloHtmlMethod method, const DilloUrl *action,
609 DilloHtmlEnc enc, const char *charset)
610 {
611 // avoid data loss on repush after CSS stylesheets have been loaded
612 bool enabled = bw->NumPendingStyleSheets == 0;
613 DilloHtmlForm *form = a_Html_form_new (this, method, action,
614 enc, charset, enabled);
615 int nf = forms->size ();
616 forms->increase ();
617 forms->set (nf, form);
618 _MSG("Html formNew: action=%s nform=%d\n", action, nf);
619 return forms->size();
620 }
621
622 /*
623 * Get the current form.
624 */
getCurrentForm()625 DilloHtmlForm *DilloHtml::getCurrentForm ()
626 {
627 return forms->get (forms->size() - 1);
628 }
629
unloadedImages()630 bool_t DilloHtml::unloadedImages()
631 {
632 for (int i = 0; i < images->size(); i++) {
633 if (images->get(i)->image != NULL) {
634 return TRUE;
635 }
636 }
637 return FALSE;
638 }
639
640 /*
641 * Load images if they were disabled.
642 */
loadImages(const DilloUrl * pattern)643 void DilloHtml::loadImages (const DilloUrl *pattern)
644 {
645 dReturn_if (a_Bw_expecting(bw));
646
647 /* If the user asked for a specific image, the user (NULL) is the requester,
648 * and the domain mechanism will always permit the request. But if the user
649 * just asked for all images (clicking "Load images"), use the page URL as
650 * the requester so that the domain mechanism can act as a filter.
651 * If the possible patterns become more complex, it might be good to have
652 * the caller supply the requester instead.
653 */
654 const DilloUrl *requester = pattern ? NULL : this->page_url;
655
656 for (int i = 0; i < images->size(); i++) {
657 DilloHtmlImage *hi = images->get(i);
658
659 if (hi->image) {
660 assert(hi->url);
661 if ((!pattern) || (!a_Url_cmp(hi->url, pattern))) {
662 if (Html_load_image(bw, hi->url, requester, hi->image)) {
663 a_Image_unref (hi->image);
664 hi->image = NULL; // web owns it now
665 }
666 }
667 }
668 }
669 }
670
671 /*
672 * Save URL in a vector (may be loaded later).
673 */
addCssUrl(const DilloUrl * url)674 void DilloHtml::addCssUrl(const DilloUrl *url)
675 {
676 int nu = cssUrls->size();
677 cssUrls->increase();
678 cssUrls->set(nu, a_Url_dup(url));
679 }
680
enter(Widget * widget,int link,int img,int x,int y)681 bool DilloHtml::HtmlLinkReceiver::enter (Widget *widget, int link, int img,
682 int x, int y)
683 {
684 BrowserWindow *bw = html->bw;
685
686 _MSG(" ** ");
687 if (link == -1) {
688 _MSG(" Link LEAVE notify...\n");
689 a_UIcmd_set_msg(bw, "");
690 } else {
691 _MSG(" Link ENTER notify...\n");
692 Html_set_link_coordinates(html, link, x, y);
693 a_UIcmd_set_msg(bw, "%s", URL_STR(html->links->get(link)));
694 }
695 return true;
696 }
697
698 /*
699 * Handle the "press" signal.
700 */
press(Widget * widget,int link,int img,int x,int y,EventButton * event)701 bool DilloHtml::HtmlLinkReceiver::press (Widget *widget, int link, int img,
702 int x, int y, EventButton *event)
703 {
704 BrowserWindow *bw = html->bw;
705 int ret = false;
706 DilloUrl *linkurl = NULL;
707
708 _MSG("pressed button %d\n", event->button);
709 if (event->button == 3) {
710 // popup menus
711 if (img != -1) {
712 // image menu
713 if (link != -1)
714 linkurl = html->links->get(link);
715 const bool_t loaded_img = (html->images->get(img)->image == NULL);
716 a_UIcmd_image_popup(bw, html->images->get(img)->url, loaded_img,
717 html->page_url, linkurl);
718 ret = true;
719 } else {
720 if (link == -1) {
721 a_UIcmd_page_popup(bw, bw->num_page_bugs != 0, html->cssUrls);
722 ret = true;
723 } else {
724 a_UIcmd_link_popup(bw, html->links->get(link));
725 ret = true;
726 }
727 }
728 }
729 return ret;
730 }
731
732 /*
733 * Handle the "click" signal.
734 */
click(Widget * widget,int link,int img,int x,int y,EventButton * event)735 bool DilloHtml::HtmlLinkReceiver::click (Widget *widget, int link, int img,
736 int x, int y, EventButton *event)
737 {
738 BrowserWindow *bw = html->bw;
739
740 if ((img != -1) && (html->images->get(img)->image)) {
741 // clicked an image that has not already been loaded
742 if (event->button == 1){
743 // load all instances of this image
744 DilloUrl *pattern = html->images->get(img)->url;
745 html->loadImages(pattern);
746 return true;
747 }
748 }
749
750 if (link != -1) {
751 DilloUrl *url = html->links->get(link);
752 _MSG("clicked on URL %d: %s\n", link, a_Url_str (url));
753
754 Html_set_link_coordinates(html, link, x, y);
755
756 if (event->button == 1) {
757 a_UIcmd_open_url(bw, url);
758 } else if (event->button == 2) {
759 if (prefs.middle_click_opens_new_tab) {
760 int focus = prefs.focus_new_tab ? 1 : 0;
761 if (event->state == SHIFT_MASK) focus = !focus;
762 a_UIcmd_open_url_nt(bw, url, focus);
763 } else
764 a_UIcmd_open_url_nw(bw, url);
765 } else {
766 return false;
767 }
768
769 /* Change the link color to "visited" as visual feedback */
770 for (Widget *w = widget; w; w = w->getParent()) {
771 _MSG(" ->%s\n", w->getClassName());
772 if (w->instanceOf(dw::Textblock::CLASS_ID)) {
773 ((Textblock*)w)->changeLinkColor (link, html->visited_color);
774 break;
775 }
776 }
777 }
778 return true;
779 }
780
781 /*
782 * Initialize the stash buffer
783 */
a_Html_stash_init(DilloHtml * html)784 void a_Html_stash_init(DilloHtml *html)
785 {
786 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_STASH;
787 html->StashSpace = false;
788 dStr_truncate(html->Stash, 0);
789 }
790
/* Entities list from the HTML 4.01 DTD.
 *
 * Each record maps an entity name (without '&' and ';') to its character
 * code. Codes written with a leading 0 are octal literals; amp, gt, lt and
 * quot are decimal.
 *
 * The table MUST stay sorted by strcmp() order of the names (uppercase
 * sorts before lowercase), because it is looked up with bsearch(). */
struct Ent_t {
   const char *entity;
   int isocode;
};

#define NumEnt 252
static const Ent_t Entities[NumEnt] = {
   {"AElig", 0306}, {"Aacute", 0301}, {"Acirc", 0302}, {"Agrave", 0300},
   {"Alpha", 01621}, {"Aring", 0305}, {"Atilde", 0303}, {"Auml", 0304},
   {"Beta", 01622}, {"Ccedil", 0307}, {"Chi", 01647}, {"Dagger", 020041},
   {"Delta", 01624}, {"ETH", 0320}, {"Eacute", 0311}, {"Ecirc", 0312},
   {"Egrave", 0310}, {"Epsilon", 01625}, {"Eta", 01627}, {"Euml", 0313},
   {"Gamma", 01623}, {"Iacute", 0315}, {"Icirc", 0316}, {"Igrave", 0314},
   {"Iota", 01631}, {"Iuml", 0317}, {"Kappa", 01632}, {"Lambda", 01633},
   {"Mu", 01634}, {"Ntilde", 0321}, {"Nu", 01635}, {"OElig", 0522},
   {"Oacute", 0323}, {"Ocirc", 0324}, {"Ograve", 0322}, {"Omega", 01651},
   {"Omicron", 01637}, {"Oslash", 0330}, {"Otilde", 0325}, {"Ouml", 0326},
   {"Phi", 01646}, {"Pi", 01640}, {"Prime", 020063}, {"Psi", 01650},
   {"Rho", 01641}, {"Scaron", 0540}, {"Sigma", 01643}, {"THORN", 0336},
   {"Tau", 01644}, {"Theta", 01630}, {"Uacute", 0332}, {"Ucirc", 0333},
   {"Ugrave", 0331}, {"Upsilon", 01645}, {"Uuml", 0334}, {"Xi", 01636},
   {"Yacute", 0335}, {"Yuml", 0570}, {"Zeta", 01626}, {"aacute", 0341},
   {"acirc", 0342}, {"acute", 0264}, {"aelig", 0346}, {"agrave", 0340},
   {"alefsym", 020465}, {"alpha", 01661}, {"amp", 38}, {"and", 021047},
   {"ang", 021040}, {"aring", 0345}, {"asymp", 021110}, {"atilde", 0343},
   {"auml", 0344}, {"bdquo", 020036}, {"beta", 01662}, {"brvbar", 0246},
   {"bull", 020042}, {"cap", 021051}, {"ccedil", 0347}, {"cedil", 0270},
   {"cent", 0242}, {"chi", 01707}, {"circ", 01306}, {"clubs", 023143},
   {"cong", 021105}, {"copy", 0251}, {"crarr", 020665}, {"cup", 021052},
   {"curren", 0244}, {"dArr", 020723}, {"dagger", 020040}, {"darr", 020623},
   {"deg", 0260}, {"delta", 01664}, {"diams", 023146}, {"divide", 0367},
   {"eacute", 0351}, {"ecirc", 0352}, {"egrave", 0350}, {"empty", 021005},
   {"emsp", 020003}, {"ensp", 020002}, {"epsilon", 01665}, {"equiv", 021141},
   {"eta", 01667}, {"eth", 0360}, {"euml", 0353}, {"euro", 020254},
   {"exist", 021003}, {"fnof", 0622}, {"forall", 021000}, {"frac12", 0275},
   {"frac14", 0274}, {"frac34", 0276}, {"frasl", 020104}, {"gamma", 01663},
   {"ge", 021145}, {"gt", 62}, {"hArr", 020724}, {"harr", 020624},
   {"hearts", 023145}, {"hellip", 020046}, {"iacute", 0355}, {"icirc", 0356},
   {"iexcl", 0241}, {"igrave", 0354}, {"image", 020421}, {"infin", 021036},
   {"int", 021053}, {"iota", 01671}, {"iquest", 0277}, {"isin", 021010},
   {"iuml", 0357}, {"kappa", 01672}, {"lArr", 020720}, {"lambda", 01673},
   {"lang", 021451}, {"laquo", 0253}, {"larr", 020620}, {"lceil", 021410},
   {"ldquo", 020034}, {"le", 021144}, {"lfloor", 021412}, {"lowast", 021027},
   {"loz", 022712}, {"lrm", 020016}, {"lsaquo", 020071}, {"lsquo", 020030},
   {"lt", 60}, {"macr", 0257}, {"mdash", 020024}, {"micro", 0265},
   {"middot", 0267}, {"minus", 021022}, {"mu", 01674}, {"nabla", 021007},
   {"nbsp", 0240}, {"ndash", 020023}, {"ne", 021140}, {"ni", 021013},
   {"not", 0254}, {"notin", 021011}, {"nsub", 021204}, {"ntilde", 0361},
   {"nu", 01675}, {"oacute", 0363}, {"ocirc", 0364}, {"oelig", 0523},
   {"ograve", 0362}, {"oline", 020076}, {"omega", 01711}, {"omicron", 01677},
   {"oplus", 021225}, {"or", 021050}, {"ordf", 0252}, {"ordm", 0272},
   {"oslash", 0370}, {"otilde", 0365}, {"otimes", 021227}, {"ouml", 0366},
   {"para", 0266}, {"part", 021002}, {"permil", 020060}, {"perp", 021245},
   {"phi", 01706}, {"pi", 01700}, {"piv", 01726}, {"plusmn", 0261},
   {"pound", 0243}, {"prime", 020062}, {"prod", 021017}, {"prop", 021035},
   {"psi", 01710}, {"quot", 34}, {"rArr", 020722}, {"radic", 021032},
   {"rang", 021452}, {"raquo", 0273}, {"rarr", 020622}, {"rceil", 021411},
   {"rdquo", 020035}, {"real", 020434}, {"reg", 0256}, {"rfloor", 021413},
   {"rho", 01701}, {"rlm", 020017}, {"rsaquo", 020072}, {"rsquo", 020031},
   {"sbquo", 020032}, {"scaron", 0541}, {"sdot", 021305}, {"sect", 0247},
   {"shy", 0255}, {"sigma", 01703}, {"sigmaf", 01702}, {"sim", 021074},
   {"spades", 023140}, {"sub", 021202}, {"sube", 021206}, {"sum", 021021},
   {"sup", 021203}, {"sup1", 0271}, {"sup2", 0262}, {"sup3", 0263},
   {"supe", 021207}, {"szlig", 0337}, {"tau", 01704}, {"there4", 021064},
   {"theta", 01670}, {"thetasym", 01721}, {"thinsp", 020011}, {"thorn", 0376},
   {"tilde", 01334}, {"times", 0327}, {"trade", 020442}, {"uArr", 020721},
   {"uacute", 0372}, {"uarr", 020621}, {"ucirc", 0373}, {"ugrave", 0371},
   {"uml", 0250}, {"upsih", 01722}, {"upsilon", 01705}, {"uuml", 0374},
   {"weierp", 020430}, {"xi", 01676}, {"yacute", 0375}, {"yen", 0245},
   {"yuml", 0377}, {"zeta", 01666}, {"zwj", 020015}, {"zwnj", 020014}
};
863
864
865 /*
866 * Comparison function for binary search
867 */
Html_entity_comp(const void * a,const void * b)868 static int Html_entity_comp(const void *a, const void *b)
869 {
870 return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);
871 }
872
873 /*
874 * Binary search of 'key' in entity list
875 */
Html_entity_search(char * key)876 static int Html_entity_search(char *key)
877 {
878 Ent_t *res, EntKey;
879
880 EntKey.entity = key;
881 res = (Ent_t*) bsearch(&EntKey, Entities, NumEnt,
882 sizeof(Ent_t), Html_entity_comp);
883 if (res)
884 return (res - Entities);
885 return -1;
886 }
887
/*
 * This is M$ non-standard "smart quotes" (w1252). Now even deprecated by them!
 *
 * SGML for HTML4.01 defines c >= 128 and c <= 159 as UNUSED.
 * TODO: Probably I should remove this hack, and add a HTML warning. --Jcid
 *
 * Mapping: 145,146 -> '\''   147,148 -> '"'   149 -> 176   150,151 -> '-'
 * Every other code is returned unchanged.
 */
static int Html_ms_stupid_quotes_2ucs(int isocode)
{
   if (isocode == 145 || isocode == 146)
      return '\'';
   if (isocode == 147 || isocode == 148)
      return '"';
   if (isocode == 149)
      return 176;
   if (isocode == 150 || isocode == 151)
      return '-';
   return isocode;
}
909
910 /*
911 * Given an entity, return the UCS character code.
912 * Returns a negative value (error code) if not a valid entity.
913 *
914 * The first character *token is assumed to be == '&'
915 *
916 * For valid entities, *entsize is set to the length of the parsed entity.
917 */
Html_parse_entity(DilloHtml * html,const char * token,int toksize,int * entsize)918 static int Html_parse_entity(DilloHtml *html, const char *token,
919 int toksize, int *entsize)
920 {
921 int isocode, i;
922 char *tok, *s, c;
923
924 token++;
925 tok = s = toksize ? dStrndup(token, (uint_t)toksize) : dStrdup(token);
926
927 isocode = -1;
928
929 if (*s == '#') {
930 /* numeric character reference */
931 errno = 0;
932 if (*++s == 'x' || *s == 'X') {
933 if (isxdigit(*++s)) {
934 /* strtol with base 16 accepts leading "0x" - we don't */
935 if (*s == '0' && s[1] == 'x') {
936 s++;
937 isocode = 0;
938 } else {
939 isocode = strtol(s, &s, 16);
940 }
941 }
942 } else if (isdigit(*s)) {
943 isocode = strtol(s, &s, 10);
944 }
945
946 if (!isocode || errno || isocode > 0xffff) {
947 /* this catches null bytes, errors and codes >= 0xFFFF */
948 BUG_MSG("Numeric character reference \"%s\" out of range.", tok);
949 isocode = -2;
950 }
951
952 if (isocode != -1) {
953 if (*s == ';')
954 s++;
955 else if (prefs.show_extra_warnings)
956 BUG_MSG("Numeric character reference without trailing ';'.");
957 }
958
959 } else if (isalpha(*s)) {
960 /* character entity reference */
961 while (*++s && (isalnum(*s) || strchr(":_.-", *s))) ;
962 c = *s;
963 *s = 0;
964
965 if ((i = Html_entity_search(tok)) >= 0) {
966 isocode = Entities[i].isocode;
967 } else {
968 if (html->DocType == DT_XHTML && !strcmp(tok, "apos")) {
969 isocode = 0x27;
970 } else {
971 if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) ||
972 html->DocType == DT_XHTML)
973 BUG_MSG("Undefined character entity '%s'.", tok);
974 isocode = -3;
975 }
976 }
977 if (c == ';')
978 s++;
979 else if (prefs.show_extra_warnings)
980 BUG_MSG("Character entity reference without trailing ';'.");
981 }
982
983 *entsize = s-tok+1;
984 dFree(tok);
985
986 if (isocode >= 145 && isocode <= 151) {
987 /* TODO: remove this hack. */
988 isocode = Html_ms_stupid_quotes_2ucs(isocode);
989 } else if (isocode == -1 && prefs.show_extra_warnings)
990 BUG_MSG("Literal '&'.");
991
992 return isocode;
993 }
994
995 /*
996 * Convert all the entities in a token to utf8 encoding. Takes
997 * a token and its length, and returns a newly allocated string.
998 */
a_Html_parse_entities(DilloHtml * html,const char * token,int toksize)999 char *a_Html_parse_entities(DilloHtml *html, const char *token, int toksize)
1000 {
1001 const char *esc_set = "&";
1002 char *new_str, buf[4];
1003 int i, j, k, n, s, isocode, entsize;
1004
1005 new_str = dStrndup(token, toksize);
1006 s = strcspn(new_str, esc_set);
1007 if (new_str[s] == 0)
1008 return new_str;
1009
1010 for (i = j = s; i < toksize; i++) {
1011 if (token[i] == '&' &&
1012 (isocode = Html_parse_entity(html, token+i,
1013 toksize-i, &entsize)) >= 0) {
1014 if (isocode >= 128) {
1015 /* multibyte encoding */
1016 n = a_Utf8_encode(isocode, buf);
1017 for (k = 0; k < n; ++k)
1018 new_str[j++] = buf[k];
1019 } else {
1020 new_str[j++] = (char) isocode;
1021 }
1022 i += entsize-1;
1023 } else {
1024 new_str[j++] = token[i];
1025 }
1026 }
1027 new_str[j] = '\0';
1028 return new_str;
1029 }
1030
1031 /*
1032 * For white-space: pre-line, we must break the line if encountering a newline.
1033 * Otherwise, collapse whitespace as usual.
1034 */
Html_process_space_pre_line(DilloHtml * html,const char * space,int spacesize)1035 static void Html_process_space_pre_line(DilloHtml *html, const char *space,
1036 int spacesize)
1037 {
1038 int i, breakCnt = 0;
1039
1040 for (i = 0; i < spacesize; i++) {
1041 /* Support for "\r", "\n" and "\r\n" line breaks */
1042 if (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR)) {
1043 breakCnt++;
1044 html->PrevWasCR = (space[i] == '\r');
1045
1046 HT2TB(html)->addLinebreak (html->wordStyle ());
1047 }
1048 }
1049 if (breakCnt == 0) {
1050 HT2TB(html)->addSpace(html->wordStyle ());
1051 }
1052 }
1053
1054 /*
1055 * Parse spaces
1056 */
Html_process_space(DilloHtml * html,const char * space,int spacesize)1057 static void Html_process_space(DilloHtml *html, const char *space,
1058 int spacesize)
1059 {
1060 char *spc;
1061 int i, offset;
1062 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1063
1064 if (S_TOP(html)->display_none) {
1065 /* do nothing */
1066 } else if (parse_mode == DILLO_HTML_PARSE_MODE_STASH) {
1067 html->StashSpace = (html->Stash->len > 0);
1068
1069 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1070 dStr_append_l(html->Stash, space, spacesize);
1071
1072 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1073 int spaceCnt = 0;
1074
1075 /* re-scan the string for characters that cause line breaks */
1076 for (i = 0; i < spacesize; i++) {
1077 /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
1078 if (!html->PreFirstChar &&
1079 (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
1080
1081 if (spaceCnt) {
1082 spc = dStrnfill(spaceCnt, ' ');
1083 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1084 dFree(spc);
1085 spaceCnt = 0;
1086 }
1087 HT2TB(html)->addLinebreak (html->wordStyle ());
1088 html->pre_column = 0;
1089 }
1090 html->PreFirstChar = false;
1091
1092 /* cr and lf should not be rendered -- they appear as a break */
1093 switch (space[i]) {
1094 case '\r':
1095 case '\n':
1096 break;
1097 case '\t':
1098 if (prefs.show_extra_warnings)
1099 BUG_MSG("TAB character inside <pre>.");
1100 offset = TAB_SIZE - html->pre_column % TAB_SIZE;
1101 spaceCnt += offset;
1102 html->pre_column += offset;
1103 break;
1104 default:
1105 spaceCnt++;
1106 html->pre_column++;
1107 break;
1108 }
1109
1110 html->PrevWasCR = (space[i] == '\r');
1111 }
1112
1113 if (spaceCnt) {
1114 // add break possibility for the white-space:pre-wrap case
1115 HT2TB(html)->addBreakOption (html->wordStyle (), false);
1116 spc = dStrnfill(spaceCnt, ' ');
1117 HT2TB(html)->addText (spc, spaceCnt, html->wordStyle ());
1118 dFree(spc);
1119 }
1120
1121 } else {
1122 if (SGML_SPCDEL) {
1123 /* SGML_SPCDEL ignores white space immediately after an open tag */
1124 } else if (html->wordStyle ()->whiteSpace == WHITE_SPACE_PRE_LINE) {
1125 Html_process_space_pre_line(html, space, spacesize);
1126 } else {
1127 HT2TB(html)->addSpace(html->wordStyle ());
1128 }
1129
1130 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY)
1131 html->StashSpace = (html->Stash->len > 0);
1132 }
1133 }
1134
1135 /*
1136 * Handles putting the word into its proper place
1137 * > STASH and VERBATIM --> html->Stash
1138 * > otherwise it goes through addText()
1139 *
1140 * Entities are parsed (or not) according to parse_mode.
1141 * 'word' is a '\0'-terminated string.
1142 */
Html_process_word(DilloHtml * html,const char * word,int size)1143 static void Html_process_word(DilloHtml *html, const char *word, int size)
1144 {
1145 int i, j, start;
1146 char *Pword;
1147 DilloHtmlParseMode parse_mode = S_TOP(html)->parse_mode;
1148
1149 if (S_TOP(html)->display_none)
1150 return;
1151
1152 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1153 parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {
1154 if (html->StashSpace) {
1155 dStr_append_c(html->Stash, ' ');
1156 html->StashSpace = false;
1157 }
1158 Pword = a_Html_parse_entities(html, word, size);
1159 dStr_append(html->Stash, Pword);
1160 dFree(Pword);
1161
1162 } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1163 /* word goes in untouched, it is not processed here. */
1164 dStr_append_l(html->Stash, word, size);
1165 }
1166
1167 if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
1168 parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
1169 /* skip until the closing instructions */
1170
1171 } else if (parse_mode == DILLO_HTML_PARSE_MODE_PRE) {
1172 /* all this overhead is to catch white-space entities */
1173 Pword = a_Html_parse_entities(html, word, size);
1174 for (start = i = 0; Pword[i]; start = i)
1175 if (isspace(Pword[i])) {
1176 while (Pword[++i] && isspace(Pword[i])) ;
1177 Html_process_space(html, Pword + start, i - start);
1178 } else {
1179 while (Pword[++i] && !isspace(Pword[i])) ;
1180 HT2TB(html)->addText(Pword + start, i - start, html->wordStyle ());
1181 html->pre_column += i - start;
1182 html->PreFirstChar = false;
1183 }
1184 dFree(Pword);
1185
1186 } else {
1187 const char *word2, *beyond_word2;
1188
1189 Pword = NULL;
1190 if (!memchr(word,'&', size)) {
1191 /* No entities */
1192 word2 = word;
1193 beyond_word2 = word + size;
1194 } else {
1195 /* Collapse white-space entities inside the word (except ) */
1196 Pword = a_Html_parse_entities(html, word, size);
1197 /* Collapse adjacent " \t\f\n\r" characters into a single space */
1198 for (i = j = 0; (Pword[i] = Pword[j]); ++i, ++j) {
1199 if (strchr(" \t\f\n\r", Pword[i])) {
1200 if (i == 0 || (i > 0 && Pword[i-1] != ' '))
1201 Pword[i] = ' ';
1202 else
1203 for (--i; Pword[j+1] && strchr(" \t\f\n\r", Pword[j+1]); ++j)
1204 ;
1205 }
1206 }
1207 word2 = Pword;
1208 beyond_word2 = word2 + strlen(word2);
1209 }
1210 for (start = i = 0; word2[i]; start = i) {
1211 int len;
1212
1213 if (isspace(word2[i])) {
1214 while (word2[++i] && isspace(word2[i])) ;
1215 Html_process_space(html, word2 + start, i - start);
1216 } else if (!strncmp(word2+i, utf8_zero_width_space, 3)) {
1217 i += 3;
1218 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1219 } else if (a_Utf8_ideographic(word2+i, beyond_word2, &len)) {
1220 i += len;
1221 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1222 HT2TB(html)->addBreakOption(html->wordStyle (), false);
1223 } else {
1224 do {
1225 i += len;
1226 } while (word2[i] && !isspace(word2[i]) &&
1227 strncmp(word2+i, utf8_zero_width_space, 3) &&
1228 (!a_Utf8_ideographic(word2+i, beyond_word2, &len)));
1229 HT2TB(html)->addText(word2 + start, i - start, html->wordStyle ());
1230 }
1231 }
1232 if (Pword == word2)
1233 dFree(Pword);
1234 }
1235 }
1236
1237 /*
1238 * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
1239 * structure, with the initial < skipped over (e.g. "P align=center>")?
1240 */
Html_match_tag(const char * tagstr,char * tag,int tagsize)1241 static bool Html_match_tag(const char *tagstr, char *tag, int tagsize)
1242 {
1243 int i;
1244
1245 for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
1246 if (D_ASCII_TOLOWER(tagstr[i]) != D_ASCII_TOLOWER(tag[i]))
1247 return false;
1248 }
1249 /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
1250 if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))
1251 return true;
1252 return false;
1253 }
1254
1255 /*
1256 * This function is called after popping the stack, to
1257 * handle nested Textblock widgets.
1258 */
Html_eventually_pop_dw(DilloHtml * html,bool hand_over_break)1259 static void Html_eventually_pop_dw(DilloHtml *html, bool hand_over_break)
1260 {
1261 if (html->dw != S_TOP(html)->textblock) {
1262 if (hand_over_break)
1263 HT2TB(html)->handOverBreak (html->style ());
1264 HT2TB(html)->flush ();
1265 html->dw = S_TOP(html)->textblock;
1266 }
1267 }
1268
1269 /*
1270 * Push the tag (copying attributes from the top of the stack)
1271 */
Html_push_tag(DilloHtml * html,int tag_idx)1272 static void Html_push_tag(DilloHtml *html, int tag_idx)
1273 {
1274 int n_items;
1275
1276 n_items = html->stack->size ();
1277 html->stack->increase ();
1278 /* We'll copy the former stack item and just change the tag and its index
1279 * instead of copying all fields except for tag. --Jcid */
1280 *html->stack->getRef(n_items) = *html->stack->getRef(n_items - 1);
1281 html->stack->getRef(n_items)->tag_idx = tag_idx;
1282 html->dw = S_TOP(html)->textblock;
1283 }
1284
1285 /*
1286 * Push the tag (used to force en element with optional open into the stack)
1287 * Note: now it's the same as Html_push_tag(), but things may change...
1288 */
Html_force_push_tag(DilloHtml * html,int tag_idx)1289 static void Html_force_push_tag(DilloHtml *html, int tag_idx)
1290 {
1291 html->startElement (tag_idx);
1292 Html_push_tag(html, tag_idx);
1293 }
1294
1295 /*
1296 * Pop the top tag in the stack
1297 */
Html_real_pop_tag(DilloHtml * html)1298 static void Html_real_pop_tag(DilloHtml *html)
1299 {
1300 bool hand_over_break;
1301
1302 html->styleEngine->endElement (S_TOP(html)->tag_idx);
1303 hand_over_break = S_TOP(html)->hand_over_break;
1304 html->stack->setSize (html->stack->size() - 1);
1305 Html_eventually_pop_dw(html, hand_over_break);
1306 }
1307
1308 /*
1309 * Cleanup the stack to a given index.
1310 */
Html_tag_cleanup_to_idx(DilloHtml * html,int idx)1311 static void Html_tag_cleanup_to_idx(DilloHtml *html, int idx)
1312 {
1313 int s_sz;
1314 while ((s_sz = html->stack->size()) > idx) {
1315 int toptag_idx = S_TOP(html)->tag_idx;
1316 TagInfo toptag = Tags[toptag_idx];
1317 if (s_sz > idx + 1 && toptag.EndTag != 'O')
1318 BUG_MSG(" - forcing close of open tag: <%s>.", toptag.name);
1319 _MSG("Close: %*s%s\n", size," ", toptag.name);
1320 if (toptag.close)
1321 toptag.close(html);
1322 Html_real_pop_tag(html);
1323 }
1324 }
1325
1326 /*
1327 * Default close function for tags.
1328 * (conditional cleanup of the stack)
1329 * There are several ways of doing it. Considering the HTML 4.01 spec
1330 * which defines optional close tags, and the will to deliver useful diagnose
1331 * messages for bad-formed HTML, it'll go as follows:
1332 * 1.- Search the stack for the first tag that requires a close tag.
1333 * 2.- If it matches, clean all the optional-close tags in between.
1334 * 3.- Cleanup the matching tag. (on error, give a warning message)
1335 *
1336 * If 'w3c_mode' is NOT enabled:
1337 * 1.- Search the stack for a matching tag based on tag level.
1338 * 2.- If it exists, clean all the tags in between.
1339 * 3.- Cleanup the matching tag. (on error, give a warning message)
1340 */
Html_tag_cleanup_at_close(DilloHtml * html,int new_idx)1341 static void Html_tag_cleanup_at_close(DilloHtml *html, int new_idx)
1342 {
1343 static int i_BUTTON = a_Html_tag_index("button"),
1344 i_SELECT = a_Html_tag_index("select"),
1345 i_TEXTAREA = a_Html_tag_index("textarea");
1346 int w3c_mode = !prefs.w3c_plus_heuristics;
1347 int stack_idx, tag_idx, matched = 0, expected = 0;
1348 TagInfo new_tag = Tags[new_idx];
1349
1350 /* Look for the candidate tag to close */
1351 stack_idx = html->stack->size();
1352 while (--stack_idx) {
1353 tag_idx = html->stack->getRef(stack_idx)->tag_idx;
1354 if (tag_idx == new_idx) {
1355 /* matching tag found */
1356 matched = 1;
1357 break;
1358 } else if (Tags[tag_idx].EndTag == 'O') {
1359 /* skip an optional tag */
1360 continue;
1361 } else if ((new_idx == i_BUTTON && html->InFlags & IN_BUTTON) ||
1362 (new_idx == i_SELECT && html->InFlags & IN_SELECT) ||
1363 (new_idx == i_TEXTAREA && html->InFlags & IN_TEXTAREA)) {
1364 /* let these elements close tags inside them */
1365 continue;
1366 } else if (w3c_mode || Tags[tag_idx].TagLevel >= new_tag.TagLevel) {
1367 /* this is the tag that should have been closed */
1368 expected = 1;
1369 break;
1370 }
1371 }
1372
1373 if (matched) {
1374 Html_tag_cleanup_to_idx(html, stack_idx);
1375 } else if (expected) {
1376 BUG_MSG("Unexpected closing tag: </%s> -- expected </%s>.",
1377 new_tag.name, Tags[tag_idx].name);
1378 } else {
1379 BUG_MSG("Unexpected closing tag: </%s>.", new_tag.name);
1380 }
1381 }
1382
1383 /*
1384 * Avoid nesting and inter-nesting of BUTTON, SELECT and TEXTAREA,
1385 * by closing them before opening another.
1386 * This is not an HTML SPEC restriction , but it avoids lots of trouble
1387 * inside dillo (concurrent inputs), and makes almost no sense to have.
1388 */
Html_tag_cleanup_nested_inputs(DilloHtml * html,int new_idx)1389 static void Html_tag_cleanup_nested_inputs(DilloHtml *html, int new_idx)
1390 {
1391 static int i_BUTTON = a_Html_tag_index("button"),
1392 i_SELECT = a_Html_tag_index("select"),
1393 i_TEXTAREA = a_Html_tag_index("textarea");
1394 int stack_idx, u_idx, matched = 0;
1395
1396 dReturn_if_fail(html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA));
1397 dReturn_if_fail(new_idx == i_BUTTON || new_idx == i_SELECT ||
1398 new_idx == i_TEXTAREA);
1399
1400 /* Get the unclosed tag index */
1401 u_idx = (html->InFlags & IN_BUTTON) ? i_BUTTON :
1402 (html->InFlags & IN_SELECT) ? i_SELECT : i_TEXTAREA;
1403
1404 /* Look for it inside the stack */
1405 stack_idx = html->stack->size();
1406 while (--stack_idx) {
1407 if (html->stack->getRef(stack_idx)->tag_idx == u_idx) {
1408 /* matching tag found */
1409 matched = 1;
1410 break;
1411 }
1412 }
1413
1414 if (matched) {
1415 BUG_MSG("Attempt to nest <%s> element inside <%s> -- closing <%s>.",
1416 Tags[new_idx].name, Tags[u_idx].name, Tags[u_idx].name);
1417 Html_tag_cleanup_to_idx(html, stack_idx);
1418 } else {
1419 MSG_WARN("Inconsistent parser state, flag is SET but no '%s' element"
1420 "was found in the stack\n", Tags[u_idx].name);
1421 }
1422
1423 html->InFlags &= ~(IN_BUTTON | IN_SELECT | IN_TEXTAREA);
1424 }
1425
1426
1427 /*
1428 * Some parsing routines.
1429 */
1430
1431 /*
1432 * Used by a_Html_parse_length
1433 */
Html_parse_length_or_multi_length(const char * attr,char ** endptr)1434 static CssLength Html_parse_length_or_multi_length (const char *attr,
1435 char **endptr)
1436 {
1437 CssLength l;
1438 double v;
1439 char *end;
1440
1441 v = strtod (attr, &end);
1442 switch (*end) {
1443 case '%':
1444 end++;
1445 l = CSS_CREATE_LENGTH (v / 100, CSS_LENGTH_TYPE_PERCENTAGE);
1446 break;
1447
1448 case '*':
1449 end++;
1450 l = CSS_CREATE_LENGTH (v, CSS_LENGTH_TYPE_RELATIVE);
1451 break;
1452 /*
1453 The "px" suffix seems not allowed by HTML4.01 SPEC.
1454 case 'p':
1455 if (end[1] == 'x')
1456 end += 2;
1457 */
1458 default:
1459 l = CSS_CREATE_LENGTH (v, CSS_LENGTH_TYPE_PX);
1460 break;
1461 }
1462
1463 if (endptr)
1464 *endptr = end;
1465 return l;
1466 }
1467
1468
1469 /*
1470 * Returns a length or a percentage, or UNDEF_LENGTH in case
1471 * of an error, or if attr is NULL.
1472 */
a_Html_parse_length(DilloHtml * html,const char * attr)1473 CssLength a_Html_parse_length (DilloHtml *html, const char *attr)
1474 {
1475 CssLength l;
1476 char *end;
1477
1478 l = Html_parse_length_or_multi_length (attr, &end);
1479 if (CSS_LENGTH_TYPE (l) == CSS_LENGTH_TYPE_RELATIVE)
1480 /* not allowed as &Length; */
1481 l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
1482 else {
1483 /* allow only whitespaces */
1484 if (*end && !isspace (*end)) {
1485 BUG_MSG("Garbage after length: '%s'.", attr);
1486 l = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
1487 }
1488 }
1489
1490 _MSG("a_Html_parse_length: \"%s\" %d\n", attr, CSS_LENGTH_VALUE(l));
1491 return l;
1492 }
1493
1494 /*
1495 * Parse a color attribute.
1496 * Return value: parsed color, or default_color (+ error msg) on error.
1497 */
a_Html_color_parse(DilloHtml * html,const char * str,int32_t default_color)1498 int32_t a_Html_color_parse(DilloHtml *html, const char *str,
1499 int32_t default_color)
1500 {
1501 int err = 1;
1502 int32_t color = a_Color_parse(str, default_color, &err);
1503
1504 if (err) {
1505 BUG_MSG("Color '%s' is not in \"#RRGGBB\" format.", str);
1506 }
1507 return color;
1508 }
1509
1510 /*
1511 * Check that 'val' is composed of characters inside [A-Za-z0-9:_.-]
1512 * Note: ID can't have entities, but this check is enough (no '&').
1513 * Return value: 1 if OK, 0 otherwise.
1514 */
1515 static int
Html_check_name_val(DilloHtml * html,const char * val,const char * attrname)1516 Html_check_name_val(DilloHtml *html, const char *val, const char *attrname)
1517 {
1518 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) {
1519 bool valid = *val && !strchr(val, ' ');
1520
1521 if (!valid) {
1522 BUG_MSG("'%s' value \"%s\" must not be empty and must not contain "
1523 "spaces.", attrname, val);
1524 }
1525 return valid ? 1 : 0;
1526 } else {
1527 int i;
1528
1529 for (i = 0; val[i]; ++i)
1530 if (!isascii(val[i]) || !(isalnum(val[i]) || strchr(":_.-", val[i])))
1531 break;
1532
1533 if (val[i] || !(isascii(val[0]) && isalpha(val[0])))
1534 BUG_MSG("%s attribute value \"%s\" is not of the form "
1535 "'[A-Za-z][A-Za-z0-9:_.-]*'.", attrname, val);
1536
1537 return !(val[i]);
1538 }
1539 }
1540
1541 /*
1542 * Handle DOCTYPE declaration
1543 *
1544 * Follows the convention that HTML 4.01
1545 * doctypes which include a full w3c DTD url are treated as
1546 * standards-compliant, but 4.01 without the url and HTML 4.0 and
1547 * earlier are not. XHTML doctypes are always standards-compliant
1548 * whether or not an url is present.
1549 *
1550 * Note: I'm not sure about this convention. The W3C validator
1551 * recognizes the "HTML Level" with or without the URL. The convention
1552 * comes from mozilla (see URLs below), but Dillo doesn't have the same
1553 * rendering modes, so it may be better to chose another behaviour. --Jcid
1554 *
1555 * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html
1556 * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html
1557 *
1558 * This is not a full DOCTYPE parser, just enough for what Dillo uses.
1559 */
Html_parse_doctype(DilloHtml * html,const char * tag,int tagsize)1560 static void Html_parse_doctype(DilloHtml *html, const char *tag, int tagsize)
1561 {
1562 static const char HTML_SGML_sig [] = "<!DOCTYPE HTML PUBLIC ";
1563 static const char HTML20 [] = "-//IETF//DTD HTML";
1564 static const char HTML32 [] = "-//W3C//DTD HTML 3.2";
1565 static const char HTML40 [] = "-//W3C//DTD HTML 4.0";
1566 static const char HTML401 [] = "-//W3C//DTD HTML 4.01";
1567 static const char HTML401_url[] = "http://www.w3.org/TR/html4/";
1568 static const char XHTML1 [] = "-//W3C//DTD XHTML 1.0";
1569 static const char XHTML1_url [] = "http://www.w3.org/TR/xhtml1/DTD/";
1570 static const char XHTML11 [] = "-//W3C//DTD XHTML 1.1";
1571 static const char XHTML11_url[] = "http://www.w3.org/TR/xhtml11/DTD/";
1572
1573 size_t i;
1574 int quote;
1575 char *p, *ntag = dStrndup(tag, tagsize);
1576
1577 /* Tag sanitization: Collapse whitespace between tokens
1578 * and replace '\n' and '\r' with ' ' inside quoted strings. */
1579 for (i = 0, p = ntag; *p; ++p) {
1580 if (isspace(*p)) {
1581 for (ntag[i++] = ' '; isspace(p[1]); ++p) ;
1582 } else if ((quote = *p) == '"' || *p == '\'') {
1583 for (ntag[i++] = *p++; (ntag[i] = *p) && ntag[i++] != quote; ++p) {
1584 if (*p == '\n' || *p == '\r')
1585 ntag[i - 1] = ' ';
1586 p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0;
1587 }
1588 } else {
1589 ntag[i++] = *p;
1590 }
1591 if (!*p)
1592 break;
1593 }
1594 ntag[i] = 0;
1595
1596 _MSG("New: {%s}\n", ntag);
1597
1598 if (html->DocType != DT_NONE)
1599 BUG_MSG("Multiple DOCTYPE declarations.");
1600
1601 /* The default DT_NONE type is TagSoup */
1602 if (i > strlen(HTML_SGML_sig) && // avoid out of bounds reads!
1603 !dStrnAsciiCasecmp(ntag, HTML_SGML_sig, strlen(HTML_SGML_sig))) {
1604 p = ntag + strlen(HTML_SGML_sig) + 1;
1605 if (!strncmp(p, HTML401, strlen(HTML401)) &&
1606 dStriAsciiStr(p + strlen(HTML401), HTML401_url)) {
1607 html->DocType = DT_HTML;
1608 html->DocTypeVersion = 4.01f;
1609 } else if (!strncmp(p, XHTML1, strlen(XHTML1)) &&
1610 dStriAsciiStr(p + strlen(XHTML1), XHTML1_url)) {
1611 html->DocType = DT_XHTML;
1612 html->DocTypeVersion = 1.0f;
1613 } else if (!strncmp(p, XHTML11, strlen(XHTML11)) &&
1614 dStriAsciiStr(p + strlen(XHTML11), XHTML11_url)) {
1615 html->DocType = DT_XHTML;
1616 html->DocTypeVersion = 1.1f;
1617 } else if (!strncmp(p, HTML40, strlen(HTML40))) {
1618 html->DocType = DT_HTML;
1619 html->DocTypeVersion = 4.0f;
1620 } else if (!strncmp(p, HTML32, strlen(HTML32))) {
1621 html->DocType = DT_HTML;
1622 html->DocTypeVersion = 3.2f;
1623 } else if (!strncmp(p, HTML20, strlen(HTML20))) {
1624 html->DocType = DT_HTML;
1625 html->DocTypeVersion = 2.0f;
1626 }
1627 } else if (!dStrAsciiCasecmp(ntag, "<!DOCTYPE html>") ||
1628 !dStrAsciiCasecmp(ntag, "<!DOCTYPE html >") ||
1629 !dStrAsciiCasecmp(ntag,
1630 "<!DOCTYPE html SYSTEM \"about:legacy-compat\">") ||
1631 !dStrAsciiCasecmp(ntag,
1632 "<!DOCTYPE html SYSTEM 'about:legacy-compat'>")) {
1633 html->DocType = DT_HTML;
1634 html->DocTypeVersion = 5.0f;
1635 }
1636 if (html->DocType == DT_NONE) {
1637 html->DocType = DT_UNRECOGNIZED;
1638 BUG_MSG("DOCTYPE not recognized: ('%s').", ntag);
1639 }
1640 dFree(ntag);
1641 }
1642
1643 /*
1644 * Handle open HTML element
1645 */
Html_tag_open_html(DilloHtml * html,const char * tag,int tagsize)1646 static void Html_tag_open_html(DilloHtml *html, const char *tag, int tagsize)
1647 {
1648 /* The IN_HTML flag will be kept set until at IN_EOF condition.
1649 * This allows to handle pages with multiple or uneven HTML tags */
1650
1651 if (!(html->InFlags & IN_HTML))
1652 html->InFlags |= IN_HTML;
1653 if (html->Num_HTML < UCHAR_MAX)
1654 ++html->Num_HTML;
1655
1656 if (html->Num_HTML > 1) {
1657 BUG_MSG("<html> was already open.");
1658 html->ReqTagClose = true;
1659 }
1660 }
1661
1662 /*
1663 * Handle close HTML element
1664 */
Html_tag_close_html(DilloHtml * html)1665 static void Html_tag_close_html(DilloHtml *html)
1666 {
1667 _MSG("Html_tag_close_html: Num_HTML=%d\n", html->Num_HTML);
1668 }
1669
1670 /*
1671 * Handle open HEAD element
1672 */
Html_tag_open_head(DilloHtml * html,const char * tag,int tagsize)1673 static void Html_tag_open_head(DilloHtml *html, const char *tag, int tagsize)
1674 {
1675 if (html->InFlags & IN_BODY) {
1676 BUG_MSG("<head> must go before the BODY section.");
1677 html->ReqTagClose = true;
1678 return;
1679 }
1680
1681 if (html->Num_HEAD < UCHAR_MAX)
1682 ++html->Num_HEAD;
1683 if (html->InFlags & IN_HEAD) {
1684 BUG_MSG("<head> was already open.");
1685 html->ReqTagClose = true;
1686 } else if (html->Num_HEAD > 1) {
1687 BUG_MSG("<head> already finished -- ignoring.");
1688 html->ReqTagClose = true;
1689 } else {
1690 html->InFlags |= IN_HEAD;
1691 }
1692 }
1693
1694 /*
1695 * Handle close HEAD element
1696 * Note: HEAD is parsed once completely got.
1697 */
Html_tag_close_head(DilloHtml * html)1698 static void Html_tag_close_head(DilloHtml *html)
1699 {
1700 if (html->InFlags & IN_HEAD) {
1701 if (html->Num_HEAD == 1) {
1702 /* match for the well formed start of HEAD section */
1703 if (html->Num_TITLE == 0)
1704 BUG_MSG("<head> lacks <title>.");
1705
1706 html->InFlags &= ~IN_HEAD;
1707
1708 /* charset is already set, load remote stylesheets now */
1709 for (int i = 0; i < html->cssUrls->size(); i++) {
1710 a_Html_load_stylesheet(html, html->cssUrls->get(i));
1711 }
1712 } else if (html->Num_HEAD > 1) {
1713 --html->Num_HEAD;
1714 }
1715 } else {
1716 /* not reached, see Html_tag_cleanup_at_close() */
1717 }
1718 }
1719
1720 /*
1721 * Handle open TITLE
1722 * calls stash init, where the title string will be stored
1723 */
Html_tag_open_title(DilloHtml * html,const char * tag,int tagsize)1724 static void Html_tag_open_title(DilloHtml *html, const char *tag, int tagsize)
1725 {
1726 /* fill the stash buffer so TITLE content can be ignored
1727 * when not valid, redundant or outside HEAD section */
1728 a_Html_stash_init(html);
1729
1730 if (html->InFlags & IN_HEAD) {
1731 if (html->Num_TITLE < UCHAR_MAX)
1732 ++html->Num_TITLE;
1733 if (html->Num_TITLE > 1)
1734 BUG_MSG("Redundant <title>.");
1735 } else {
1736 BUG_MSG("<title> must be inside <head> -- ignoring.");
1737 }
1738 }
1739
1740 /*
1741 * Handle close TITLE
1742 * set page-title in the browser window and in the history.
1743 */
Html_tag_close_title(DilloHtml * html)1744 static void Html_tag_close_title(DilloHtml *html)
1745 {
1746 if (html->InFlags & IN_HEAD && html->Num_TITLE == 1) {
1747 /* title is only valid inside HEAD */
1748 a_UIcmd_set_page_title(html->bw, html->Stash->str);
1749 a_History_set_title_by_url(html->page_url, html->Stash->str);
1750 }
1751 }
1752
1753 /*
1754 * Handle open SCRIPT
1755 * initializes stash, where the embedded code will be stored.
1756 * MODE_VERBATIM is used because MODE_STASH catches entities.
1757 */
Html_tag_open_script(DilloHtml * html,const char * tag,int tagsize)1758 static void Html_tag_open_script(DilloHtml *html, const char *tag, int tagsize)
1759 {
1760 a_Html_stash_init(html);
1761 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1762 }
1763
1764 /*
1765 * Handle close SCRIPT
1766 */
Html_tag_close_script(DilloHtml * html)1767 static void Html_tag_close_script(DilloHtml *html)
1768 {
1769 /* eventually the stash will be sent to an interpreter for parsing */
1770 }
1771
1772 /*
1773 * Handle open STYLE
1774 * Store contents in the stash where the style sheet interpreter can get it.
1775 */
Html_tag_open_style(DilloHtml * html,const char * tag,int tagsize)1776 static void Html_tag_open_style(DilloHtml *html, const char *tag, int tagsize)
1777 {
1778 const char *attrbuf;
1779
1780 html->loadCssFromStash = true;
1781
1782 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
1783 if (html->DocType != DT_HTML || html->DocTypeVersion <= 4.01f)
1784 BUG_MSG("<style> requires type attribute.");
1785 } else if (dStrAsciiCasecmp(attrbuf, "text/css")) {
1786 html->loadCssFromStash = false;
1787 }
1788 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
1789 dStrAsciiCasecmp(attrbuf, "all") && !dStriAsciiStr(attrbuf, "screen")) {
1790 /* HTML 4.01 sec. 6.13 says that media descriptors are case-sensitive,
1791 * but sec. 14.2.3 says that the attribute is case-insensitive.
1792 * TODO can be a comma-separated list.
1793 * TODO handheld.
1794 */
1795 html->loadCssFromStash = false;
1796 }
1797
1798 a_Html_stash_init(html);
1799 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_VERBATIM;
1800 }
1801
1802 /*
1803 * Handle close STYLE
1804 */
Html_tag_close_style(DilloHtml * html)1805 static void Html_tag_close_style(DilloHtml *html)
1806 {
1807 if (prefs.parse_embedded_css && html->loadCssFromStash)
1808 html->styleEngine->parse(html, html->base_url, html->Stash->str,
1809 html->Stash->len, CSS_ORIGIN_AUTHOR);
1810 }
1811
1812 /*
1813 * <BODY>
1814 */
Html_tag_open_body(DilloHtml * html,const char * tag,int tagsize)1815 static void Html_tag_open_body(DilloHtml *html, const char *tag, int tagsize)
1816 {
1817 const char *attrbuf;
1818 int32_t color;
1819 int tag_index_a = a_Html_tag_index ("a");
1820 style::Color *bgColor;
1821 style::StyleImage *bgImage;
1822 style::BackgroundRepeat bgRepeat;
1823 style::BackgroundAttachment bgAttachment;
1824 style::Length bgPositionX, bgPositionY;
1825
1826 _MSG("Html_tag_open_body Num_BODY=%d\n", html->Num_BODY);
1827 if (!(html->InFlags & IN_BODY))
1828 html->InFlags |= IN_BODY;
1829 if (html->Num_BODY < UCHAR_MAX)
1830 ++html->Num_BODY;
1831
1832 if (html->Num_BODY > 1) {
1833 BUG_MSG("<body> was already open.");
1834 html->ReqTagClose = true;
1835 return;
1836 }
1837
1838 if (html->InFlags & IN_HEAD) {
1839 /* if we're here, it's bad XHTML, no need to recover */
1840 BUG_MSG("Unclosed <head>.");
1841 }
1842
1843 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "bgcolor"))) {
1844 color = a_Html_color_parse(html, attrbuf, -1);
1845
1846 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1847 BUG_MSG("<body> bgcolor attribute is obsolete.");
1848
1849 if (color != -1)
1850 html->styleEngine->setNonCssHint (CSS_PROPERTY_BACKGROUND_COLOR,
1851 CSS_TYPE_COLOR, color);
1852 }
1853
1854 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "text"))) {
1855 color = a_Html_color_parse(html, attrbuf, -1);
1856
1857 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1858 BUG_MSG("<body> text attribute is obsolete.");
1859
1860 if (color != -1)
1861 html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR,
1862 CSS_TYPE_COLOR, color);
1863 }
1864
1865 html->restyle ();
1866
1867 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "link"))) {
1868 html->non_css_link_color = a_Html_color_parse(html, attrbuf, -1);
1869 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1870 BUG_MSG("<body> link attribute is obsolete.");
1871 }
1872
1873 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vlink"))) {
1874 html->non_css_visited_color = a_Html_color_parse(html, attrbuf, -1);
1875 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
1876 BUG_MSG("<body> vlink attribute is obsolete.");
1877 }
1878
1879 html->dw->setStyle (html->style ());
1880
1881 bgColor = html->styleEngine->backgroundColor ();
1882 if (bgColor)
1883 HT2LT(html)->setBgColor(bgColor);
1884
1885 bgImage = html->styleEngine->backgroundImage (&bgRepeat, &bgAttachment,
1886 &bgPositionX, &bgPositionY);
1887 if (bgImage)
1888 HT2LT(html)->setBgImage(bgImage, bgRepeat, bgAttachment, bgPositionX,
1889 bgPositionY);
1890
1891 /* Determine a color for visited links.
1892 * This color is computed once per page and used for immediate feedback
1893 * when clicking a link.
1894 * On reload style including color for visited links is computed properly
1895 * according to CSS.
1896 */
1897 html->startElement (tag_index_a);
1898 html->styleEngine->setPseudoVisited ();
1899 if (html->non_css_visited_color != -1) {
1900 html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR, CSS_TYPE_COLOR,
1901 html->non_css_visited_color);
1902 }
1903 html->visited_color = html->style ()->color->getColor ();
1904 html->styleEngine->endElement (tag_index_a);
1905
1906 if (prefs.contrast_visited_color) {
1907 /* get a color that has a "safe distance" from text, link and bg */
1908 html->visited_color =
1909 a_Color_vc(html->visited_color,
1910 html->style ()->color->getColor(),
1911 html->non_css_link_color,
1912 html->backgroundStyle()->backgroundColor->getColor());
1913 }
1914
1915
1916 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_BODY;
1917 }
1918
1919 /*
1920 * BODY
1921 */
Html_tag_close_body(DilloHtml * html)1922 static void Html_tag_close_body(DilloHtml *html)
1923 {
1924 /* Some tag soup pages use multiple BODY tags...
1925 * Defer clearing the IN_BODY flag until IN_EOF */
1926 }
1927
1928 /*
1929 * <P>
1930 * TODO: what's the point between adding the parbreak before and
1931 * after the push?
1932 */
Html_tag_open_p(DilloHtml * html,const char * tag,int tagsize)1933 static void Html_tag_open_p(DilloHtml *html, const char *tag, int tagsize)
1934 {
1935 CssPropertyList props;
1936
1937 a_Html_tag_set_align_attr (html, tag, tagsize);
1938 }
1939
1940 /*
1941 * <FRAME>, <IFRAME>
1942 * TODO: This is just a temporary fix while real frame support
1943 * isn't finished. Imitates lynx/w3m's frames.
1944 */
Html_tag_open_frame(DilloHtml * html,const char * tag,int tagsize)1945 static void Html_tag_open_frame (DilloHtml *html, const char *tag, int tagsize)
1946 {
1947 const char *attrbuf;
1948 DilloUrl *url;
1949 CssPropertyList props;
1950
1951 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1952 return;
1953
1954 if (!(url = a_Html_url_new(html, attrbuf, NULL, 0)))
1955 return;
1956
1957 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
1958 /* visited frame */
1959 html->styleEngine->setPseudoVisited ();
1960 } else {
1961 /* unvisited frame */
1962 html->styleEngine->setPseudoLink ();
1963 }
1964
1965 html->styleEngine->setNonCssHint (PROPERTY_X_LINK, CSS_TYPE_INTEGER,
1966 Html_set_new_link(html,&url));
1967 }
1968
1969 static void
Html_tag_content_frame(DilloHtml * html,const char * tag,int tagsize)1970 Html_tag_content_frame (DilloHtml *html, const char *tag, int tagsize)
1971 {
1972 const char *attrbuf;
1973 char *src;
1974 Textblock *textblock;
1975 Widget *bullet;
1976
1977 textblock = HT2TB(html);
1978
1979 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")))
1980 return;
1981
1982 src = dStrdup(attrbuf);
1983
1984 textblock->addParbreak (5, html->wordStyle ());
1985
1986 bullet = new Bullet();
1987 textblock->addWidget(bullet, html->wordStyle ());
1988 textblock->addSpace(html->wordStyle ());
1989
1990 if (D_ASCII_TOLOWER(tag[1]) == 'i') {
1991 /* IFRAME usually comes with very long advertising/spying URLS,
1992 * to not break rendering we will force name="IFRAME" */
1993 textblock->addText ("IFRAME", html->wordStyle ());
1994
1995 } else {
1996 /* FRAME:
1997 * If 'name' tag is present use it, if not use 'src' value */
1998 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
1999 textblock->addText (src, html->wordStyle ());
2000 } else {
2001 textblock->addText (attrbuf, html->wordStyle ());
2002 }
2003 }
2004
2005 textblock->addParbreak (5, html->wordStyle ());
2006
2007 dFree(src);
2008 }
2009
2010 /*
2011 * <FRAMESET>
2012 * TODO: This is just a temporary fix while real frame support
2013 * isn't finished. Imitates lynx/w3m's frames.
2014 */
Html_tag_content_frameset(DilloHtml * html,const char * tag,int tagsize)2015 static void Html_tag_content_frameset (DilloHtml *html,
2016 const char *tag, int tagsize)
2017 {
2018 HT2TB(html)->addParbreak (9, html->wordStyle ());
2019 HT2TB(html)->addText("--FRAME--", html->wordStyle ());
2020 Html_add_textblock(html, 5);
2021 }
2022
2023 /*
2024 * <H1> | <H2> | <H3> | <H4> | <H5> | <H6>
2025 */
Html_tag_open_h(DilloHtml * html,const char * tag,int tagsize)2026 static void Html_tag_open_h(DilloHtml *html, const char *tag, int tagsize)
2027 {
2028 a_Html_tag_set_align_attr (html, tag, tagsize);
2029
2030 a_Html_stash_init(html);
2031 S_TOP(html)->parse_mode =
2032 DILLO_HTML_PARSE_MODE_STASH_AND_BODY;
2033 }
2034
2035 /*
2036 * <BR>
2037 */
Html_tag_content_br(DilloHtml * html,const char * tag,int tagsize)2038 static void Html_tag_content_br(DilloHtml *html, const char *tag, int tagsize)
2039 {
2040 HT2TB(html)->addLinebreak (html->wordStyle ());
2041 }
2042
2043 /*
2044 * <FONT>
2045 */
Html_tag_open_font(DilloHtml * html,const char * tag,int tagsize)2046 static void Html_tag_open_font(DilloHtml *html, const char *tag, int tagsize)
2047 {
2048 const char *attrbuf;
2049 char *fontFamily = NULL;
2050 int32_t color;
2051
2052 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "color"))) {
2053 if (prefs.contrast_visited_color && html->InVisitedLink) {
2054 color = html->visited_color;
2055 } else {
2056 /* use the tag-specified color */
2057 color = a_Html_color_parse(html, attrbuf, -1);
2058 }
2059 if (color != -1)
2060 html->styleEngine->setNonCssHint (CSS_PROPERTY_COLOR,
2061 CSS_TYPE_COLOR, color);
2062 }
2063
2064 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "face"))) {
2065 fontFamily = dStrdup(attrbuf);
2066 html->styleEngine->setNonCssHint (CSS_PROPERTY_FONT_FAMILY,
2067 CSS_TYPE_SYMBOL, fontFamily);
2068 }
2069
2070 dFree(fontFamily);
2071 }
2072
2073 /*
2074 * <ABBR>
2075 */
Html_tag_open_abbr(DilloHtml * html,const char * tag,int tagsize)2076 static void Html_tag_open_abbr(DilloHtml *html, const char *tag, int tagsize)
2077 {
2078 const char *attrbuf;
2079
2080 html->styleEngine->inheritBackgroundColor ();
2081
2082 if (prefs.show_tooltip &&
2083 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2084
2085 html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
2086 attrbuf);
2087 }
2088 }
2089
2090 /*
2091 * Read image-associated tag attributes and create new image.
2092 */
a_Html_common_image_attrs(DilloHtml * html,const char * tag,int tagsize)2093 void a_Html_common_image_attrs(DilloHtml *html, const char *tag, int tagsize)
2094 {
2095 char *width_ptr, *height_ptr;
2096 const char *attrbuf;
2097 CssLength l_w = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
2098 CssLength l_h = CSS_CREATE_LENGTH(0.0, CSS_LENGTH_TYPE_AUTO);
2099 int w = 0, h = 0;
2100
2101 if (prefs.show_tooltip &&
2102 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2103 html->styleEngine->setNonCssHint(PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
2104 attrbuf);
2105 }
2106 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2107 height_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "height", NULL);
2108 // Check for malicious values
2109 // TODO: the same for percentage and relative lengths.
2110 if (width_ptr) {
2111 l_w = a_Html_parse_length (html, width_ptr);
2112 w = (int) (CSS_LENGTH_TYPE(l_w) == CSS_LENGTH_TYPE_PX ?
2113 CSS_LENGTH_VALUE(l_w) : 0);
2114 }
2115 if (height_ptr) {
2116 l_h = a_Html_parse_length (html, height_ptr);
2117 h = (int) (CSS_LENGTH_TYPE(l_h) == CSS_LENGTH_TYPE_PX ?
2118 CSS_LENGTH_VALUE(l_h) : 0);
2119 }
2120 /* Check for suspicious image size request that would cause
2121 * an excessive amount of memory to be allocated for the
2122 * image buffer.
2123 * Be careful to avoid integer overflows during the checks.
2124 * There is an additional check in dw/image.cc to catch cases
2125 * where only one dimension is given and the image is scaled
2126 * preserving its original aspect ratio.
2127 * Size requests passed via CSS are also checked there.
2128 */
2129 if (w < 0 || h < 0 ||
2130 w > IMAGE_MAX_AREA || h > IMAGE_MAX_AREA ||
2131 (h > 0 && w > IMAGE_MAX_AREA / h)) {
2132 dFree(width_ptr);
2133 dFree(height_ptr);
2134 width_ptr = height_ptr = NULL;
2135 MSG("a_Html_common_image_attrs: suspicious image size request %d x %d\n",
2136 w, h);
2137 } else {
2138 if (CSS_LENGTH_TYPE(l_w) != CSS_LENGTH_TYPE_AUTO)
2139 html->styleEngine->setNonCssHint (CSS_PROPERTY_WIDTH,
2140 CSS_TYPE_LENGTH_PERCENTAGE, l_w);
2141 if (CSS_LENGTH_TYPE(l_h) != CSS_LENGTH_TYPE_AUTO)
2142 html->styleEngine->setNonCssHint (CSS_PROPERTY_HEIGHT,
2143 CSS_TYPE_LENGTH_PERCENTAGE, l_h);
2144 }
2145
2146 /* TODO: we should scale the image respecting its ratio.
2147 * As the image size is not known at this time, maybe a flag
2148 * can be set to scale it later.
2149 if ((width_ptr && !height_ptr) || (height_ptr && !width_ptr))
2150 [...]
2151 */
2152
2153 /* x_img is an index to a list of {url,image} pairs.
2154 * We know a_Html_image_new() will use size() as its next index */
2155 html->styleEngine->setNonCssHint (PROPERTY_X_IMG, CSS_TYPE_INTEGER,
2156 html->images->size());
2157
2158
2159 dFree(width_ptr);
2160 dFree(height_ptr);
2161 }
2162
a_Html_image_new(DilloHtml * html,const char * tag,int tagsize)2163 DilloImage *a_Html_image_new(DilloHtml *html, const char *tag, int tagsize)
2164 {
2165 bool load_now;
2166 char *alt_ptr;
2167 const char *attrbuf;
2168 DilloUrl *url;
2169 DilloImage *image;
2170
2171 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src")) ||
2172 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
2173 return NULL;
2174
2175 alt_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "alt", NULL);
2176 if ((!alt_ptr || !*alt_ptr) && !prefs.load_images) {
2177 dFree(alt_ptr);
2178 alt_ptr = dStrdup("[IMG]"); // Place holder for img_off mode
2179 }
2180
2181 dw::Image *dw = new dw::Image(alt_ptr);
2182 image =
2183 a_Image_new(html->dw->getLayout(), (void*)(dw::core::ImgRenderer*)dw, 0);
2184
2185 if (HT2TB(html)->getBgColor())
2186 image->bg_color = HT2TB(html)->getBgColor()->getColor();
2187
2188 DilloHtmlImage *hi = dNew(DilloHtmlImage, 1);
2189 hi->url = url;
2190 html->images->increase();
2191 html->images->set(html->images->size() - 1, hi);
2192
2193 load_now = prefs.load_images ||
2194 !dStrAsciiCasecmp(URL_SCHEME(url), "data") ||
2195 (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached);
2196
2197 if (load_now && Html_load_image(html->bw, url, html->page_url, image)) {
2198 // hi->image is NULL if dillo tries to load the image immediately
2199 hi->image = NULL;
2200 } else {
2201 // otherwise a reference is kept in html->images
2202 hi->image = image;
2203 a_Image_ref(image);
2204 }
2205
2206 dFree(alt_ptr);
2207 return image;
2208 }
2209
2210 /*
2211 * Tell cache to retrieve image
2212 */
Html_load_image(BrowserWindow * bw,DilloUrl * url,const DilloUrl * requester,DilloImage * Image)2213 static bool Html_load_image(BrowserWindow *bw, DilloUrl *url,
2214 const DilloUrl *requester, DilloImage *Image)
2215 {
2216 DilloWeb *Web;
2217 int ClientKey;
2218 /* Fill a Web structure for the cache query */
2219 Web = a_Web_new(bw, url, requester);
2220 Web->Image = Image;
2221 a_Image_ref(Image);
2222 Web->flags |= WEB_Image;
2223 /* Request image data from the cache */
2224 if ((ClientKey = a_Capi_open_url(Web, NULL, NULL)) != 0) {
2225 a_Bw_add_client(bw, ClientKey, 0);
2226 a_Bw_add_url(bw, url);
2227 }
2228 return ClientKey != 0;
2229 }
2230
Html_tag_open_img(DilloHtml * html,const char * tag,int tagsize)2231 static void Html_tag_open_img(DilloHtml *html, const char *tag, int tagsize)
2232 {
2233 int space, border;
2234 const char *attrbuf;
2235
2236 a_Html_common_image_attrs(html, tag, tagsize);
2237
2238 /* Spacing to the left and right */
2239 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "hspace"))) {
2240 space = strtol(attrbuf, NULL, 10);
2241 if (space > 0) {
2242 space = CSS_CREATE_LENGTH(space, CSS_LENGTH_TYPE_PX);
2243 html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_LEFT,
2244 CSS_TYPE_LENGTH_PERCENTAGE, space);
2245 html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_RIGHT,
2246 CSS_TYPE_LENGTH_PERCENTAGE, space);
2247 }
2248 }
2249
2250 /* Spacing at the top and bottom */
2251 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "vspace"))) {
2252 space = strtol(attrbuf, NULL, 10);
2253 if (space > 0) {
2254 space = CSS_CREATE_LENGTH(space, CSS_LENGTH_TYPE_PX);
2255 html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_TOP,
2256 CSS_TYPE_LENGTH_PERCENTAGE, space);
2257 html->styleEngine->setNonCssHint (CSS_PROPERTY_MARGIN_BOTTOM,
2258 CSS_TYPE_LENGTH_PERCENTAGE, space);
2259 }
2260 }
2261
2262 /* Border */
2263 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "border"))) {
2264 border = strtol(attrbuf, NULL, 10);
2265 if (border >= 0) {
2266 border = CSS_CREATE_LENGTH(border, CSS_LENGTH_TYPE_PX);
2267 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_WIDTH,
2268 CSS_TYPE_LENGTH_PERCENTAGE, border);
2269 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_WIDTH,
2270 CSS_TYPE_LENGTH_PERCENTAGE, border);
2271 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_WIDTH,
2272 CSS_TYPE_LENGTH_PERCENTAGE, border);
2273 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_WIDTH,
2274 CSS_TYPE_LENGTH_PERCENTAGE, border);
2275
2276 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_STYLE,
2277 CSS_TYPE_ENUM, BORDER_SOLID);
2278 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_STYLE,
2279 CSS_TYPE_ENUM, BORDER_SOLID);
2280 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_STYLE,
2281 CSS_TYPE_ENUM, BORDER_SOLID);
2282 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_STYLE,
2283 CSS_TYPE_ENUM, BORDER_SOLID);
2284 }
2285 }
2286
2287 }
2288
2289 /*
2290 * Create a new Image struct and request the image-url to the cache
2291 * (If it either hits or misses, is not relevant here; that's up to the
2292 * cache functions)
2293 */
Html_tag_content_img(DilloHtml * html,const char * tag,int tagsize)2294 static void Html_tag_content_img(DilloHtml *html, const char *tag, int tagsize)
2295 {
2296 DilloImage *Image;
2297 DilloUrl *usemap_url;
2298 const char *attrbuf;
2299
2300 /* This avoids loading images. Useful for viewing suspicious HTML email. */
2301 if (URL_FLAGS(html->base_url) & URL_SpamSafe)
2302 return;
2303
2304 Image = a_Html_image_new(html, tag, tagsize);
2305 if (!Image)
2306 return;
2307
2308 usemap_url = NULL;
2309 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "usemap")))
2310 /* TODO: usemap URLs outside of the document are not used. */
2311 usemap_url = a_Html_url_new(html, attrbuf, NULL, 0);
2312
2313 // At this point, we know that Image->ir represents an image
2314 // widget. Notice that the order of the casts matters, because of
2315 // multiple inheritance.
2316 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)Image->img_rndr;
2317 HT2TB(html)->addWidget(dwi, html->style());
2318
2319 /* Image maps */
2320 if (a_Html_get_attr(html, tag, tagsize, "ismap")) {
2321 dwi->setIsMap();
2322 _MSG(" Html_tag_open_img: server-side map (ISMAP)\n");
2323 } else if (html->style ()->x_link != -1 &&
2324 usemap_url == NULL) {
2325 /* For simple links, we have to suppress the "image_pressed" signal.
2326 * This is overridden for USEMAP images. */
2327 // a_Dw_widget_set_button_sensitive (IM2DW(Image->dw), FALSE);
2328 }
2329
2330 if (usemap_url) {
2331 dwi->setUseMap(&html->maps, new ::object::String(URL_STR(usemap_url)));
2332 a_Url_free (usemap_url);
2333 }
2334 }
2335
2336 /*
2337 * <map>
2338 */
Html_tag_content_map(DilloHtml * html,const char * tag,int tagsize)2339 static void Html_tag_content_map(DilloHtml *html, const char *tag, int tagsize)
2340 {
2341 char *hash_name;
2342 const char *attrbuf;
2343 DilloUrl *url;
2344
2345 if (html->InFlags & IN_MAP) {
2346 BUG_MSG("Nested <map>.");
2347 } else {
2348 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2349 html->InFlags |= IN_MAP;
2350 hash_name = dStrconcat("#", attrbuf, NULL);
2351 url = a_Html_url_new(html, hash_name, NULL, 0);
2352 html->maps.startNewMap(new ::object::String(URL_STR(url)));
2353 a_Url_free (url);
2354 dFree(hash_name);
2355 } else {
2356 BUG_MSG("<map> requires name attribute.");
2357 }
2358 }
2359 }
2360
2361 /*
2362 * Handle close <MAP>
2363 */
Html_tag_close_map(DilloHtml * html)2364 static void Html_tag_close_map(DilloHtml *html)
2365 {
2366 /* This is a hack for the perhaps frivolous feature of drawing image map
2367 * shapes when there is no image to display. If this map is defined after
2368 * an image that has not been loaded (img != NULL), tell the image to
2369 * redraw. (It will only do so if it uses a map.)
2370 */
2371 for (int i = 0; i < html->images->size(); i++) {
2372 DilloImage *img = html->images->get(i)->image;
2373
2374 if (img) {
2375 // At this point, we know that img->ir represents an image
2376 // widget. (Really? Is this assumtion safe?) Notice that the
2377 // order of the casts matters, because of multiple
2378 // inheritance.
2379 dw::Image *dwi = (dw::Image*)(dw::core::ImgRenderer*)img->img_rndr;
2380 dwi->forceMapRedraw();
2381 }
2382 }
2383 html->InFlags &= ~IN_MAP;
2384 }
2385
2386 /*
2387 * Read coords in a string, returning a vector of ints.
2388 */
2389 static
Html_read_coords(DilloHtml * html,const char * str)2390 misc::SimpleVector<int> *Html_read_coords(DilloHtml *html, const char *str)
2391 {
2392 int coord;
2393 const char *tail = str;
2394 char *newtail = NULL;
2395 misc::SimpleVector<int> *coords = new misc::SimpleVector<int> (4);
2396
2397 while (1) {
2398 coord = strtol(tail, &newtail, 10);
2399 if (coord == 0 && newtail == tail)
2400 break;
2401 coords->increase();
2402 coords->set(coords->size() - 1, coord);
2403 while (isspace(*newtail))
2404 newtail++;
2405 if (!*newtail)
2406 break;
2407 if (*newtail != ',') {
2408 BUG_MSG("<area> coords must be integers separated by commas.");
2409 }
2410 tail = newtail + 1;
2411 }
2412
2413 return coords;
2414 }
2415
2416 /*
2417 * <AREA>
2418 */
2419 static void
Html_tag_content_area(DilloHtml * html,const char * tag,int tagsize)2420 Html_tag_content_area(DilloHtml *html, const char *tag, int tagsize)
2421 {
2422 enum types {UNKNOWN, RECTANGLE, CIRCLE, POLYGON, BACKGROUND};
2423 types type;
2424 misc::SimpleVector<int> *coords = NULL;
2425 DilloUrl* url;
2426 const char *attrbuf;
2427 int link = -1;
2428 Shape *shape = NULL;
2429
2430 if (!(html->InFlags & IN_MAP)) {
2431 BUG_MSG("<area> not inside <map>.");
2432 return;
2433 }
2434 attrbuf = a_Html_get_attr(html, tag, tagsize, "shape");
2435
2436 if (!attrbuf || !*attrbuf || !dStrAsciiCasecmp(attrbuf, "rect")) {
2437 /* the default shape is a rectangle */
2438 type = RECTANGLE;
2439 } else if (dStrAsciiCasecmp(attrbuf, "default") == 0) {
2440 /* "default" is the background */
2441 type = BACKGROUND;
2442 } else if (dStrAsciiCasecmp(attrbuf, "circle") == 0) {
2443 type = CIRCLE;
2444 } else if (dStrnAsciiCasecmp(attrbuf, "poly", 4) == 0) {
2445 type = POLYGON;
2446 } else {
2447 BUG_MSG("<area> unknown shape: '%s'.", attrbuf);
2448 type = UNKNOWN;
2449 }
2450 if (type == RECTANGLE || type == CIRCLE || type == POLYGON) {
2451 /* TODO: add support for coords in % */
2452 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "coords"))) {
2453 coords = Html_read_coords(html, attrbuf);
2454
2455 if (type == RECTANGLE) {
2456 if (coords->size() != 4)
2457 BUG_MSG("<area> rectangle must have four coordinate values.");
2458 if (coords->size() >= 4)
2459 shape = new Rectangle(coords->get(0),
2460 coords->get(1),
2461 coords->get(2) - coords->get(0),
2462 coords->get(3) - coords->get(1));
2463 } else if (type == CIRCLE) {
2464 if (coords->size() != 3)
2465 BUG_MSG("<area> circle must have three coordinate values.");
2466 if (coords->size() >= 3)
2467 shape = new Circle(coords->get(0), coords->get(1),
2468 coords->get(2));
2469 } else if (type == POLYGON) {
2470 Polygon *poly;
2471 int i;
2472 if (coords->size() % 2)
2473 BUG_MSG("<area> polygon with odd number of coordinates.");
2474 shape = poly = new Polygon();
2475 for (i = 0; i < (coords->size() / 2); i++)
2476 poly->addPoint(coords->get(2*i), coords->get(2*i + 1));
2477 }
2478 delete(coords);
2479 }
2480 }
2481 if (shape != NULL || type == BACKGROUND) {
2482 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2483 url = a_Html_url_new(html, attrbuf, NULL, 0);
2484 dReturn_if_fail ( url != NULL );
2485 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "alt")))
2486 a_Url_set_alt(url, attrbuf);
2487
2488 link = Html_set_new_link(html, &url);
2489 }
2490 if (type == BACKGROUND)
2491 html->maps.setCurrentMapDefaultLink(link);
2492 else
2493 html->maps.addShapeToCurrentMap(shape, link);
2494 }
2495 }
2496
2497 /*
2498 * <OBJECT>
2499 * Simply provide a link if the object is something downloadable.
2500 */
Html_tag_open_object(DilloHtml * html,const char * tag,int tagsize)2501 static void Html_tag_open_object(DilloHtml *html, const char *tag, int tagsize)
2502 {
2503 DilloUrl *url, *base_url = NULL;
2504 const char *attrbuf;
2505
2506 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "codebase"))) {
2507 base_url = a_Html_url_new(html, attrbuf, NULL, 0);
2508 }
2509
2510 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "data"))) {
2511 url = a_Html_url_new(html, attrbuf,
2512 URL_STR(base_url), (base_url != NULL));
2513 dReturn_if_fail ( url != NULL );
2514
2515 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
2516 html->styleEngine->setPseudoVisited ();
2517 } else {
2518 html->styleEngine->setPseudoLink ();
2519 }
2520
2521 html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
2522 Html_set_new_link(html, &url));
2523 }
2524 a_Url_free(base_url);
2525 }
2526
Html_tag_content_object(DilloHtml * html,const char * tag,int tagsize)2527 static void Html_tag_content_object(DilloHtml *html, const char *tag,
2528 int tagsize)
2529 {
2530 if (a_Html_get_attr(html, tag, tagsize, "data"))
2531 HT2TB(html)->addText("[OBJECT]", html->wordStyle ());
2532 }
2533
2534 /*
2535 * <VIDEO>
2536 * Provide a link to the video.
2537 */
Html_tag_open_video(DilloHtml * html,const char * tag,int tagsize)2538 static void Html_tag_open_video(DilloHtml *html, const char *tag, int tagsize)
2539 {
2540 DilloUrl *url;
2541 const char *attrbuf;
2542
2543 if (html->InFlags & IN_MEDIA) {
2544 MSG("<video> not handled when already inside a media element.\n");
2545 return;
2546 }
2547 /* TODO: poster attr */
2548
2549 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2550 url = a_Html_url_new(html, attrbuf, NULL, 0);
2551 dReturn_if_fail ( url != NULL );
2552
2553 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
2554 html->styleEngine->setPseudoVisited ();
2555 } else {
2556 html->styleEngine->setPseudoLink ();
2557 }
2558
2559 html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
2560 Html_set_new_link(html, &url));
2561
2562 HT2TB(html)->addText("[VIDEO]", html->wordStyle ());
2563 }
2564 html->InFlags |= IN_MEDIA;
2565 }
2566
2567 /*
2568 * <AUDIO>
2569 * Provide a link to the audio.
2570 */
Html_tag_open_audio(DilloHtml * html,const char * tag,int tagsize)2571 static void Html_tag_open_audio(DilloHtml *html, const char *tag, int tagsize)
2572 {
2573 DilloUrl *url;
2574 const char *attrbuf;
2575
2576 if (html->InFlags & IN_MEDIA) {
2577 MSG("<audio> not handled when already inside a media element.\n");
2578 return;
2579 }
2580
2581 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2582 url = a_Html_url_new(html, attrbuf, NULL, 0);
2583 dReturn_if_fail ( url != NULL );
2584
2585 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
2586 html->styleEngine->setPseudoVisited ();
2587 } else {
2588 html->styleEngine->setPseudoLink ();
2589 }
2590
2591 html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
2592 Html_set_new_link(html, &url));
2593
2594 HT2TB(html)->addText("[AUDIO]", html->wordStyle ());
2595 }
2596 html->InFlags |= IN_MEDIA;
2597 }
2598
2599 /*
2600 * <SOURCE>
2601 * Media resource; provide a link to its address.
2602 */
Html_tag_open_source(DilloHtml * html,const char * tag,int tagsize)2603 static void Html_tag_open_source(DilloHtml *html, const char *tag,
2604 int tagsize)
2605 {
2606 const char *attrbuf;
2607
2608 if (!(html->InFlags & IN_MEDIA)) {
2609 BUG_MSG("<source> not inside a media element.");
2610 return;
2611 }
2612 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2613 BUG_MSG("<source> requires src attribute.");
2614 return;
2615 } else {
2616 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2617
2618 dReturn_if_fail ( url != NULL );
2619
2620 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
2621 html->styleEngine->setPseudoVisited ();
2622 } else {
2623 html->styleEngine->setPseudoLink ();
2624 }
2625 html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
2626 Html_set_new_link(html, &url));
2627 }
2628 }
2629
Html_tag_content_source(DilloHtml * html,const char * tag,int tagsize)2630 static void Html_tag_content_source(DilloHtml *html, const char *tag,
2631 int tagsize)
2632 {
2633 if ((html->InFlags & IN_MEDIA) && a_Html_get_attr(html, tag, tagsize,"src"))
2634 HT2TB(html)->addText("[MEDIA SOURCE]", html->wordStyle ());
2635 }
2636
2637 /*
2638 * Media (AUDIO/VIDEO) close function
2639 */
Html_tag_close_media(DilloHtml * html)2640 static void Html_tag_close_media(DilloHtml *html)
2641 {
2642 html->InFlags &= ~IN_MEDIA;
2643 }
2644
2645 /*
2646 * <EMBED>
2647 * Provide a link to embedded content.
2648 */
Html_tag_open_embed(DilloHtml * html,const char * tag,int tagsize)2649 static void Html_tag_open_embed(DilloHtml *html, const char *tag, int tagsize)
2650 {
2651 const char *attrbuf;
2652
2653 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "src"))) {
2654 DilloUrl *url = a_Html_url_new(html, attrbuf, NULL, 0);
2655
2656 dReturn_if_fail ( url != NULL );
2657
2658 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
2659 html->styleEngine->setPseudoVisited ();
2660 } else {
2661 html->styleEngine->setPseudoLink ();
2662 }
2663
2664 html->styleEngine->setNonCssHint(PROPERTY_X_LINK, CSS_TYPE_INTEGER,
2665 Html_set_new_link(html, &url));
2666 }
2667 }
2668
Html_tag_content_embed(DilloHtml * html,const char * tag,int tagsize)2669 static void Html_tag_content_embed(DilloHtml *html,const char *tag,int tagsize)
2670 {
2671 if (a_Html_get_attr(html, tag, tagsize, "src"))
2672 HT2TB(html)->addText("[EMBED]", html->wordStyle ());
2673 }
2674
2675 /*
2676 * Test and extract the link from a javascript instruction.
2677 */
Html_get_javascript_link(DilloHtml * html)2678 static const char* Html_get_javascript_link(DilloHtml *html)
2679 {
2680 size_t i;
2681 char ch, *p1, *p2;
2682 Dstr *Buf = html->attr_data;
2683
2684 if (dStrnAsciiCasecmp("javascript", Buf->str, 10) == 0) {
2685 i = strcspn(Buf->str, "'\"");
2686 ch = Buf->str[i];
2687 if ((ch == '"' || ch == '\'') &&
2688 (p2 = strchr(Buf->str + i + 1 , ch))) {
2689 p1 = Buf->str + i;
2690 BUG_MSG("Link depends on javascript().");
2691 dStr_truncate(Buf, p2 - Buf->str);
2692 dStr_erase(Buf, 0, p1 - Buf->str + 1);
2693 }
2694 }
2695 return Buf->str;
2696 }
2697
2698 /*
2699 * Register an anchor for this page.
2700 */
Html_add_anchor(DilloHtml * html,const char * name)2701 static void Html_add_anchor(DilloHtml *html, const char *name)
2702 {
2703 _MSG("Registering ANCHOR: %s\n", name);
2704 if (!HT2TB(html)->addAnchor (name, html->style ()))
2705 BUG_MSG("Anchor names must be unique within the document (\"%s\").",
2706 name);
2707 /*
2708 * According to Sec. 12.2.1 of the HTML 4.01 spec, "anchor names that
2709 * differ only in case may not appear in the same document", but
2710 * "comparisons between fragment identifiers and anchor names must be
2711 * done by exact (case-sensitive) match." We ignore the case issue and
2712 * always test for exact matches. Moreover, what does uppercase mean
2713 * for Unicode characters outside the ASCII range?
2714 */
2715 }
2716
2717 /*
2718 * <A>
2719 */
Html_tag_open_a(DilloHtml * html,const char * tag,int tagsize)2720 static void Html_tag_open_a(DilloHtml *html, const char *tag, int tagsize)
2721 {
2722 DilloUrl *url;
2723 const char *attrbuf;
2724
2725 /* TODO: add support for MAP with A HREF */
2726 if (html->InFlags & IN_MAP)
2727 Html_tag_content_area(html, tag, tagsize);
2728
2729 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
2730 /* if it's a javascript link, extract the reference. */
2731 if (D_ASCII_TOLOWER(attrbuf[0]) == 'j')
2732 attrbuf = Html_get_javascript_link(html);
2733
2734 url = a_Html_url_new(html, attrbuf, NULL, 0);
2735 dReturn_if_fail ( url != NULL );
2736
2737 if (a_Capi_get_flags_with_redirection(url) & CAPI_IsCached) {
2738 html->InVisitedLink = true;
2739 html->styleEngine->setPseudoVisited ();
2740 if (html->non_css_visited_color != -1)
2741 html->styleEngine->setNonCssHint(CSS_PROPERTY_COLOR,
2742 CSS_TYPE_COLOR,
2743 html->non_css_visited_color);
2744 } else {
2745 html->styleEngine->setPseudoLink ();
2746 if (html->non_css_link_color != -1)
2747 html->styleEngine->setNonCssHint(CSS_PROPERTY_COLOR,
2748 CSS_TYPE_COLOR,
2749 html->non_css_link_color);
2750 }
2751
2752 html->styleEngine->setNonCssHint (PROPERTY_X_LINK, CSS_TYPE_INTEGER,
2753 Html_set_new_link(html, &url));
2754 }
2755 if (prefs.show_tooltip &&
2756 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
2757 html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
2758 attrbuf);
2759 }
2760
2761 html->styleEngine->inheritBackgroundColor ();
2762
2763 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "name"))) {
2764 char *nameVal;
2765 const char *id = html->styleEngine->getId ();
2766
2767 if (prefs.show_extra_warnings)
2768 Html_check_name_val(html, attrbuf, "name");
2769
2770 nameVal = a_Url_decode_hex_str(attrbuf);
2771
2772 if (nameVal) {
2773 /* We compare the "id" value with the url-decoded "name" value */
2774 if (!id || strcmp(nameVal, id)) {
2775 if (id)
2776 BUG_MSG("In <a>, id ('%s') and name ('%s') attributes differ.",
2777 id, nameVal);
2778 Html_add_anchor(html, nameVal);
2779 }
2780
2781 dFree(nameVal);
2782 }
2783 }
2784 }
2785
2786 /*
2787 * <A> close function
2788 */
Html_tag_close_a(DilloHtml * html)2789 static void Html_tag_close_a(DilloHtml *html)
2790 {
2791 html->InVisitedLink = false;
2792 }
2793
2794 /*
2795 * <BLOCKQUOTE>
2796 */
Html_tag_open_blockquote(DilloHtml * html,const char * tag,int tagsize)2797 static void Html_tag_open_blockquote(DilloHtml *html,
2798 const char *tag, int tagsize)
2799 {
2800 Html_add_textblock(html, 9);
2801 }
2802
2803 /*
2804 * <Q>
2805 */
Html_tag_open_q(DilloHtml * html,const char * tag,int tagsize)2806 static void Html_tag_open_q(DilloHtml *html, const char *tag, int tagsize)
2807 {
2808 /*
2809 * Left Double Quotation Mark, which is wrong in many cases, but
2810 * should at least be widely recognized.
2811 */
2812 const char *U201C = "\xe2\x80\x9c";
2813
2814 html->styleEngine->inheritBackgroundColor ();
2815 HT2TB(html)->addText (U201C, html->wordStyle ());
2816 }
2817
2818 /*
2819 * </Q>
2820 */
Html_tag_close_q(DilloHtml * html)2821 static void Html_tag_close_q(DilloHtml *html)
2822 {
2823 /* Right Double Quotation Mark */
2824 const char *U201D = "\xe2\x80\x9d";
2825
2826 HT2TB(html)->addText (U201D, html->wordStyle ());
2827 }
2828
2829 /*
2830 * Handle the <UL> tag.
2831 */
Html_tag_open_ul(DilloHtml * html,const char * tag,int tagsize)2832 static void Html_tag_open_ul(DilloHtml *html, const char *tag, int tagsize)
2833 {
2834 const char *attrbuf;
2835 ListStyleType list_style_type;
2836
2837 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2838
2839 /* list_style_type explicitly defined */
2840 if (dStrAsciiCasecmp(attrbuf, "disc") == 0)
2841 list_style_type = LIST_STYLE_TYPE_DISC;
2842 else if (dStrAsciiCasecmp(attrbuf, "circle") == 0)
2843 list_style_type = LIST_STYLE_TYPE_CIRCLE;
2844 else if (dStrAsciiCasecmp(attrbuf, "square") == 0)
2845 list_style_type = LIST_STYLE_TYPE_SQUARE;
2846 else
2847 /* invalid value */
2848 list_style_type = LIST_STYLE_TYPE_DISC;
2849
2850 html->styleEngine->setNonCssHint (CSS_PROPERTY_LIST_STYLE_TYPE,
2851 CSS_TYPE_ENUM, list_style_type);
2852 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2853 BUG_MSG("<ul> type attribute is obsolete.");
2854 }
2855
2856 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2857 S_TOP(html)->list_number = 0;
2858 S_TOP(html)->ref_list_item = NULL;
2859 }
2860
2861 /*
2862 * Handle the <DIR> or <MENU> tag.
2863 * (Deprecated and almost the same as <UL>)
2864 */
Html_tag_open_dir(DilloHtml * html,const char * tag,int tagsize)2865 static void Html_tag_open_dir(DilloHtml *html, const char *tag, int tagsize)
2866 {
2867 html->styleEngine->inheritBackgroundColor ();
2868 HT2TB(html)->addParbreak (9, html->wordStyle ());
2869
2870 S_TOP(html)->list_type = HTML_LIST_UNORDERED;
2871 S_TOP(html)->list_number = 0;
2872 S_TOP(html)->ref_list_item = NULL;
2873
2874 if (prefs.show_extra_warnings)
2875 BUG_MSG("Obsolete list type; use <ul> instead.");
2876 }
2877
2878 /*
2879 * Handle the <MENU> tag.
2880 */
Html_tag_open_menu(DilloHtml * html,const char * tag,int tagsize)2881 static void Html_tag_open_menu(DilloHtml *html, const char *tag, int tagsize)
2882 {
2883 /* In another bit of ridiculous mess from the HTML5 world, the menu
2884 * element, which was deprecated in HTML4:
2885 * - does not appear at all in W3C's HTML5 spec
2886 * - appears in WHATWG's HTML5 doc and the W3C's 5.1 draft, where it
2887 * means something totally different than it did in the old days
2888 * (now it's for popup menus and toolbar menus rather than being a
2889 * sort of list).
2890 */
2891 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f))
2892 Html_tag_open_dir(html, tag, tagsize);
2893 }
2894
2895 /*
2896 * Handle the <OL> tag.
2897 */
Html_tag_open_ol(DilloHtml * html,const char * tag,int tagsize)2898 static void Html_tag_open_ol(DilloHtml *html, const char *tag, int tagsize)
2899 {
2900 const char *attrbuf;
2901 int n = 1;
2902
2903 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "type"))) {
2904 ListStyleType listStyleType = LIST_STYLE_TYPE_DECIMAL;
2905
2906 if (*attrbuf == '1')
2907 listStyleType = LIST_STYLE_TYPE_DECIMAL;
2908 else if (*attrbuf == 'a')
2909 listStyleType = LIST_STYLE_TYPE_LOWER_ALPHA;
2910 else if (*attrbuf == 'A')
2911 listStyleType = LIST_STYLE_TYPE_UPPER_ALPHA;
2912 else if (*attrbuf == 'i')
2913 listStyleType = LIST_STYLE_TYPE_LOWER_ROMAN;
2914 else if (*attrbuf == 'I')
2915 listStyleType = LIST_STYLE_TYPE_UPPER_ROMAN;
2916
2917 html->styleEngine->setNonCssHint (CSS_PROPERTY_LIST_STYLE_TYPE,
2918 CSS_TYPE_ENUM, listStyleType);
2919 }
2920
2921 S_TOP(html)->list_type = HTML_LIST_ORDERED;
2922
2923 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "start")) &&
2924 (n = (int) strtol(attrbuf, NULL, 10)) < 0) {
2925 BUG_MSG("Illegal '-' character in START attribute; Starting from 0.");
2926 n = 0;
2927 }
2928 S_TOP(html)->list_number = n;
2929 S_TOP(html)->ref_list_item = NULL;
2930 }
2931
2932 /*
2933 * Handle the <LI> tag.
2934 */
Html_tag_open_li(DilloHtml * html,const char * tag,int tagsize)2935 static void Html_tag_open_li(DilloHtml *html, const char *tag, int tagsize)
2936 {
2937 Style *style = html->style ();
2938 int *list_number;
2939 const char *attrbuf;
2940
2941 if (S_TOP(html)->list_type == HTML_LIST_NONE)
2942 BUG_MSG("<li> outside <ul> or <ol>.");
2943
2944 html->InFlags |= IN_LI;
2945
2946 /* Get our parent tag's variables (used as state storage) */
2947 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
2948
2949 if (style->listStyleType >= LIST_STYLE_TYPE_DECIMAL) {
2950 // ordered
2951 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "value")) &&
2952 (*list_number = strtol(attrbuf, NULL, 10)) < 0) {
2953 BUG_MSG("Illegal negative list value attribute; Starting from 0.");
2954 *list_number = 0;
2955 }
2956 }
2957 }
2958
2959 /*
2960 * Close <LI>
2961 */
Html_tag_close_li(DilloHtml * html)2962 static void Html_tag_close_li(DilloHtml *html)
2963 {
2964 html->InFlags &= ~IN_LI;
2965 ((ListItem *)html->dw)->flush ();
2966 }
2967
2968 /*
2969 * <HR>
2970 */
Html_tag_open_hr(DilloHtml * html,const char * tag,int tagsize)2971 static void Html_tag_open_hr(DilloHtml *html, const char *tag, int tagsize)
2972 {
2973 char *width_ptr;
2974 const char *attrbuf;
2975 int32_t size = 0;
2976
2977 width_ptr = a_Html_get_attr_wdef(html, tag, tagsize, "width", NULL);
2978 if (width_ptr) {
2979 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2980 BUG_MSG("<hr> width attribute is obsolete.");
2981 html->styleEngine->setNonCssHint (CSS_PROPERTY_WIDTH,
2982 CSS_TYPE_LENGTH_PERCENTAGE,
2983 a_Html_parse_length (html, width_ptr));
2984 dFree(width_ptr);
2985 }
2986
2987 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "size"))) {
2988 size = strtol(attrbuf, NULL, 10);
2989 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2990 BUG_MSG("<hr> size attribute is obsolete.");
2991 }
2992
2993 a_Html_tag_set_align_attr(html, tag, tagsize);
2994
2995 /* TODO: evaluate attribute */
2996 if (a_Html_get_attr(html, tag, tagsize, "noshade")) {
2997 if (html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
2998 BUG_MSG("<hr> noshade attribute is obsolete.");
2999 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_STYLE,
3000 CSS_TYPE_ENUM, BORDER_SOLID);
3001 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_STYLE,
3002 CSS_TYPE_ENUM, BORDER_SOLID);
3003 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_STYLE,
3004 CSS_TYPE_ENUM, BORDER_SOLID);
3005 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_STYLE,
3006 CSS_TYPE_ENUM, BORDER_SOLID);
3007
3008 if (size <= 0)
3009 size = 1;
3010 }
3011
3012 if (size > 0) {
3013 CssLength size_top = CSS_CREATE_LENGTH ((size+1)/2, CSS_LENGTH_TYPE_PX);
3014 CssLength size_bottom = CSS_CREATE_LENGTH (size / 2, CSS_LENGTH_TYPE_PX);
3015 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_TOP_WIDTH,
3016 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3017 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_LEFT_WIDTH,
3018 CSS_TYPE_LENGTH_PERCENTAGE, size_top);
3019 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_BOTTOM_WIDTH,
3020 CSS_TYPE_LENGTH_PERCENTAGE,
3021 size_bottom);
3022 html->styleEngine->setNonCssHint (CSS_PROPERTY_BORDER_RIGHT_WIDTH,
3023 CSS_TYPE_LENGTH_PERCENTAGE,
3024 size_bottom);
3025 }
3026
3027 }
3028
Html_tag_content_hr(DilloHtml * html,const char * tag,int tagsize)3029 static void Html_tag_content_hr(DilloHtml *html, const char *tag, int tagsize)
3030 {
3031 Widget *hruler;
3032 HT2TB(html)->addParbreak (5, html->wordStyle ());
3033
3034 hruler = new Ruler();
3035 hruler->setStyle (html->style ());
3036 HT2TB(html)->addWidget (hruler, html->style ());
3037 HT2TB(html)->addParbreak (5, html->wordStyle ());
3038 }
3039
3040 /*
3041 * <DL>
3042 */
Html_tag_open_dl(DilloHtml * html,const char * tag,int tagsize)3043 static void Html_tag_open_dl(DilloHtml *html, const char *tag, int tagsize)
3044 {
3045 /* may want to actually do some stuff here. */
3046 html->styleEngine->inheritBackgroundColor ();
3047 HT2TB(html)->addParbreak (9, html->wordStyle ());
3048 }
3049
3050 /*
3051 * <DT>
3052 */
Html_tag_open_dt(DilloHtml * html,const char * tag,int tagsize)3053 static void Html_tag_open_dt(DilloHtml *html, const char *tag, int tagsize)
3054 {
3055 html->styleEngine->inheritBackgroundColor ();
3056 HT2TB(html)->addParbreak (9, html->wordStyle ());
3057 }
3058
3059 /*
3060 * <DD>
3061 */
Html_tag_open_dd(DilloHtml * html,const char * tag,int tagsize)3062 static void Html_tag_open_dd(DilloHtml *html, const char *tag, int tagsize)
3063 {
3064 Html_add_textblock(html, 9);
3065 }
3066
3067 /*
3068 * <PRE>
3069 */
Html_tag_open_pre(DilloHtml * html,const char * tag,int tagsize)3070 static void Html_tag_open_pre(DilloHtml *html, const char *tag, int tagsize)
3071 {
3072 html->styleEngine->inheritBackgroundColor ();
3073 HT2TB(html)->addParbreak (9, html->wordStyle ());
3074
3075 html->InFlags |= IN_PRE;
3076 }
3077
3078 /*
3079 * Custom close for <PRE>
3080 */
Html_tag_close_pre(DilloHtml * html)3081 static void Html_tag_close_pre(DilloHtml *html)
3082 {
3083 html->InFlags &= ~IN_PRE;
3084 }
3085
3086 /*
3087 * Check whether a tag is in the "excluding" element set for PRE
3088 * Excl. Set = {IMG, OBJECT, APPLET, BIG, SMALL, SUB, SUP, FONT, BASEFONT}
3089 */
Html_tag_pre_excludes(DilloHtml * html,int tag_idx)3090 static int Html_tag_pre_excludes(DilloHtml *html, int tag_idx)
3091 {
3092 if (!(html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)) {
3093 /* HTML5 doesn't say anything about excluding elements */
3094 const char *es_set[] = {"img", "object", "applet", "big", "small", "sub",
3095 "sup", "font", "basefont", NULL};
3096 static int ei_set[10], i;
3097
3098 /* initialize array */
3099 if (!ei_set[0])
3100 for (i = 0; es_set[i]; ++i)
3101 ei_set[i] = a_Html_tag_index(es_set[i]);
3102
3103 for (i = 0; ei_set[i]; ++i)
3104 if (tag_idx == ei_set[i])
3105 return 1;
3106 }
3107 return 0;
3108 }
3109
3110 /*
3111 * Update the document's content type information based on meta tag data.
3112 */
Html_update_content_type(DilloHtml * html,const char * content)3113 static void Html_update_content_type(DilloHtml *html, const char *content)
3114 {
3115 const char *new_content = a_Capi_set_content_type(html->page_url, content,
3116 "meta");
3117 /* Cannot ask cache whether the content type was changed, as
3118 * this code in another bw might have already changed it for us.
3119 */
3120 if (a_Misc_content_type_cmp(html->content_type, new_content)) {
3121 html->stop_parser = true; /* The cache buffer is no longer valid */
3122 a_UIcmd_repush(html->bw);
3123 }
3124 }
3125
3126 /*
3127 * Handle <META>
3128 * We do not support http-equiv=refresh with delay>0 because it's
3129 * non standard, (the HTML 4.01 SPEC recommends explicitly to avoid it).
3130 * More info at:
3131 * http://lists.w3.org/Archives/Public/www-html/2000Feb/thread.html#msg232
3132 * Instant client-side redirects (delay=0) are supported:
3133 * http://www.w3.org/TR/2008/NOTE-WCAG20-TECHS-20081211/H76.html
3134 *
3135 * TODO: Note that we're sending custom HTML while still IN_HEAD. This
3136 * is a hackish way to put the message. A much cleaner approach is to
3137 * build a custom widget for it.
3138 */
Html_tag_open_meta(DilloHtml * html,const char * tag,int tagsize)3139 static void Html_tag_open_meta(DilloHtml *html, const char *tag, int tagsize)
3140 {
3141 const char meta_template[] =
3142 "<table width='100%%'><tr><td bgcolor='#ee0000'>Warning:</td>\n"
3143 " <td bgcolor='#8899aa' width='100%%'>\n"
3144 " This page uses the NON-STANDARD meta refresh tag.<br> The HTML 4.01 SPEC\n"
3145 " (sec 7.4.4) recommends explicitly to avoid it.</td></tr>\n"
3146 " <tr><td bgcolor='#a0a0a0' colspan='2'>The author wanted you to go\n"
3147 " <a href='%s'>here</a>%s</td></tr></table><br>\n";
3148
3149 const char *p, *equiv, *charset, *content;
3150 char delay_str[64], *mr_url;
3151 DilloUrl *new_url;
3152 int delay;
3153
3154 /* only valid inside HEAD */
3155 if (!(html->InFlags & IN_HEAD)) {
3156 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3157 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3158 /* With the HTML 5.1 draft spec, meta with itemprop may appear
3159 * in the body.
3160 */
3161 BUG_MSG("This <meta> element must be inside the HEAD section.");
3162 }
3163 return;
3164 }
3165
3166 if ((equiv = a_Html_get_attr(html, tag, tagsize, "http-equiv"))) {
3167 if (!dStrAsciiCasecmp(equiv, "refresh") &&
3168 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3169
3170 /* Get delay, if present, and make a message with it */
3171 if ((delay = strtol(content, NULL, 0))) {
3172 snprintf(delay_str, 64, " after %d second%s.",
3173 delay, (delay > 1) ? "s" : "");
3174 } else {
3175 sprintf(delay_str, ".");
3176 }
3177 /* Skip to anything after "URL=" or ";" if "URL=" is not found */
3178 if ((p = dStriAsciiStr(content, "url=")))
3179 content = p + strlen("url=");
3180 else if ((p = strstr(content, ";")))
3181 content = p + strlen(";");
3182 /* Handle the case of a quoted URL */
3183 if (*content == '"' || *content == '\'') {
3184 if ((p = strchr(content + 1, *content)))
3185 mr_url = dStrndup(content + 1, p - content - 1);
3186 else
3187 mr_url = dStrdup(content + 1);
3188 } else {
3189 mr_url = dStrdup(content);
3190 }
3191 new_url = a_Html_url_new(html, mr_url, NULL, 0);
3192
3193 if (a_Url_cmp(html->base_url, new_url) == 0) {
3194 /* redirection loop, or empty url string: ignore */
3195 BUG_MSG("<meta> refresh: %s.",
3196 *mr_url ? "redirection loop" : "no target URL");
3197 } else if (delay == 0) {
3198 /* zero-delay redirection */
3199 html->stop_parser = true;
3200 if (URL_FLAGS(html->base_url) & URL_SpamSafe) {
3201 a_UIcmd_set_msg(html->bw,
3202 "WARNING: local URL with META refresh. Aborting.");
3203 } else if (a_Capi_dpi_verify_request(html->bw, new_url)) {
3204 a_UIcmd_redirection0((void*)html->bw, new_url);
3205 }
3206 } else {
3207 /* Send a custom HTML message.
3208 * TODO: This is a hairy hack,
3209 * It'd be much better to build a widget. */
3210 Dstr *ds_msg = dStr_sized_new(256);
3211 dStr_sprintf(ds_msg, meta_template, URL_STR(new_url), delay_str);
3212 {
3213 int o_InFlags = html->InFlags;
3214 int o_TagSoup = html->TagSoup;
3215 html->InFlags = IN_BODY + IN_META_HACK;
3216 html->TagSoup = false;
3217 Html_write_raw(html, ds_msg->str, ds_msg->len, 0);
3218 html->TagSoup = o_TagSoup;
3219 html->InFlags = o_InFlags;
3220 }
3221 dStr_free(ds_msg, 1);
3222 }
3223 a_Url_free(new_url);
3224 dFree(mr_url);
3225
3226 } else if (!dStrAsciiCasecmp(equiv, "content-type") &&
3227 (content = a_Html_get_attr(html, tag, tagsize, "content"))) {
3228 _MSG("Html_tag_open_meta: content={%s}\n", content);
3229 Html_update_content_type(html, content);
3230 }
3231 } else if (html->DocType == DT_HTML && html->DocTypeVersion == 5.0f &&
3232 (charset = a_Html_get_attr(html, tag, tagsize, "charset"))) {
3233 char *content = dStrconcat("text/html; charset=", charset, NULL);
3234
3235 Html_update_content_type(html, content);
3236 dFree(content);
3237 }
3238 }
3239
3240 /*
3241 * Called by the network engine when a stylesheet has new data.
3242 */
Html_css_load_callback(int Op,CacheClient_t * Client)3243 static void Html_css_load_callback(int Op, CacheClient_t *Client)
3244 {
3245 _MSG("Html_css_load_callback: Op=%d\n", Op);
3246 if (Op) { /* EOF */
3247 BrowserWindow *bw = ((DilloWeb *)Client->Web)->bw;
3248 /* Repush when we've got them all */
3249 if (--bw->NumPendingStyleSheets == 0)
3250 a_UIcmd_repush(bw);
3251 }
3252 }
3253
3254 /*
3255 * Tell cache to retrieve a stylesheet
3256 */
a_Html_load_stylesheet(DilloHtml * html,DilloUrl * url)3257 void a_Html_load_stylesheet(DilloHtml *html, DilloUrl *url)
3258 {
3259 char *data;
3260 int len;
3261
3262 dReturn_if (url == NULL || ! prefs.load_stylesheets);
3263
3264 _MSG("Html_load_stylesheet: ");
3265 if (a_Capi_get_buf(url, &data, &len)) {
3266 _MSG("cached URL=%s len=%d", URL_STR(url), len);
3267 if (a_Capi_get_flags_with_redirection(url) & CAPI_Completed) {
3268 if (strncmp("@charset \"", data, 10) == 0) {
3269 char *endq = strchr(data+10, '"');
3270
3271 if (endq && (endq - data <= 51)) {
3272 /* IANA limits charset names to 40 characters */
3273 char *content_type;
3274
3275 *endq = '\0';
3276 content_type = dStrconcat("text/css; charset=", data+10, NULL);
3277 *endq = '"';
3278 a_Capi_unref_buf(url);
3279 a_Capi_set_content_type(url, content_type, "meta");
3280 dFree(content_type);
3281 a_Capi_get_buf(url, &data, &len);
3282 }
3283 }
3284 html->styleEngine->parse(html, url, data, len, CSS_ORIGIN_AUTHOR);
3285 }
3286 a_Capi_unref_buf(url);
3287 } else {
3288 /* Fill a Web structure for the cache query */
3289 int ClientKey;
3290 DilloWeb *Web = a_Web_new(html->bw, url, html->page_url);
3291 Web->flags |= WEB_Stylesheet;
3292 if ((ClientKey = a_Capi_open_url(Web, Html_css_load_callback, NULL))) {
3293 ++html->bw->NumPendingStyleSheets;
3294 a_Bw_add_client(html->bw, ClientKey, 0);
3295 a_Bw_add_url(html->bw, url);
3296 MSG("NumPendingStyleSheets=%d\n", html->bw->NumPendingStyleSheets);
3297 }
3298 }
3299 _MSG("\n");
3300 }
3301
3302 /*
3303 * Parse the LINK element (Only CSS stylesheets by now).
3304 * (If it either hits or misses, is not relevant here; that's up to the
3305 * cache functions)
3306 *
3307 * TODO: How will we know when to use "handheld"? Ask the html->bw->ui for
3308 * screen dimensions, or a dillorc preference.
3309 */
Html_tag_open_link(DilloHtml * html,const char * tag,int tagsize)3310 static void Html_tag_open_link(DilloHtml *html, const char *tag, int tagsize)
3311 {
3312 DilloUrl *url;
3313 const char *attrbuf;
3314
3315 //char *tag_str = dStrndup(tag, tagsize);
3316 //MSG("Html_tag_open_link(): %s\n", tag_str);
3317 //dFree(tag_str);
3318
3319 /* When viewing suspicious HTML email, don't load LINK */
3320 dReturn_if (URL_FLAGS(html->base_url) & URL_SpamSafe);
3321
3322 /* Ignore LINK outside HEAD */
3323 if (!(html->InFlags & IN_HEAD)) {
3324 if (!((html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f) &&
3325 a_Html_get_attr(html, tag, tagsize, "itemprop"))) {
3326 /* With the HTML 5.1 draft spec, link with itemprop may appear
3327 * in the body.
3328 */
3329 BUG_MSG("This <link> element must be inside the HEAD section.");
3330 }
3331 return;
3332 }
3333 /* Remote stylesheets enabled? */
3334 dReturn_if_fail (prefs.load_stylesheets);
3335 /* CSS stylesheet link */
3336 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "rel")) ||
3337 dStrAsciiCasecmp(attrbuf, "stylesheet"))
3338 return;
3339
3340 /* IMPLIED attributes? */
3341 if (((attrbuf = a_Html_get_attr(html, tag, tagsize, "type")) &&
3342 dStrAsciiCasecmp(attrbuf, "text/css")) ||
3343 ((attrbuf = a_Html_get_attr(html, tag, tagsize, "media")) &&
3344 !dStriAsciiStr(attrbuf, "screen") && dStrAsciiCasecmp(attrbuf, "all")))
3345 return;
3346
3347 if (!(attrbuf = a_Html_get_attr(html, tag, tagsize, "href")) ||
3348 !(url = a_Html_url_new(html, attrbuf, NULL, 0)))
3349 return;
3350
3351 _MSG(" Html_tag_open_link(): addCssUrl %s\n", URL_STR(url));
3352
3353 html->addCssUrl(url);
3354 a_Url_free(url);
3355 }
3356
3357 /*
3358 * Set the Document Base URI
3359 */
Html_tag_open_base(DilloHtml * html,const char * tag,int tagsize)3360 static void Html_tag_open_base(DilloHtml *html, const char *tag, int tagsize)
3361 {
3362 const char *attrbuf;
3363 DilloUrl *BaseUrl;
3364
3365 if (html->InFlags & IN_HEAD) {
3366 if ((attrbuf = a_Html_get_attr(html, tag, tagsize, "href"))) {
3367 BaseUrl = a_Html_url_new(html, attrbuf, "", 1);
3368 if (URL_SCHEME_(BaseUrl)) {
3369 /* Pass the URL_SpamSafe flag to the new base url */
3370 a_Url_set_flags(
3371 BaseUrl, URL_FLAGS(html->base_url) & URL_SpamSafe);
3372 a_Url_free(html->base_url);
3373 html->base_url = BaseUrl;
3374 } else {
3375 BUG_MSG("<base> URI is relative (it MUST be absolute).");
3376 a_Url_free(BaseUrl);
3377 }
3378 }
3379 } else {
3380 BUG_MSG("<base> not inside HEAD section.");
3381 }
3382 }
3383
Html_tag_open_default(DilloHtml * html,const char * tag,int tagsize)3384 static void Html_tag_open_default(DilloHtml *html,const char *tag,int tagsize)
3385 {
3386 html->styleEngine->inheritBackgroundColor();
3387 }
3388
3389 /*
3390 * <SPAN>
3391 */
Html_tag_open_span(DilloHtml * html,const char * tag,int tagsize)3392 static void Html_tag_open_span(DilloHtml *html, const char *tag, int tagsize)
3393 {
3394 const char *attrbuf;
3395
3396 html->styleEngine->inheritBackgroundColor();
3397
3398 if (prefs.show_tooltip &&
3399 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3400
3401 html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
3402 attrbuf);
3403 }
3404 }
3405
3406 /*
3407 * html5 sectioning stuff: article aside nav section header footer
3408 */
Html_tag_open_sectioning(DilloHtml * html,const char * tag,int tagsize)3409 static void Html_tag_open_sectioning(DilloHtml *html, const char *tag,
3410 int tagsize)
3411 {
3412 const char *attrbuf;
3413
3414 if (prefs.show_tooltip &&
3415 (attrbuf = a_Html_get_attr(html, tag, tagsize, "title"))) {
3416
3417 html->styleEngine->setNonCssHint (PROPERTY_X_TOOLTIP, CSS_TYPE_STRING,
3418 attrbuf);
3419 }
3420 }
3421
3422 /*
3423 * <DIV> (TODO: make a complete implementation)
3424 */
Html_tag_open_div(DilloHtml * html,const char * tag,int tagsize)3425 static void Html_tag_open_div(DilloHtml *html, const char *tag, int tagsize)
3426 {
3427 a_Html_tag_set_align_attr (html, tag, tagsize);
3428 Html_tag_open_sectioning(html, tag, tagsize);
3429 }
3430
3431 /*
3432 * Default close for paragraph tags - pop the stack and break.
3433 */
Html_tag_close_par(DilloHtml * html)3434 static void Html_tag_close_par(DilloHtml *html)
3435 {
3436 HT2TB(html)->addParbreak (9, html->wordStyle ());
3437 }
3438
3439 /*
3440 * <WBR> "The wbr element represents a line break opportunity."
3441 */
Html_tag_content_wbr(DilloHtml * html,const char * tag,int tagsize)3442 static void Html_tag_content_wbr(DilloHtml *html, const char *tag, int tagsize)
3443 {
3444 HT2TB(html)->addBreakOption(html->wordStyle (), true);
3445 }
3446
3447
3448 /*
3449 * Function index for the open, content, and close functions for each tag
3450 * (Alphabetically sorted for a binary search).
3451 * The open and close functions are always called. They are used for style
3452 * handling and HTML bug reporting.
3453 * Content creation (e.g. adding new widgets or text) is done in the content
3454 * function, which is not called in the display:none case.
3455 * Note: many tags don't need a content function (e.g. <div>, <span>, ...).
3456 *
3457 * Explanation for the 'Flags' field:
3458 *
3459 * {"address", B8(010110), ...}
3460 * |||||`- inline element
3461 * ||||`-- block element
3462 * |||`--- inline container
3463 * ||`---- block container
3464 * |`----- body element
3465 * `------ head element
3466 *
3467 * Notes:
3468 * - The upper two bits are not used yet.
3469 * - Empty elements have both inline and block container clear.
3470 * (flow have both set)
3471 */
3472
/*
 * Table of the tags known to the parser. It MUST stay sorted by name:
 * a_Html_tag_index() runs a binary search over it.
 *
 * Each entry holds, in order:
 *   - the tag name (lowercase),
 *   - the Flags bit field,
 *   - the EndTag kind: 'O' marks an element with optional close; 'R' and
 *     'F' presumably mean required and forbidden -- TODO confirm against
 *     the TagInfo declaration,
 *   - a small integer (presumably a nesting level -- TODO confirm),
 *   - the open, content and close functions (NULL when there is none).
 *
 * Tags mentioned only in comments are not handled.
 */
const TagInfo Tags[] = {
 {"a", B8(011101),'R',2, Html_tag_open_a, NULL, Html_tag_close_a},
 {"abbr", B8(010101),'R',2, Html_tag_open_abbr, NULL, NULL},
 /* acronym 010101 -- obsolete in HTML5 */
 {"address", B8(010110),'R',2,Html_tag_open_default, NULL, Html_tag_close_par},
 {"area", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_area,
                            NULL},
 {"article", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"aside", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"audio", B8(011101),'R',2, Html_tag_open_audio, NULL, Html_tag_close_media},
 {"b", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"base", B8(100001),'F',0, Html_tag_open_base, NULL, NULL},
 /* basefont 010001 -- obsolete in HTML5 */
 /* bdo 010101 */
 {"big", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"blockquote", B8(011110),'R',2, Html_tag_open_blockquote, NULL,
                                  NULL},
 {"body", B8(011110),'O',1, Html_tag_open_body, NULL, Html_tag_close_body},
 {"br", B8(010001),'F',0, Html_tag_open_default, Html_tag_content_br,
                          NULL},
 {"button", B8(011101),'R',2, Html_tag_open_button,NULL,Html_tag_close_button},
 /* caption */
 {"center", B8(011110),'R',2, Html_tag_open_default, NULL, NULL},
 {"cite", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"code", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 /* col 010010 'F' */
 /* colgroup */
 {"dd", B8(011110),'O',1, Html_tag_open_dd, NULL, NULL},
 {"del", B8(011101),'R',2, Html_tag_open_default, NULL, NULL},
 {"dfn", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"dir", B8(011010),'R',2, Html_tag_open_dir, NULL, Html_tag_close_par},
 /* TODO: complete <div> support! */
 {"div", B8(011110),'R',2, Html_tag_open_div, NULL, NULL},
 {"dl", B8(011010),'R',2, Html_tag_open_dl, NULL, Html_tag_close_par},
 {"dt", B8(010110),'O',1, Html_tag_open_dt, NULL, Html_tag_close_par},
 {"em", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"embed", B8(010001),'F',0, Html_tag_open_embed, Html_tag_content_embed,NULL},
 /* fieldset */
 {"figcaption", B8(011110),'R',2, Html_tag_open_default, NULL, NULL},
 {"figure", B8(011110),'R',2, Html_tag_open_default, NULL, NULL},
 {"font", B8(010101),'R',2, Html_tag_open_font, NULL, NULL},
 {"footer", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"form", B8(011110),'R',2, Html_tag_open_form, NULL, Html_tag_close_form},
 {"frame", B8(010010),'F',0, Html_tag_open_frame, Html_tag_content_frame,
                             NULL},
 {"frameset", B8(011110),'R',2, Html_tag_open_default,
                                Html_tag_content_frameset, NULL},
 {"h1", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h2", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h3", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h4", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h5", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"h6", B8(010110),'R',2, Html_tag_open_h, NULL, NULL},
 {"head", B8(101101),'O',1, Html_tag_open_head, NULL, Html_tag_close_head},
 {"header", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"hr", B8(010010),'F',0, Html_tag_open_hr, Html_tag_content_hr,
                          NULL},
 {"html", B8(001110),'O',1, Html_tag_open_html, NULL, Html_tag_close_html},
 {"i", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"iframe", B8(011110),'R',2, Html_tag_open_frame, Html_tag_content_frame,
                              NULL},
 {"img", B8(010001),'F',0, Html_tag_open_img, Html_tag_content_img,
                           NULL},
 {"input", B8(010001),'F',0, Html_tag_open_input, NULL, NULL},
 {"ins", B8(011101),'R',2, Html_tag_open_default, NULL, NULL},
 {"isindex", B8(110001),'F',0, Html_tag_open_isindex, NULL, NULL},
 {"kbd", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 /* label 010101 */
 /* legend 01?? */
 {"li", B8(011110),'O',1, Html_tag_open_li, NULL, Html_tag_close_li},
 {"link", B8(100001),'F',0, Html_tag_open_link, NULL, NULL},
 {"map", B8(011001),'R',2, Html_tag_open_default, Html_tag_content_map,
                           Html_tag_close_map},
 {"mark", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 /* menu 1010 -- TODO: not exactly 1010, it can contain LI and inline */
 {"menu", B8(011010),'R',2, Html_tag_open_menu, NULL, Html_tag_close_par},
 {"meta", B8(110001),'F',0, Html_tag_open_meta, NULL, NULL},
 {"nav", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 /* noframes 1011 -- obsolete in HTML5 */
 /* noscript 1011 */
 {"object", B8(111101),'R',2, Html_tag_open_object, Html_tag_content_object,
                              NULL},
 {"ol", B8(011010),'R',2, Html_tag_open_ol, NULL, NULL},
 {"optgroup", B8(010101),'O',1, Html_tag_open_optgroup, NULL,
                                Html_tag_close_optgroup},
 {"option", B8(010001),'O',0, Html_tag_open_option,NULL,Html_tag_close_option},
 {"p", B8(010110),'O',1, Html_tag_open_p, NULL, NULL},
 /* param 010001 'F' */
 {"pre", B8(010110),'R',2, Html_tag_open_pre, NULL, Html_tag_close_pre},
 {"q", B8(010101),'R',2, Html_tag_open_q, NULL, Html_tag_close_q},
 {"s", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"samp", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"script", B8(111001),'R',2, Html_tag_open_script,NULL,Html_tag_close_script},
 {"section", B8(011110),'R',2, Html_tag_open_sectioning, NULL, NULL},
 {"select", B8(010101),'R',2, Html_tag_open_select,NULL,Html_tag_close_select},
 {"small", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"source", B8(010001),'F',0, Html_tag_open_source, Html_tag_content_source,
                              NULL},
 {"span", B8(010101),'R',2, Html_tag_open_span, NULL, NULL},
 {"strike", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"strong", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"style", B8(100101),'R',2, Html_tag_open_style, NULL, Html_tag_close_style},
 {"sub", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"sup", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"table", B8(011010),'R',5, Html_tag_open_table, Html_tag_content_table,
                             NULL},
 /* tbody */
 {"td", B8(011110),'O',3, Html_tag_open_td, Html_tag_content_td,
                          NULL},
 {"textarea", B8(010101),'R', 2, Html_tag_open_textarea,
                          Html_tag_content_textarea, Html_tag_close_textarea},
 /* tfoot */
 {"th", B8(011110),'O',1, Html_tag_open_th, Html_tag_content_th,
                          NULL},
 /* thead */
 {"title", B8(100101),'R',2, Html_tag_open_title, NULL, Html_tag_close_title},
 {"tr", B8(011010),'O',4, Html_tag_open_tr, Html_tag_content_tr,
                          NULL},
 {"tt", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"u", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"ul", B8(011010),'R',2, Html_tag_open_ul, NULL, NULL},
 {"var", B8(010101),'R',2, Html_tag_open_default, NULL, NULL},
 {"video", B8(011101),'R',2, Html_tag_open_video, NULL, Html_tag_close_media},
 {"wbr", B8(010101),'F',0, Html_tag_open_default, Html_tag_content_wbr, NULL}
};
/* Number of entries in Tags[] */
#define NTAGS (sizeof(Tags)/sizeof(Tags[0]))
3599
3600
3601 /*
3602 * Compares tag from buffer ('/' or '>' or space-ended string) [p1]
3603 * with tag from taglist (lowercase, zero ended string) [p2]
3604 * Return value: as strcmp()
3605 */
Html_tag_compare(const char * p1,const char * p2)3606 static int Html_tag_compare(const char *p1, const char *p2)
3607 {
3608 while ( *p2 ) {
3609 if (D_ASCII_TOLOWER(*p1) != *p2)
3610 return(D_ASCII_TOLOWER(*p1) - *p2);
3611 ++p1;
3612 ++p2;
3613 }
3614 return !strchr(" >/\n\r\t", *p1);
3615 }
3616
3617 /*
3618 * Get 'tag' index
3619 * return -1 if tag is not handled yet
3620 */
a_Html_tag_index(const char * tag)3621 int a_Html_tag_index(const char *tag)
3622 {
3623 int low, high, mid, cond;
3624
3625 /* Binary search */
3626 low = 0;
3627 high = NTAGS - 1; /* Last tag index */
3628 while (low <= high) {
3629 mid = (low + high) / 2;
3630 if ((cond = Html_tag_compare(tag, Tags[mid].name)) < 0 )
3631 high = mid - 1;
3632 else if (cond > 0)
3633 low = mid + 1;
3634 else
3635 return mid;
3636 }
3637 return -1;
3638 }
3639
3640 /*
3641 * For elements with optional close, check whether is time to close.
3642 * Return value: (1: Close, 0: Don't close)
3643 * --tuned for speed.
3644 */
Html_needs_optional_close(int old_idx,int cur_idx)3645 static int Html_needs_optional_close(int old_idx, int cur_idx)
3646 {
3647 static int i_P = -1, i_LI, i_TD, i_TR, i_TH, i_DD, i_DT, i_OPTION;
3648 // i_THEAD, i_TFOOT, i_COLGROUP;
3649
3650 if (i_P == -1) {
3651 /* initialize the indexes of elements with optional close */
3652 i_P = a_Html_tag_index("p"),
3653 i_LI = a_Html_tag_index("li"),
3654 i_TD = a_Html_tag_index("td"),
3655 i_TR = a_Html_tag_index("tr"),
3656 i_TH = a_Html_tag_index("th"),
3657 i_DD = a_Html_tag_index("dd"),
3658 i_DT = a_Html_tag_index("dt"),
3659 i_OPTION = a_Html_tag_index("option");
3660 // i_THEAD = a_Html_tag_index("thead");
3661 // i_TFOOT = a_Html_tag_index("tfoot");
3662 // i_COLGROUP = a_Html_tag_index("colgroup");
3663 }
3664
3665 if (old_idx == i_P || old_idx == i_DT) {
3666 /* P and DT are closed by block elements */
3667 return (Tags[cur_idx].Flags & 2);
3668 } else if (old_idx == i_LI) {
3669 /* LI closes LI */
3670 return (cur_idx == i_LI);
3671 } else if (old_idx == i_TD || old_idx == i_TH) {
3672 /* TD and TH are closed by TD, TH and TR */
3673 return (cur_idx == i_TD || cur_idx == i_TH || cur_idx == i_TR);
3674 } else if (old_idx == i_TR) {
3675 /* TR closes TR */
3676 return (cur_idx == i_TR);
3677 } else if (old_idx == i_DD) {
3678 /* DD is closed by DD and DT */
3679 return (cur_idx == i_DD || cur_idx == i_DT);
3680 } else if (old_idx == i_OPTION) {
3681 return 1; // OPTION always needs close
3682 }
3683
3684 /* HTML, HEAD, BODY are handled by Html_test_section(), not here. */
3685 /* TODO: TBODY is pending */
3686 return 0;
3687 }
3688
3689
3690 /*
3691 * Conditional cleanup of the stack (at open time).
3692 * - This helps catching block elements inside inline containers (a BUG).
3693 * - It also closes elements with "optional" close tag.
3694 *
3695 * This function is called when opening a block element or <OPTION>.
3696 *
3697 * It searches the stack closing open inline containers, and closing
3698 * elements with optional close tag when necessary.
3699 *
3700 * Note: OPTION is the only non-block element with an optional close.
3701 */
Html_stack_cleanup_at_open(DilloHtml * html,int new_idx)3702 static void Html_stack_cleanup_at_open(DilloHtml *html, int new_idx)
3703 {
3704 /* We know that the element we're about to push is a block element.
3705 * (except for OPTION, which is an empty inline, so is closed anyway)
3706 * Notes:
3707 * Its 'tag' is not yet pushed into the stack,
3708 * 'new_idx' is its index inside Tags[].
3709 */
3710
3711 if (!html->TagSoup)
3712 return;
3713
3714 while (html->stack->size() > 1) {
3715 int oldtag_idx = S_TOP(html)->tag_idx;
3716
3717 if (Tags[oldtag_idx].EndTag == 'O') { // Element with optional close
3718 if (!Html_needs_optional_close(oldtag_idx, new_idx))
3719 break;
3720 } else if (Tags[oldtag_idx].Flags & 8) { // Block container
3721 break;
3722 }
3723
3724 /* we have an inline (or empty) container... */
3725 if (Tags[oldtag_idx].EndTag == 'R') {
3726 BUG_MSG("<%s> is not allowed to contain <%s>. -- closing <%s>.",
3727 Tags[oldtag_idx].name, Tags[new_idx].name,
3728 Tags[oldtag_idx].name);
3729 }
3730
3731 /* Workaround for Apache and its bad HTML directory listings... */
3732 if ((html->InFlags & IN_PRE) &&
3733 strcmp(Tags[new_idx].name, "hr") == 0)
3734 break;
3735 /* Avoid OPTION closing SELECT */
3736 if ((html->InFlags & IN_SELECT) &&
3737 strcmp(Tags[new_idx].name,"option") == 0)
3738 break;
3739
3740 /* This call closes the top tag only. */
3741 Html_tag_cleanup_at_close(html, oldtag_idx);
3742 }
3743 }
3744
3745 /*
3746 * HTML, HEAD and BODY elements have optional open and close tags.
3747 * Handle this "magic" here.
3748 */
Html_test_section(DilloHtml * html,int new_idx,int IsCloseTag)3749 static void Html_test_section(DilloHtml *html, int new_idx, int IsCloseTag)
3750 {
3751 const char *tag;
3752 int tag_idx;
3753
3754 if (!(html->InFlags & IN_HTML) && html->DocType == DT_NONE)
3755 BUG_MSG("The required DOCTYPE declaration is missing. "
3756 "Handling as HTML4.");
3757
3758 if (!(html->InFlags & IN_HTML)) {
3759 tag = "<html>";
3760 tag_idx = a_Html_tag_index(tag + 1);
3761 if (tag_idx != new_idx || IsCloseTag) {
3762 /* implicit open */
3763 Html_force_push_tag(html, tag_idx);
3764 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3765 Tags[tag_idx].open (html, tag, strlen(tag));
3766 }
3767 }
3768
3769 if (Tags[new_idx].Flags & 32) {
3770 /* head element */
3771 if (!(html->InFlags & IN_HEAD) && html->Num_HEAD == 0) {
3772 tag = "<head>";
3773 tag_idx = a_Html_tag_index(tag + 1);
3774 if (tag_idx != new_idx || IsCloseTag) {
3775 /* implicit open of the head element */
3776 Html_force_push_tag(html, tag_idx);
3777 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3778 Tags[tag_idx].open (html, tag, strlen(tag));
3779 }
3780 }
3781
3782 } else if (Tags[new_idx].Flags & 16) {
3783 /* body element */
3784 if (html->InFlags & IN_HEAD) {
3785 tag = "</head>";
3786 tag_idx = a_Html_tag_index(tag + 2);
3787 Html_tag_cleanup_at_close(html, tag_idx);
3788 }
3789 tag = "<body>";
3790 tag_idx = a_Html_tag_index(tag + 1);
3791 if (tag_idx != new_idx || IsCloseTag) {
3792 /* implicit open */
3793 Html_force_push_tag(html, tag_idx);
3794 _MSG("Open : %*s%s\n", html->stack->size()," ",Tags[tag_idx].name);
3795 Tags[tag_idx].open (html, tag, strlen(tag));
3796 }
3797 }
3798 }
3799
3800 /*
3801 * Parse attributes that can appear on any tag.
3802 */
Html_parse_common_attrs(DilloHtml * html,char * tag,int tagsize)3803 static void Html_parse_common_attrs(DilloHtml *html, char *tag, int tagsize)
3804 {
3805 const char *attrbuf;
3806 char lang[3];
3807
3808 if (tagsize >= 8 && /* length of "<t id=i>" */
3809 (attrbuf = a_Html_get_attr(html, tag, tagsize, "id"))) {
3810 /* According to the SGML declaration of HTML 4, all NAME values
3811 * occuring outside entities must be converted to uppercase
3812 * (this is what "NAMECASE GENERAL YES" says). But the HTML 4
3813 * spec states in Sec. 7.5.2 that anchor ids are case-sensitive.
3814 * So we don't do it and hope for better specs in the future ...
3815 */
3816 Html_check_name_val(html, attrbuf, "id");
3817
3818 html->styleEngine->setId(attrbuf);
3819 }
3820
3821 if (tagsize >= 11 && (prefs.parse_embedded_css || prefs.load_stylesheets)) {
3822 /* length of "<t class=i>" or "<t style=i>" */
3823 attrbuf = a_Html_get_attr(html, tag, tagsize, "class");
3824 if (attrbuf)
3825 html->styleEngine->setClass (attrbuf);
3826
3827 attrbuf = a_Html_get_attr(html, tag, tagsize, "style");
3828 if (attrbuf)
3829 html->styleEngine->setStyle (attrbuf);
3830 }
3831
3832 /* handle "xml:lang" and "lang" attributes
3833 * We use only the first two chars of the value to deal with
3834 * extended language tags (see http://www.rfc-editor.org/rfc/bcp/bcp47.txt)
3835 */
3836 memset(lang, 0, sizeof(lang));
3837 if (tagsize >= 14) {
3838 /* length of "<t xml:lang=i>" */
3839 attrbuf = a_Html_get_attr(html, tag, tagsize, "xml:lang");
3840 if (attrbuf)
3841 strncpy(lang, attrbuf, 2);
3842 }
3843 if (!lang[0] && tagsize >= 10) { /* 'xml:lang' prevails over 'lang' */
3844 /* length of "<t lang=i>" */
3845 attrbuf = a_Html_get_attr(html, tag, tagsize, "lang");
3846 if (attrbuf)
3847 strncpy(lang, attrbuf, 2);
3848 }
3849 if (lang[0])
3850 html->styleEngine->setNonCssHint(PROPERTY_X_LANG, CSS_TYPE_STRING, lang);
3851 }
3852
3853 /*
3854 * Warn when encountering elements that are obsolete in HTML5. This list
3855 * was from the "W3C Candidate Recommendation 6 August 2013".
3856 */
Html_check_html5_obsolete(DilloHtml * html,int ni)3857 static void Html_check_html5_obsolete(DilloHtml *html, int ni)
3858 {
3859 static int indexes[9] = {-1};
3860
3861 if (indexes[0] == -1) {
3862 indexes[0] = a_Html_tag_index("dir");
3863 indexes[1] = a_Html_tag_index("frame");
3864 indexes[2] = a_Html_tag_index("frameset");
3865 indexes[3] = a_Html_tag_index("isindex");
3866 indexes[4] = a_Html_tag_index("strike");
3867 indexes[5] = a_Html_tag_index("big");
3868 indexes[6] = a_Html_tag_index("center");
3869 indexes[7] = a_Html_tag_index("font");
3870 indexes[8] = a_Html_tag_index("tt");
3871 }
3872 for (int i = 0; i < 9; i++) {
3873 if (indexes[i] == ni) {
3874 BUG_MSG("<%s> is obsolete in HTML5.", Tags[ni].name);
3875 break;
3876 }
3877 }
3878 }
3879
/*
 * Handle an element whose style has 'display: block' (see the
 * DISPLAY_BLOCK case in Html_process_tag()): delegates to
 * Html_add_textblock(html, 0).
 */
static void Html_display_block(DilloHtml *html)
{
   /* Disabled alternative, kept for reference: */
   //HT2TB(html)->addParbreak (5, html->styleEngine->wordStyle ());
   Html_add_textblock(html, 0);
}
3885
Html_display_listitem(DilloHtml * html)3886 static void Html_display_listitem(DilloHtml *html)
3887 {
3888 Style *style = html->style ();
3889 Style *wordStyle = html->wordStyle ();
3890 Widget **ref_list_item;
3891 ListItem *list_item;
3892 int *list_number;
3893 char buf[16];
3894
3895 /* Get our parent tag's variables (used as state storage) */
3896 list_number = &html->stack->getRef(html->stack->size()-2)->list_number;
3897 ref_list_item = &html->stack->getRef(html->stack->size()-2)->ref_list_item;
3898
3899 HT2TB(html)->addParbreak (0, wordStyle);
3900
3901 list_item = new ListItem ((ListItem*)*ref_list_item,prefs.limit_text_width);
3902 HT2TB(html)->addWidget (list_item, style);
3903 HT2TB(html)->addParbreak (0, wordStyle);
3904 *ref_list_item = list_item;
3905 S_TOP(html)->textblock = html->dw = list_item;
3906
3907 if (style->listStyleType == LIST_STYLE_TYPE_NONE) {
3908 // none
3909 } else if (style->listStyleType >= LIST_STYLE_TYPE_DECIMAL) {
3910 // ordered
3911 numtostr((*list_number)++, buf, 16, style->listStyleType);
3912 list_item->initWithText (buf, wordStyle);
3913 } else {
3914 // unordered
3915 list_item->initWithWidget (new Bullet(), wordStyle);
3916 }
3917 }
3918
3919 /*
3920 * Process a tag, given as 'tag' and 'tagsize'. -- tagsize is [1 based]
3921 * ('tag' must include the enclosing angle brackets)
3922 * This function calls the right open or close function for the tag.
3923 */
Html_process_tag(DilloHtml * html,char * tag,int tagsize)3924 static void Html_process_tag(DilloHtml *html, char *tag, int tagsize)
3925 {
3926 int ci, ni; /* current and new tag indexes */
3927 char *start = tag + 1; /* discard the '<' */
3928 int IsCloseTag = (*start == '/');
3929
3930 dReturn_if (html->stop_parser == true);
3931
3932 ni = a_Html_tag_index(start + IsCloseTag);
3933 if (ni == -1) {
3934 /* TODO: doctype parsing is a bit fuzzy, but enough for the time being */
3935 if (!(html->InFlags & IN_HTML)) {
3936 if (tagsize > 9 && !dStrnAsciiCasecmp(tag, "<!doctype", 9))
3937 Html_parse_doctype(html, tag, tagsize);
3938 }
3939 /* Ignore unknown tags */
3940 return;
3941 }
3942
3943 if (!IsCloseTag && html->DocType == DT_HTML && html->DocTypeVersion >= 5.0f)
3944 Html_check_html5_obsolete(html, ni);
3945
3946 /* Handle HTML, HEAD and BODY. Elements with optional open and close */
3947 if (!(html->InFlags & IN_BODY) /* && parsing HTML */)
3948 Html_test_section(html, ni, IsCloseTag);
3949
3950 /* Tag processing */
3951 ci = S_TOP(html)->tag_idx;
3952 switch (IsCloseTag) {
3953 case 0:
3954 /* Open function */
3955
3956 /* Cleanup when opening a block element, or
3957 * when openning over an element with optional close */
3958 if (Tags[ni].Flags & 2 || (ci != -1 && Tags[ci].EndTag == 'O'))
3959 Html_stack_cleanup_at_open(html, ni);
3960
3961 /* TODO: this is only raising a warning, take some defined action.
3962 * Note: apache uses IMG inside PRE (we could use its "alt"). */
3963 if ((html->InFlags & IN_PRE) && Html_tag_pre_excludes(html, ni))
3964 BUG_MSG("<pre> is not allowed to contain <%s>.", Tags[ni].name);
3965
3966 /* Make sure these elements don't nest each other */
3967 if (html->InFlags & (IN_BUTTON | IN_SELECT | IN_TEXTAREA))
3968 Html_tag_cleanup_nested_inputs(html, ni);
3969
3970 /* Push the tag into the stack */
3971 Html_push_tag(html, ni);
3972
3973 html->startElement (ni);
3974 _MSG("Open : %*s%s\n", html->stack->size(), " ", Tags[ni].name);
3975
3976 /* Parse attributes that can appear on any tag */
3977 Html_parse_common_attrs(html, tag, tagsize);
3978
3979 /* Call the open function for this tag */
3980 _MSG("Html_process_tag Open : %s\n", Tags[ni].name);
3981 Tags[ni].open (html, tag, tagsize);
3982
3983 if (! S_TOP(html)->display_none) {
3984 switch (html->style ()->display) {
3985 case DISPLAY_BLOCK:
3986 Html_display_block(html);
3987 break;
3988 case DISPLAY_LIST_ITEM:
3989 Html_display_listitem(html);
3990 break;
3991 case DISPLAY_NONE:
3992 S_TOP(html)->display_none = true;
3993 break;
3994 case DISPLAY_INLINE:
3995 case DISPLAY_INLINE_BLOCK: // TODO: implement inline-block
3996 default:
3997 break;
3998 }
3999
4000 if (Tags[ni].content && ! S_TOP(html)->display_none) {
4001 Tags[ni].content (html, tag, tagsize);
4002 }
4003 }
4004
4005 if (html->stop_parser)
4006 break;
4007
4008 if (S_TOP(html)->parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {
4009 /* don't change anything */
4010 } else if (S_TOP(html)->parse_mode != DILLO_HTML_PARSE_MODE_PRE &&
4011 (html->style ()->whiteSpace == WHITE_SPACE_PRE ||
4012 html->style ()->whiteSpace == WHITE_SPACE_PRE_WRAP)) {
4013 S_TOP(html)->parse_mode = DILLO_HTML_PARSE_MODE_PRE;
4014 html->pre_column = 0;
4015 html->PreFirstChar = true;
4016 }
4017
4018 if (html->styleEngine->getId ())
4019 Html_add_anchor(html, html->styleEngine->getId ());
4020
4021 /* Request immediate close for elements with forbidden close tag. */
4022 /* TODO: XHTML always requires close tags. A simple implementation
4023 * of the commented clause below will make it work. */
4024 if (/* parsing HTML && */ Tags[ni].EndTag == 'F')
4025 html->ReqTagClose = true;
4026
4027 /* Don't break! Open tags may also close themselves */
4028
4029 default:
4030 /* Close function */
4031
4032 /* Test for </x>, ReqTagClose, <x /> and <x/> */
4033 if (*start == '/' || /* </x> */
4034 html->ReqTagClose || /* request */
4035 (tag[tagsize-2] == '/' && /* XML: */
4036 (strchr(" \"'", tag[tagsize-3]) || /* [ "']/> */
4037 (size_t)tagsize == strlen(Tags[ni].name) + 3))) { /* <x/> */
4038
4039 _MSG("Html_process_tag Close: %s\n", Tags[ni].name);
4040 Html_tag_cleanup_at_close(html, ni);
4041 /* This was a close tag */
4042 html->ReqTagClose = false;
4043 }
4044 }
4045 }
4046
4047 /*
4048 * Get attribute value for 'attrname' and return it.
4049 * Tags start with '<' and end with a '>' (Ex: "<P align=center>")
4050 * tagsize = strlen(tag) from '<' to '>', inclusive.
4051 *
4052 * Returns one of the following:
4053 * * The value of the attribute.
4054 * * An empty string if the attribute exists but has no value.
4055 * * NULL if the attribute doesn't exist.
4056 */
Html_get_attr2(DilloHtml * html,const char * tag,int tagsize,const char * attrname,int tag_parsing_flags)4057 static const char *Html_get_attr2(DilloHtml *html,
4058 const char *tag,
4059 int tagsize,
4060 const char *attrname,
4061 int tag_parsing_flags)
4062 {
4063 int i, isocode, entsize, Found = 0, delimiter = 0, attr_pos = 0;
4064 Dstr *Buf = html->attr_data;
4065 DilloHtmlTagParsingState state = SEEK_ATTR_START;
4066
4067 dReturn_val_if_fail(*attrname, NULL);
4068
4069 dStr_truncate(Buf, 0);
4070
4071 for (i = 1; i < tagsize; ++i) {
4072 switch (state) {
4073 case SEEK_ATTR_START:
4074 if (isspace(tag[i]))
4075 state = SEEK_TOKEN_START;
4076 else if (tag[i] == '=')
4077 state = SEEK_VALUE_START;
4078 break;
4079
4080 case MATCH_ATTR_NAME:
4081 if (!attrname[attr_pos] &&
4082 (tag[i] == '=' || isspace(tag[i]) || tag[i] == '>')) {
4083 Found = 1;
4084 state = SEEK_TOKEN_START;
4085 --i;
4086 } else if (!tag[i]) {
4087 state = SEEK_ATTR_START; // NULL byte is not allowed
4088 } else {
4089 if (D_ASCII_TOLOWER(tag[i]) != D_ASCII_TOLOWER(attrname[attr_pos]))
4090 state = SEEK_ATTR_START;
4091 attr_pos++;
4092 }
4093 break;
4094
4095 case SEEK_TOKEN_START:
4096 if (tag[i] == '=') {
4097 state = SEEK_VALUE_START;
4098 } else if (!isspace(tag[i])) {
4099 attr_pos = 0;
4100 state = (Found) ? FINISHED : MATCH_ATTR_NAME;
4101 --i;
4102 }
4103 break;
4104 case SEEK_VALUE_START:
4105 if (!isspace(tag[i])) {
4106 delimiter = (tag[i] == '"' || tag[i] == '\'') ? tag[i] : ' ';
4107 i -= (delimiter == ' ');
4108 state = (Found) ? GET_VALUE : SKIP_VALUE;
4109 }
4110 break;
4111
4112 case SKIP_VALUE:
4113 if ((delimiter == ' ' && isspace(tag[i])) || tag[i] == delimiter)
4114 state = SEEK_TOKEN_START;
4115 break;
4116 case GET_VALUE:
4117 if ((delimiter == ' ' && (isspace(tag[i]) || tag[i] == '>')) ||
4118 tag[i] == delimiter) {
4119 state = FINISHED;
4120 } else if (tag[i] == '&' &&
4121 (tag_parsing_flags & HTML_ParseEntities)) {
4122 if ((isocode = Html_parse_entity(html, tag+i,
4123 tagsize-i, &entsize)) >= 0) {
4124 if (isocode >= 128) {
4125 char buf[4];
4126 int k, n = a_Utf8_encode(isocode, buf);
4127 for (k = 0; k < n; ++k)
4128 dStr_append_c(Buf, buf[k]);
4129 } else {
4130 dStr_append_c(Buf, (char) isocode);
4131 }
4132 i += entsize-1;
4133 } else {
4134 dStr_append_c(Buf, tag[i]);
4135 }
4136 } else if (tag[i] == '\r' || tag[i] == '\t') {
4137 dStr_append_c(Buf, ' ');
4138 } else if (tag[i] == '\n') {
4139 /* ignore */
4140 } else {
4141 dStr_append_c(Buf, tag[i]);
4142 }
4143 break;
4144
4145 case FINISHED:
4146 i = tagsize;
4147 break;
4148 }
4149 }
4150
4151 if (tag_parsing_flags & HTML_LeftTrim)
4152 while (isspace(Buf->str[0]))
4153 dStr_erase(Buf, 0, 1);
4154 if (tag_parsing_flags & HTML_RightTrim)
4155 while (Buf->len && isspace(Buf->str[Buf->len - 1]))
4156 dStr_truncate(Buf, Buf->len - 1);
4157
4158 return (Found) ? Buf->str : NULL;
4159 }
4160
4161 /*
4162 * Call Html_get_attr2 telling it to parse entities and strip the result
4163 */
a_Html_get_attr(DilloHtml * html,const char * tag,int tagsize,const char * attrname)4164 const char *a_Html_get_attr(DilloHtml *html,
4165 const char *tag,
4166 int tagsize,
4167 const char *attrname)
4168 {
4169 return Html_get_attr2(html, tag, tagsize, attrname,
4170 HTML_LeftTrim | HTML_RightTrim | HTML_ParseEntities);
4171 }
4172
4173 /*
4174 * "a_Html_get_attr with default"
4175 * Call a_Html_get_attr() and dStrdup() the returned string.
4176 * If the attribute isn't found a copy of 'def' is returned.
4177 */
a_Html_get_attr_wdef(DilloHtml * html,const char * tag,int tagsize,const char * attrname,const char * def)4178 char *a_Html_get_attr_wdef(DilloHtml *html,
4179 const char *tag,
4180 int tagsize,
4181 const char *attrname,
4182 const char *def)
4183 {
4184 const char *attrbuf = a_Html_get_attr(html, tag, tagsize, attrname);
4185
4186 return attrbuf ? dStrdup(attrbuf) : dStrdup(def);
4187 }
4188
4189 /*
4190 * Dispatch the apropriate function for 'Op'
4191 * This function is a Cache client and gets called whenever new data arrives
4192 * Op : operation to perform.
4193 * CbData : a pointer to a DilloHtml structure
4194 * Buf : a pointer to new data
4195 * BufSize : new data size (in bytes)
4196 */
Html_callback(int Op,CacheClient_t * Client)4197 static void Html_callback(int Op, CacheClient_t *Client)
4198 {
4199 DilloHtml *html = (DilloHtml*)Client->CbData;
4200
4201 if (Op) { /* EOF */
4202 html->write((char*)Client->Buf, Client->BufSize, 1);
4203 html->finishParsing(Client->Key);
4204 } else {
4205 html->write((char*)Client->Buf, Client->BufSize, 0);
4206 }
4207 }
4208
4209 /*
4210 * Here's where we parse the html and put it into the Textblock structure.
4211 * Return value: number of bytes parsed
4212 */
Html_write_raw(DilloHtml * html,char * buf,int bufsize,int Eof)4213 static int Html_write_raw(DilloHtml *html, char *buf, int bufsize, int Eof)
4214 {
4215 char ch = 0, *p, *text;
4216 int token_start, buf_index;
4217
4218 /* Now, 'buf' and 'bufsize' define a buffer aligned to start at a token
4219 * boundary. Iterate through tokens until end of buffer is reached. */
4220 buf_index = 0;
4221 token_start = buf_index;
4222 while ((buf_index < bufsize) && !html->stop_parser) {
4223 /* invariant: buf_index == bufsize || token_start == buf_index */
4224
4225 if (S_TOP(html)->parse_mode ==
4226 DILLO_HTML_PARSE_MODE_VERBATIM) {
4227 /* Non HTML code here, let's skip until closing tag */
4228 do {
4229 const char *tag = Tags[S_TOP(html)->tag_idx].name;
4230 buf_index += strcspn(buf + buf_index, "<");
4231 if (buf_index + (int)strlen(tag) + 3 > bufsize) {
4232 buf_index = bufsize;
4233 } else if (strncmp(buf + buf_index, "</", 2) == 0 &&
4234 Html_match_tag(tag, buf+buf_index+2, strlen(tag)+1)) {
4235 /* copy VERBATIM text into the stash buffer */
4236 text = dStrndup(buf + token_start, buf_index - token_start);
4237 dStr_append(html->Stash, text);
4238 dFree(text);
4239 token_start = buf_index;
4240 break;
4241 } else
4242 ++buf_index;
4243 } while (buf_index < bufsize);
4244
4245 if (buf_index == bufsize)
4246 break;
4247 }
4248
4249 if (isspace(buf[buf_index])) {
4250 /* whitespace: group all available whitespace */
4251 while (++buf_index < bufsize && isspace(buf[buf_index])) ;
4252 Html_process_space(html, buf + token_start, buf_index - token_start);
4253 token_start = buf_index;
4254
4255 } else if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4256 (isalpha(ch) || strchr("/!?", ch)) ) {
4257 /* Tag */
4258 if (buf_index + 3 < bufsize && !strncmp(buf + buf_index, "<!--", 4)) {
4259 /* Comment: search for close of comment, skipping over
4260 * everything except a matching "-->" tag. */
4261 while ( (p = (char*) memchr(buf + buf_index, '>',
4262 bufsize - buf_index)) ){
4263 buf_index = p - buf + 1;
4264 if (p[-1] == '-' && p[-2] == '-') break;
4265 }
4266 if (p) {
4267 /* Got the whole comment. Let's throw it away! :) */
4268 token_start = buf_index;
4269 } else
4270 buf_index = bufsize;
4271 } else {
4272 /* Tag: search end of tag (skipping over quoted strings) */
4273 html->CurrOfs = html->Start_Ofs + token_start;
4274
4275 while ( buf_index < bufsize ) {
4276 buf_index++;
4277 buf_index += strcspn(buf + buf_index, ">\"'<");
4278 if ((ch = buf[buf_index]) == '>') {
4279 break;
4280 } else if (ch == '"' || ch == '\'') {
4281 /* Skip over quoted string */
4282 buf_index++;
4283 buf_index += strcspn(buf + buf_index,
4284 (ch == '"') ? "\">" : "'>");
4285 if (buf[buf_index] == '>') {
4286 /* Unterminated string value? Let's look ahead and test:
4287 * (<: unterminated, closing-quote: terminated) */
4288 int offset = buf_index + 1;
4289 offset += strcspn(buf + offset,
4290 (ch == '"') ? "\"<" : "'<");
4291 if (buf[offset] == ch || !buf[offset]) {
4292 buf_index = offset;
4293 } else {
4294 BUG_MSG("Attribute lacks closing quote.");
4295 break;
4296 }
4297 }
4298 } else if (ch == '<') {
4299 /* unterminated tag detected */
4300 p = dStrndup(buf+token_start+1,
4301 strcspn(buf+token_start+1, " <\n\r\t"));
4302 BUG_MSG("<%s> lacks its closing '>'.", p);
4303 dFree(p);
4304 --buf_index;
4305 break;
4306 }
4307 }
4308 if (buf_index < bufsize) {
4309 buf_index++;
4310 Html_process_tag(html, buf + token_start,
4311 buf_index - token_start);
4312 token_start = buf_index;
4313 }
4314 }
4315 } else {
4316 /* A Word: search for whitespace or tag open */
4317 html->CurrOfs = html->Start_Ofs + token_start;
4318
4319 while (++buf_index < bufsize) {
4320 buf_index += strcspn(buf + buf_index, " <\n\r\t\f\v");
4321 if (buf[buf_index] == '<' && (ch = buf[buf_index + 1]) &&
4322 !isalpha(ch) && !strchr("/!?", ch))
4323 continue;
4324 break;
4325 }
4326 if (buf_index < bufsize || Eof) {
4327 /* successfully found end of token */
4328 ch = buf[buf_index];
4329 buf[buf_index] = 0;
4330 Html_process_word(html, buf + token_start,
4331 buf_index - token_start);
4332 buf[buf_index] = ch;
4333 token_start = buf_index;
4334 }
4335 }
4336 }/*while*/
4337
4338 HT2TB(html)->flush ();
4339
4340 return token_start;
4341 }
4342
4343
4344