1 #include "utils.h"
2
3 /*
4 async parsing
5 */
html5_dom_mythread_function(void * arg)6 void *html5_dom_mythread_function(void *arg) {
7 mythread_context_t *ctx = (mythread_context_t *) arg;
8 mythread_t *mythread = ctx->mythread;
9
10 mythread_mutex_wait(mythread, ctx->mutex);
11 ctx->func(ctx->id, ctx);
12 mythread_nanosleep_destroy(ctx->timespec);
13 ctx->opt = MyTHREAD_OPT_QUIT;
14 mythread_mutex_close(mythread, ctx->mutex);
15
16 return NULL;
17 }
18
/*
 * Write `size` bytes from `data` to the descriptor `fd`.
 *
 * A negative descriptor is treated as a no-op and reports 0. Otherwise the
 * result of the platform write call (_write on Windows NT builds, write
 * elsewhere) is returned unchanged.
 */
static int html5_dom_fd_write(int fd, const char *data, int size) {
    if (fd < 0)
        return 0;

#ifdef MyCORE_OS_WINDOWS_NT
    return _write(fd, data, size);
#else
    return write(fd, data, size);
#endif
}
29
/*
 * Parse result->html with a freshly created parser and tree, then report
 * completion through `result` and through the descriptor result->fd.
 *
 * On success result->tree and result->parser receive the new objects (they
 * are not freed here) and "1" is written to result->fd. On any failure
 * everything created by this call is destroyed, result->status receives the
 * failing status and "0" is written instead. result->done is set to true in
 * both cases, and always AFTER the other result fields, so that code polling
 * `done` does not observe a half-filled result.
 * NOTE(review): `done` is a plain field; if it is polled from another thread
 * without further synchronization, a proper barrier/atomic is still needed.
 *
 * Returns the value of html5_dom_fd_write() for the notification byte.
 */
int html5_dom_async_parse(html5_dom_async_result *result) {
    mystatus_t status;
    myencoding_t encoding;
    myhtml_tree_t *tree = NULL;

    // create parser
    html5_dom_parser_t *self = html5_dom_parser_new(&result->opts);

    // init myhtml
    self->myhtml = myhtml_create();

    if (self->opts.threads <= 1) {
        status = myhtml_init(self->myhtml, MyHTML_OPTIONS_PARSE_MODE_SINGLE, 1, 0);
    } else {
        status = myhtml_init(self->myhtml, MyHTML_OPTIONS_DEFAULT, self->opts.threads, 0);
    }

    if (status)
        goto failed;

    // init myhtml tree
    tree = myhtml_tree_create();
    status = myhtml_tree_init(tree, self->myhtml);
    if (status)
        goto failed;

    // detect encoding; the callee gets pointers to html/length and may
    // adjust them (e.g. when a BOM is cut off)
    encoding = html5_dom_auto_encoding(&result->opts, (const char **) &result->html, &result->length);

    // apply options to tree
    html5_dom_apply_tree_options(tree, &result->opts);

    // try parse
    status = myhtml_parse(tree, encoding, result->html, result->length);
    if (status)
        goto failed;

    // publish the outputs first, the completion flag last
    result->tree = tree;
    result->parser = self;
    result->done = true;

    // trigger event
    return html5_dom_fd_write(result->fd, "1", 1);

failed:
    // the tree (if already created) goes before the parser that owns myhtml
    if (tree)
        myhtml_tree_destroy(tree);
    html5_dom_parser_free(self);
    result->status = status;
    result->done = true;
    return html5_dom_fd_write(result->fd, "0", 1);
}
87
/*
 * mythread job callback: `arg` is the mythread context, whose owning
 * mythread carries the html5_dom_async_result to process in its `context`
 * field. The status returned by html5_dom_async_parse() is not used.
 */
void html5_dom_async_parse_worker(mythread_id_t thread_id, void *arg) {
    mythread_context_t *thread_ctx = (mythread_context_t *) arg;

    (void) thread_id; // not needed by this job

    html5_dom_async_parse((html5_dom_async_result *) thread_ctx->mythread->context);
}
93
94 /*
95 parser
96 */
html5_dom_parser_new(html5_dom_options_t * options)97 html5_dom_parser_t *html5_dom_parser_new(html5_dom_options_t *options) {
98 html5_dom_parser_t *self = (html5_dom_parser_t *) malloc(sizeof(html5_dom_parser_t));
99 memset(self, 0, sizeof(html5_dom_parser_t));
100 memcpy(&self->opts, options, sizeof(html5_dom_options_t));
101 return self;
102 }
103
/*
 * Destroy a parser created by html5_dom_parser_new() together with every
 * engine object attached to it (myhtml, mycss entry, mycss, finder).
 *
 * Accepts NULL as a no-op. Always returns NULL: the function is declared as
 * returning `void *`, and previously fell off the end without a return
 * statement, which is undefined behavior as soon as a caller uses the value.
 */
void *html5_dom_parser_free(html5_dom_parser_t *self) {
    if (!self)
        return NULL;

    if (self->myhtml) {
        myhtml_destroy(self->myhtml);
        self->myhtml = NULL;
    }

    // the css entry is released before the mycss object itself
    if (self->mycss_entry) {
        mycss_entry_destroy(self->mycss_entry, 1);
        self->mycss_entry = NULL;
    }

    if (self->mycss) {
        mycss_destroy(self->mycss, 1);
        self->mycss = NULL;
    }

    if (self->finder) {
        modest_finder_destroy(self->finder, 1);
        self->finder = NULL;
    }

    free(self);

    return NULL;
}
127
html5_dom_init_css(html5_dom_parser_t * parser)128 mystatus_t html5_dom_init_css(html5_dom_parser_t *parser) {
129 mystatus_t status = MyCSS_STATUS_OK;
130
131 if (!parser->mycss) {
132 parser->mycss = mycss_create();
133 status = mycss_init(parser->mycss);
134 if (status) {
135 mycss_destroy(parser->mycss, 1);
136 parser->mycss = NULL;
137 return status;
138 }
139 }
140
141 if (!parser->mycss_entry) {
142 parser->mycss_entry = mycss_entry_create();
143 status = mycss_entry_init(parser->mycss, parser->mycss_entry);
144 if (status) {
145 mycss_entry_destroy(parser->mycss_entry, 1);
146 mycss_destroy(parser->mycss, 1);
147 parser->mycss = NULL;
148 parser->mycss_entry = NULL;
149 return status;
150 }
151 }
152
153 return status;
154 }
155
html5_dom_parse_fragment(html5_dom_options_t * opts,myhtml_tree_t * tree,myhtml_tag_id_t tag_id,myhtml_namespace_t ns,const char * text,size_t length,html5_fragment_parts_t * parts,mystatus_t * status_out)156 myhtml_tree_node_t *html5_dom_parse_fragment(html5_dom_options_t *opts, myhtml_tree_t *tree, myhtml_tag_id_t tag_id, myhtml_namespace_t ns,
157 const char *text, size_t length, html5_fragment_parts_t *parts, mystatus_t *status_out)
158 {
159 mystatus_t status;
160
161 myhtml_t *parser = myhtml_tree_get_myhtml(tree);
162
163 // cteate temorary tree
164 myhtml_tree_t *fragment_tree = myhtml_tree_create();
165 status = myhtml_tree_init(fragment_tree, parser);
166 if (status) {
167 *status_out = status;
168 myhtml_tree_destroy(tree);
169 return NULL;
170 }
171
172 html5_dom_apply_tree_options(fragment_tree, opts);
173
174 myencoding_t encoding = html5_dom_auto_encoding(opts, &text, &length);
175
176 // parse fragment from text
177 status = myhtml_parse_fragment(fragment_tree, encoding, text, length, tag_id, ns);
178 if (status) {
179 *status_out = status;
180 myhtml_tree_destroy(tree);
181 return NULL;
182 }
183
184 // clone fragment from temporary tree to persistent tree
185 myhtml_tree_node_t *node = html5_dom_recursive_clone_node(tree, myhtml_tree_get_node_html(fragment_tree), parts);
186
187 if (node) {
188 html5_dom_tree_t *context = (html5_dom_tree_t *) node->tree->context;
189 if (!context->fragment_tag_id)
190 context->fragment_tag_id = html5_dom_tag_id_by_name(tree, "-fragment", 9, true);
191 node->tag_id = context->fragment_tag_id;
192 }
193
194 myhtml_tree_destroy(fragment_tree);
195
196 *status_out = status;
197
198 return node;
199 }
200
/*
 * Transfer the parsing options in `opts` onto `tree`.
 *
 * The script flag is set or cleared to mirror opts->scripts. The two parse
 * flags (doctype / whitespace skipping) are only ever switched on here; they
 * are left untouched when the corresponding option is off.
 */
void html5_dom_apply_tree_options(myhtml_tree_t *tree, html5_dom_options_t *opts) {
    if (!opts->scripts) {
        tree->flags &= ~MyHTML_TREE_FLAGS_SCRIPT;
    } else {
        tree->flags |= MyHTML_TREE_FLAGS_SCRIPT;
    }

    if (opts->ignore_doctype) {
        tree->parse_flags |= MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE;
    }

    if (opts->ignore_whitespace) {
        tree->parse_flags |= MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN;
    }
}
214
215 /*
216 misc
217 */
modest_strerror(mystatus_t status)218 const char *modest_strerror(mystatus_t status) {
219 switch (status) {
220 #include "gen/modest_errors.c"
221 }
222 return status ? "UNKNOWN" : "";
223 }
224
/*
 * Look up the default (user-agent) display type of a node from its tag id.
 *
 * The switch body is pulled in from gen/tags_ua_style.c (not visible here;
 * presumably `case <tag>: return TAG_UA_STYLE_...;` entries). Whenever the
 * switch does not return, the node is reported as TAG_UA_STYLE_INLINE.
 */
int html5_dom_get_ua_display_prop(myhtml_tree_node_t *node) {
    switch (node->tag_id) {
        #include "gen/tags_ua_style.c"
    }
    // fallback for tags the included table does not handle
    return TAG_UA_STYLE_INLINE;
}
231
/*
 * Strip every trailing occurrence of the character `c` from `str` in place:
 * each removed byte is overwritten with '\0' and str->length is decreased
 * accordingly. Stops at the first byte (from the end) that differs from `c`.
 */
void html5_dom_rtrim_mystring(mycore_string_t *str, char c) {
    while (str->length > 0 && str->data[str->length - 1] == c) {
        str->data[str->length - 1] = '\0';
        str->length--;
    }
}
244
245 /*
246 finders & css
247 */
/*
 * Finder callback that remembers only the first match.
 *
 * `ctx` points to a node pointer; it is filled with `node` the first time
 * the callback fires and left alone on every later call. The remaining
 * parameters are unused.
 */
void _modest_finder_callback_found_with_one_node(modest_finder_t *finder, myhtml_tree_node_t *node,
    mycss_selectors_list_t *selector_list, mycss_selectors_entry_t *selector, mycss_selectors_specificity_t *spec, void *ctx)
{
    myhtml_tree_node_t **first_match = (myhtml_tree_node_t **) ctx;

    if (*first_match == NULL) {
        *first_match = node;
    }
}
255
/*
 * Run the combinator `func` for each of the `list_size` selector entries in
 * `list`, starting from `scope`.
 *
 * parser->finder is created lazily on first use and then reused. (The check
 * used to be inverted: a missing finder was never created, while an existing
 * one was overwritten and leaked on every call.) If initializing the finder
 * fails it is destroyed and parser->finder is reset to NULL, so that
 * html5_dom_parser_free() does not destroy it a second time.
 *
 * Returns:
 *   - one == true:  the first matching myhtml_tree_node_t *, or NULL;
 *   - one == false: a new myhtml_collection_t * with all matches (the
 *                   function does not free it);
 *   - NULL when scope is NULL or on error.
 * *status_out is MODEST_STATUS_OK unless an error occurred.
 */
void *html5_node_finder(html5_dom_parser_t *parser, modest_finder_selector_combinator_f func,
    myhtml_tree_node_t *scope, mycss_selectors_entries_list_t *list, size_t list_size, mystatus_t *status_out, bool one)
{
    *status_out = MODEST_STATUS_OK;

    if (!scope)
        return NULL;

    // Init finder (only once per parser)
    mystatus_t status;
    if (!parser->finder) {
        parser->finder = modest_finder_create();
        status = modest_finder_init(parser->finder);
        if (status) {
            *status_out = status;
            modest_finder_destroy(parser->finder, 1);
            parser->finder = NULL;
            return NULL;
        }
    }

    if (one) {
        // Process selector entries until the first match
        myhtml_tree_node_t *node = NULL;
        for (size_t i = 0; i < list_size; ++i) {
            func(parser->finder, scope, NULL, list[i].entry, &list[i].specificity,
                _modest_finder_callback_found_with_one_node, &node);

            if (node)
                break;
        }

        return (void *) node;
    } else {
        // Init collection for results
        myhtml_collection_t *collection = myhtml_collection_create(4096, &status);
        if (status) {
            *status_out = MODEST_STATUS_ERROR_MEMORY_ALLOCATION;
            return NULL;
        }

        // Process selector entries
        for (size_t i = 0; i < list_size; ++i) {
            func(parser->finder, scope, NULL, list[i].entry, &list[i].specificity,
                modest_finder_callback_found_with_collection, collection);
        }

        return (void *) collection;
    }
}
305
html5_find_selector_func(const char * c,int combo_len)306 modest_finder_selector_combinator_f html5_find_selector_func(const char *c, int combo_len) {
307 if (combo_len == 2) {
308 if (c[0] == '|' && c[1] == '|')
309 return modest_finder_node_combinator_column;
310 if ((c[0] == '>' && c[1] == '>'))
311 return modest_finder_node_combinator_descendant;
312 } else if (combo_len == 1) {
313 if (c[0] == '>')
314 return modest_finder_node_combinator_child;
315 if (c[0] == '+')
316 return modest_finder_node_combinator_next_sibling;
317 if (c[0] == '~')
318 return modest_finder_node_combinator_following_sibling;
319 if (c[0] == '^')
320 return modest_finder_node_combinator_begin;
321 }
322 return modest_finder_node_combinator_descendant;
323 }
324
html5_parse_selector(mycss_entry_t * entry,const char * query,size_t query_len,mystatus_t * status_out)325 mycss_selectors_list_t *html5_parse_selector(mycss_entry_t *entry, const char *query, size_t query_len, mystatus_t *status_out) {
326 mystatus_t status;
327
328 *status_out = MyCSS_STATUS_OK;
329
330 mycss_selectors_list_t *list = mycss_selectors_parse(mycss_entry_selectors(entry), MyENCODING_UTF_8, query, query_len, &status);
331 if (status || list == NULL || (list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)) {
332 if (list)
333 mycss_selectors_list_destroy(mycss_entry_selectors(entry), list, true);
334 *status_out = status;
335 return NULL;
336 }
337
338 return list;
339 }
340
341 /*
342 nodes
343 */
html5_dom_tag_id_by_name(myhtml_tree_t * tree,const char * tag_str,size_t tag_len,bool allow_create)344 myhtml_tag_id_t html5_dom_tag_id_by_name(myhtml_tree_t *tree, const char *tag_str, size_t tag_len, bool allow_create) {
345 const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_name(tree->tags, tag_str, tag_len);
346 if (tag_ctx) {
347 return tag_ctx->id;
348 } else if (allow_create) {
349 // add custom tag
350 return myhtml_tag_add(tree->tags, tag_str, tag_len, MyHTML_TOKENIZER_STATE_DATA, true);
351 }
352 return MyHTML_TAG__UNDEF;
353 }
354
355 // Safe copy node from native or foreign tree
html5_dom_copy_foreign_node(myhtml_tree_t * tree,myhtml_tree_node_t * node)356 myhtml_tree_node_t *html5_dom_copy_foreign_node(myhtml_tree_t *tree, myhtml_tree_node_t *node) {
357 // Create new node
358 myhtml_tree_node_t *new_node = myhtml_tree_node_create(tree);
359 new_node->tag_id = node->tag_id;
360 new_node->ns = node->ns;
361
362 // Copy custom tag
363 if (tree != node->tree && node->tag_id >= MyHTML_TAG_LAST_ENTRY) {
364 new_node->tag_id = MyHTML_TAG__UNDEF;
365
366 // Get tag name in foreign tree
367 const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(node->tree->tags, node->tag_id);
368 if (tag_ctx) {
369 // Get same tag in native tree
370 new_node->tag_id = html5_dom_tag_id_by_name(tree, tag_ctx->name, tag_ctx->name_length, true);
371 }
372 }
373
374 if (node->token) {
375 // Wait, if node not yet done
376 myhtml_token_node_wait_for_done(node->tree->token, node->token);
377
378 // Copy node token
379 new_node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
380 if (!new_node->token) {
381 myhtml_tree_node_delete(new_node);
382 return NULL;
383 }
384
385 new_node->token->tag_id = node->token->tag_id;
386 new_node->token->type = node->token->type;
387 new_node->token->attr_first = NULL;
388 new_node->token->attr_last = NULL;
389 new_node->token->raw_begin = tree != node->tree ? 0 : node->token->raw_begin;
390 new_node->token->raw_length = tree != node->tree ? 0 : node->token->raw_length;
391 new_node->token->element_begin = tree != node->tree ? 0 : node->token->element_begin;
392 new_node->token->element_length = tree != node->tree ? 0 : node->token->element_length;
393 new_node->token->type = new_node->token->type | MyHTML_TOKEN_TYPE_DONE;
394
395 // Copy text data
396 if (node->token->str.length) {
397 mycore_string_init(tree->mchar, tree->mchar_node_id, &new_node->token->str, node->token->str.length + 1);
398 mycore_string_append(&new_node->token->str, node->token->str.data, node->token->str.length);
399 } else {
400 mycore_string_clean_all(&new_node->token->str);
401 }
402
403 // Copy node attributes
404 myhtml_token_attr_t *attr = node->token->attr_first;
405 while (attr) {
406 myhtml_token_attr_copy(tree->token, attr, new_node->token, tree->mcasync_rules_attr_id);
407 attr = attr->next;
408 }
409 }
410
411 return new_node;
412 }
413
/*
 * Deep-copy `node` and all of its descendants into `tree`.
 *
 * When `parts` is non-NULL, the clones of the source tree's html, head, body
 * and document nodes are recorded in it as they are encountered.
 *
 * Returns the clone of `node`, or NULL if the node itself could not be
 * copied (html5_dom_copy_foreign_node() returns NULL when it cannot create
 * the token). A descendant that cannot be copied is left out of the clone
 * together with its subtree; previously the NULL result was passed straight
 * to myhtml_tree_node_add_child().
 */
myhtml_tree_node_t *html5_dom_recursive_clone_node(myhtml_tree_t *tree, myhtml_tree_node_t *node, html5_fragment_parts_t *parts) {
    myhtml_tree_node_t *new_node = html5_dom_copy_foreign_node(tree, node);
    if (!new_node)
        return NULL;

    if (parts) {
        if (node == node->tree->node_html)
            parts->node_html = new_node;
        else if (node == node->tree->node_head)
            parts->node_head = new_node;
        else if (node == node->tree->node_body)
            parts->node_body = new_node;
        else if (node == node->tree->document)
            parts->document = new_node;
    }

    myhtml_tree_node_t *child = myhtml_node_child(node);
    while (child) {
        myhtml_tree_node_t *new_child = html5_dom_recursive_clone_node(tree, child, parts);
        if (new_child)
            myhtml_tree_node_add_child(new_node, new_child);
        child = myhtml_node_next(child);
    }

    return new_node;
}
436
437 // Try to implements https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute
438 // Using default user-agent box model types for tags instead of real css.
html5_dom_recursive_node_inner_text(myhtml_tree_node_t * node,html5_dom_inner_text_state_t * state)439 void html5_dom_recursive_node_inner_text(myhtml_tree_node_t *node, html5_dom_inner_text_state_t *state) {
440 if (node->tag_id == MyHTML_TAG__TEXT) {
441 size_t text_len = 0;
442 const char *text = myhtml_node_text(node, &text_len);
443
444 bool is_empty = true;
445 for (size_t i = 0; i < text_len; ++i) {
446 // skip CR
447 if (text[i] == '\r')
448 continue;
449
450 // collapse spaces
451 if (isspace(text[i]) && (text[i] != '\xA0' || !i || text[i - 1] != '\xC2') && text[i] != '\xC2') {
452 bool skip_spaces = (state->value.length > 0 && state->value.data[state->value.length - 1] == ' ') || state->new_line;
453 if (skip_spaces)
454 continue;
455 mycore_string_append_one(&state->value, ' ');
456 }
457 // save other chars
458 else {
459 mycore_string_append_one(&state->value, text[i]);
460 is_empty = false;
461 state->new_line = false;
462 }
463 }
464
465 if (!is_empty)
466 state->last_br = false;
467 } else if (node_is_element(node)) {
468 // get default box model type for tag
469 int display = html5_dom_get_ua_display_prop(node);
470
471 // skip hidden nodes
472 if (display == TAG_UA_STYLE_NONE)
473 return;
474
475 // skip some special nodes
476 switch (node->tag_id) {
477 case MyHTML_TAG_TEXTAREA:
478 case MyHTML_TAG_INPUT:
479 case MyHTML_TAG_AUDIO:
480 case MyHTML_TAG_VIDEO:
481 return;
482 }
483
484 // <br> always inserts \n
485 if (node->tag_id == MyHTML_TAG_BR) {
486 mycore_string_append_one(&state->value, '\n');
487 state->new_line = true;
488 state->last_br = true;
489 } else {
490 switch (display) {
491 case TAG_UA_STYLE_BLOCK:
492 case TAG_UA_STYLE_TABLE:
493 case TAG_UA_STYLE_TABLE_CAPTION:
494 // if last token - line break, then collapse
495 // if last token - text, then insert new line break
496 if (!state->last_br) {
497 html5_dom_rtrim_mystring(&state->value, ' ');
498 mycore_string_append_one(&state->value, '\n');
499 state->new_line = true;
500 state->last_br = true;
501 }
502 break;
503 }
504
505 myhtml_tree_node_t *child = myhtml_node_child(node);
506 while (child) {
507 html5_dom_recursive_node_inner_text(child, state);
508 child = myhtml_node_next(child);
509 }
510
511 switch (display) {
512 case TAG_UA_STYLE_BLOCK:
513 case TAG_UA_STYLE_TABLE:
514 case TAG_UA_STYLE_TABLE_CAPTION:
515 // if last token - line break, then collapse
516 // if last token - text, then insert new line break
517 if (!state->last_br) {
518 html5_dom_rtrim_mystring(&state->value, ' ');
519 if (node->tag_id == MyHTML_TAG_P) {
520 // chrome inserts two \n after <p>
521 mycore_string_append_one(&state->value, '\n');
522 mycore_string_append_one(&state->value, '\n');
523 } else {
524 mycore_string_append_one(&state->value, '\n');
525 }
526 state->new_line = true;
527 state->last_br = true;
528 }
529 break;
530
531 case TAG_UA_STYLE_TABLE_CELL:
532 {
533 bool is_last_cell = false;
534 myhtml_tree_node_t *cell = myhtml_node_last_child(myhtml_node_parent(node));
535 while (cell) {
536 if (html5_dom_get_ua_display_prop(cell) == TAG_UA_STYLE_TABLE_CELL) {
537 is_last_cell = cell == node;
538 break;
539 }
540 cell = myhtml_node_prev(cell);
541 }
542
543 if (!is_last_cell) {
544 html5_dom_rtrim_mystring(&state->value, ' ');
545 mycore_string_append_one(&state->value, '\t');
546 }
547
548 state->new_line = true;
549 }
550 break;
551
552 case TAG_UA_STYLE_TABLE_ROW:
553 {
554 bool is_last_row = false;
555 myhtml_tree_node_t *row = myhtml_node_last_child(myhtml_node_parent(node));
556 while (row) {
557 if (html5_dom_get_ua_display_prop(row) == TAG_UA_STYLE_TABLE_ROW) {
558 is_last_row = (row == node);
559 break;
560 }
561 row = myhtml_node_prev(row);
562 }
563
564 if (!is_last_row) {
565 html5_dom_rtrim_mystring(&state->value, ' ');
566 mycore_string_append_one(&state->value, '\n');
567 state->last_br = true;
568 }
569
570 state->new_line = true;
571 }
572 break;
573 }
574 }
575 }
576 }
577
578 // Safe delete nodes only if it has not perl object representation
html5_tree_node_delete_recursive(myhtml_tree_node_t * node)579 void html5_tree_node_delete_recursive(myhtml_tree_node_t *node) {
580 if (!myhtml_node_get_data(node)) {
581 myhtml_tree_node_t *child = myhtml_node_child(node);
582 if (child) {
583 while (child) {
584 myhtml_tree_node_t *next = myhtml_node_next(child);
585 myhtml_tree_node_remove(child);
586 html5_tree_node_delete_recursive(child);
587 child = next;
588 }
589 }
590 myhtml_tree_node_delete(node);
591 }
592 }
593
594 /*
595 attrs
596 */
/*
 * Set the attribute `key` of `node` to `val`, adding the attribute if the
 * node does not have it yet.
 *
 * For an existing attribute the old value string is destroyed and rebuilt:
 * UTF-8 input is appended as is, any other `encoding` goes through
 * myencoding_string_append(). A new attribute is created through
 * myhtml_attribute_add(), which receives `encoding` as well.
 */
void html5_dom_replace_attr_value(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *val, size_t val_len, myencoding_t encoding) {
    myhtml_tree_attr_t *existing = myhtml_attribute_by_key(node, key, key_len);

    // add new
    if (!existing) {
        myhtml_attribute_add(node, key, key_len, val, val_len, encoding);
        return;
    }

    // edit: drop the original value and start a fresh string
    mycore_string_destroy(&existing->value, 0);
    mycore_string_init(node->tree->mchar, node->tree->mchar_node_id, &existing->value, (val_len + 1));

    // apply encoding
    if (encoding != MyENCODING_UTF_8) {
        myencoding_string_append(&existing->value, val, val_len, encoding);
    } else {
        mycore_string_append(&existing->value, val, val_len);
    }
}
616
617 /*
618 encoding
619 */
html5_dom_auto_encoding(html5_dom_options_t * opts,const char ** html_str,size_t * html_length)620 myencoding_t html5_dom_auto_encoding(html5_dom_options_t *opts, const char **html_str, size_t *html_length) {
621 // Try to determine encoding
622 myencoding_t encoding;
623 if (opts->encoding == MyENCODING_AUTO) {
624 encoding = MyENCODING_NOT_DETERMINED;
625 if (*html_length) {
626 // Search encoding in meta-tags
627 if (opts->encoding_use_meta) {
628 size_t size = opts->encoding_prescan_limit < *html_length ? opts->encoding_prescan_limit : *html_length;
629 encoding = myencoding_prescan_stream_to_determine_encoding(*html_str, size);
630 }
631
632 if (encoding == MyENCODING_NOT_DETERMINED) {
633 // Check BOM
634 if (!opts->encoding_use_bom || !myencoding_detect_and_cut_bom(*html_str, *html_length, &encoding, html_str, html_length)) {
635 // Check heuristic
636 if (!myencoding_detect(*html_str, *html_length, &encoding)) {
637 // Can't determine encoding, use default
638 encoding = opts->default_encoding;
639 }
640 }
641 }
642 } else {
643 encoding = opts->default_encoding;
644 }
645 } else {
646 encoding = opts->encoding;
647 }
648 return encoding;
649 }
650