1 #include "utils.h"
2 
3 /*
4 	async parsing
5 */
html5_dom_mythread_function(void * arg)6 void *html5_dom_mythread_function(void *arg) {
7 	mythread_context_t *ctx = (mythread_context_t *) arg;
8 	mythread_t *mythread = ctx->mythread;
9 
10 	mythread_mutex_wait(mythread, ctx->mutex);
11 	ctx->func(ctx->id, ctx);
12 	mythread_nanosleep_destroy(ctx->timespec);
13 	ctx->opt = MyTHREAD_OPT_QUIT;
14 	mythread_mutex_close(mythread, ctx->mutex);
15 
16     return NULL;
17 }
18 
/*
	Write `size` bytes of `data` to the descriptor `fd`.

	Returns whatever the platform write call returns, or 0 (nothing is
	written) when fd is negative.
*/
static int html5_dom_fd_write(int fd, const char *data, int size) {
	// no usable descriptor: nothing to do
	if (fd <= -1)
		return 0;

#ifdef MyCORE_OS_WINDOWS_NT
	return _write(fd, data, size);
#else
	return write(fd, data, size);
#endif
}
29 
/*
	Parse result->html (result->length bytes) into a freshly created tree,
	using a new parser configured from result->opts.

	On success result->tree and result->parser are stored first and only
	then result->done is set, so a reader that observes `done` does not see
	a half-filled result; "1" is written to result->fd.

	On failure everything created here is destroyed, result->status receives
	the failing status, result->done is set and "0" is written to result->fd.

	Returns the value of html5_dom_fd_write() for the notification byte.

	NOTE(review): the result of html5_dom_parser_new() is used unchecked.
*/
int html5_dom_async_parse(html5_dom_async_result *result) {
	mystatus_t status;
	myencoding_t encoding;
	myhtml_tree_t *tree = NULL;

	// create parser
	html5_dom_parser_t *self = html5_dom_parser_new(&result->opts);

	// init myhtml
	self->myhtml = myhtml_create();

	if (self->opts.threads <= 1) {
		status = myhtml_init(self->myhtml, MyHTML_OPTIONS_PARSE_MODE_SINGLE, 1, 0);
	} else {
		status = myhtml_init(self->myhtml, MyHTML_OPTIONS_DEFAULT, self->opts.threads, 0);
	}

	if (status)
		goto failed;

	// init myhtml tree
	tree = myhtml_tree_create();
	status = myhtml_tree_init(tree, self->myhtml);
	if (status)
		goto failed;

	// detect encoding
	encoding = html5_dom_auto_encoding(&result->opts, (const char **) &result->html, &result->length);

	// apply options to tree
	html5_dom_apply_tree_options(tree, &result->opts);

	// try parse
	status = myhtml_parse(tree, encoding, result->html, result->length);
	if (status)
		goto failed;

	// publish the payload before raising the done flag
	result->tree = tree;
	result->parser = self;
	result->done = true;

	// trigger event
	return html5_dom_fd_write(result->fd, "1", 1);

failed:
	// single cleanup path: tree (if it was created) first, then the parser
	if (tree)
		myhtml_tree_destroy(tree);
	html5_dom_parser_free(self);
	result->status = status;
	result->done = true;
	return html5_dom_fd_write(result->fd, "0", 1);
}
87 
/*
	Worker callback: takes the async result stored in the context pointer
	of the owning mythread and runs html5_dom_async_parse() on it.

	thread_id is not used; the return value of the parse call is ignored.
*/
void html5_dom_async_parse_worker(mythread_id_t thread_id, void *arg) {
	mythread_t *owner = ((mythread_context_t *) arg)->mythread;
	html5_dom_async_parse((html5_dom_async_result *) owner->context);
}
93 
94 /*
95 	parser
96 */
html5_dom_parser_new(html5_dom_options_t * options)97 html5_dom_parser_t *html5_dom_parser_new(html5_dom_options_t *options) {
98 	html5_dom_parser_t *self = (html5_dom_parser_t *) malloc(sizeof(html5_dom_parser_t));
99 	memset(self, 0, sizeof(html5_dom_parser_t));
100 	memcpy(&self->opts, options, sizeof(html5_dom_options_t));
101 	return self;
102 }
103 
/*
	Destroy every sub-object owned by the parser (myhtml, mycss entry,
	mycss, finder) and free the parser itself.

	A NULL parser is accepted and ignored.
	Always returns NULL: the function is declared `void *`, so it must
	return a value.
*/
void *html5_dom_parser_free(html5_dom_parser_t *self) {
	if (!self)
		return NULL;

	if (self->myhtml) {
		myhtml_destroy(self->myhtml);
		self->myhtml = NULL;
	}

	if (self->mycss_entry) {
		mycss_entry_destroy(self->mycss_entry, 1);
		self->mycss_entry = NULL;
	}

	if (self->mycss) {
		mycss_destroy(self->mycss, 1);
		self->mycss = NULL;
	}

	if (self->finder) {
		modest_finder_destroy(self->finder, 1);
		self->finder = NULL;
	}

	free(self);

	return NULL;
}
127 
html5_dom_init_css(html5_dom_parser_t * parser)128 mystatus_t html5_dom_init_css(html5_dom_parser_t *parser) {
129 	mystatus_t status = MyCSS_STATUS_OK;
130 
131 	if (!parser->mycss) {
132 		parser->mycss = mycss_create();
133 		status = mycss_init(parser->mycss);
134 		if (status) {
135 			mycss_destroy(parser->mycss, 1);
136 			parser->mycss = NULL;
137 			return status;
138 		}
139 	}
140 
141 	if (!parser->mycss_entry) {
142 		parser->mycss_entry = mycss_entry_create();
143 		status = mycss_entry_init(parser->mycss, parser->mycss_entry);
144 		if (status) {
145 			mycss_entry_destroy(parser->mycss_entry, 1);
146 			mycss_destroy(parser->mycss, 1);
147 			parser->mycss = NULL;
148 			parser->mycss_entry = NULL;
149 			return status;
150 		}
151 	}
152 
153 	return status;
154 }
155 
html5_dom_parse_fragment(html5_dom_options_t * opts,myhtml_tree_t * tree,myhtml_tag_id_t tag_id,myhtml_namespace_t ns,const char * text,size_t length,html5_fragment_parts_t * parts,mystatus_t * status_out)156 myhtml_tree_node_t *html5_dom_parse_fragment(html5_dom_options_t *opts, myhtml_tree_t *tree, myhtml_tag_id_t tag_id, myhtml_namespace_t ns,
157 	const char *text, size_t length, html5_fragment_parts_t *parts, mystatus_t *status_out)
158 {
159 	mystatus_t status;
160 
161 	myhtml_t *parser = myhtml_tree_get_myhtml(tree);
162 
163 	// cteate temorary tree
164 	myhtml_tree_t *fragment_tree = myhtml_tree_create();
165 	status = myhtml_tree_init(fragment_tree, parser);
166 	if (status) {
167 		*status_out = status;
168 		myhtml_tree_destroy(tree);
169 		return NULL;
170 	}
171 
172 	html5_dom_apply_tree_options(fragment_tree, opts);
173 
174 	myencoding_t encoding = html5_dom_auto_encoding(opts, &text, &length);
175 
176 	// parse fragment from text
177 	status = myhtml_parse_fragment(fragment_tree, encoding, text, length, tag_id, ns);
178 	if (status) {
179 		*status_out = status;
180 		myhtml_tree_destroy(tree);
181 		return NULL;
182 	}
183 
184 	// clone fragment from temporary tree to persistent tree
185 	myhtml_tree_node_t *node = html5_dom_recursive_clone_node(tree, myhtml_tree_get_node_html(fragment_tree), parts);
186 
187 	if (node) {
188 		html5_dom_tree_t *context = (html5_dom_tree_t *) node->tree->context;
189 		if (!context->fragment_tag_id)
190 			context->fragment_tag_id = html5_dom_tag_id_by_name(tree, "-fragment", 9, true);
191 		node->tag_id = context->fragment_tag_id;
192 	}
193 
194 	myhtml_tree_destroy(fragment_tree);
195 
196 	*status_out = status;
197 
198 	return node;
199 }
200 
/*
	Transfer the relevant parser options onto a myhtml tree.

	The script flag is set or cleared to match opts->scripts; the doctype
	and whitespace parse flags are only ever switched on here.
*/
void html5_dom_apply_tree_options(myhtml_tree_t *tree, html5_dom_options_t *opts) {
	if (!opts->scripts)
		tree->flags &= ~MyHTML_TREE_FLAGS_SCRIPT;
	else
		tree->flags |= MyHTML_TREE_FLAGS_SCRIPT;

	if (opts->ignore_doctype) {
		tree->parse_flags |= MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE;
	}

	if (opts->ignore_whitespace) {
		tree->parse_flags |= MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN;
	}
}
214 
215 /*
216 	misc
217 */
modest_strerror(mystatus_t status)218 const char *modest_strerror(mystatus_t status) {
219 	switch (status) {
220 		#include "gen/modest_errors.c"
221 	}
222 	return status ? "UNKNOWN" : "";
223 }
224 
html5_dom_get_ua_display_prop(myhtml_tree_node_t * node)225 int html5_dom_get_ua_display_prop(myhtml_tree_node_t *node) {
226 	switch (node->tag_id) {
227 		#include "gen/tags_ua_style.c"
228 	}
229 	return TAG_UA_STYLE_INLINE;
230 }
231 
/*
	Strip every trailing occurrence of `c` from `str` in place: each removed
	byte is overwritten with '\0' and str->length is reduced by one.
*/
void html5_dom_rtrim_mystring(mycore_string_t *str, char c) {
	while (str->length > 0 && str->data[str->length - 1] == c) {
		str->data[str->length - 1] = '\0';
		str->length -= 1;
	}
}
244 
245 /*
246 	finders & css
247 */
/*
	Finder callback that keeps only the first match.

	ctx points to a `myhtml_tree_node_t *` slot; the slot is filled with
	`node` only while it is still NULL, so later matches are ignored.
	The remaining arguments are unused.
*/
void _modest_finder_callback_found_with_one_node(modest_finder_t *finder, myhtml_tree_node_t *node,
	mycss_selectors_list_t *selector_list, mycss_selectors_entry_t *selector, mycss_selectors_specificity_t *spec, void *ctx)
{
	myhtml_tree_node_t **slot = (myhtml_tree_node_t **) ctx;

	if (*slot != NULL)
		return;

	*slot = node;
}
255 
/*
	Run the selector entries in `list` against `scope` with the combinator
	function `func`.

	parser     - owner of the lazily created modest finder
	scope      - node to search from; NULL yields NULL with an OK status
	list       - array of `list_size` selector entries
	status_out - MODEST_STATUS_OK, or the error that stopped the search
	one        - true: return the first matching node (or NULL);
	             false: return a myhtml_collection_t with every match
	             (created here; presumably the caller destroys it)

	Returns NULL on error.
*/
void *html5_node_finder(html5_dom_parser_t *parser, modest_finder_selector_combinator_f func,
		myhtml_tree_node_t *scope, mycss_selectors_entries_list_t *list, size_t list_size, mystatus_t *status_out, bool one)
{
	*status_out = MODEST_STATUS_OK;

	if (!scope)
		return NULL;

	// Init finder on first use (the check used to be inverted, so a missing
	// finder was never created and an existing one was replaced)
	mystatus_t status = MODEST_STATUS_OK;
	if (!parser->finder) {
		parser->finder = modest_finder_create();
		status = modest_finder_init(parser->finder);
		if (status) {
			*status_out = status;
			modest_finder_destroy(parser->finder, 1);
			// do not leave a dangling pointer for html5_dom_parser_free()
			parser->finder = NULL;
			return NULL;
		}
	}

	if (one) {
		// Process selector entries, stop at the first match
		myhtml_tree_node_t *node = NULL;
		for (size_t i = 0; i < list_size; ++i) {
			func(parser->finder, scope, NULL, list[i].entry, &list[i].specificity,
				_modest_finder_callback_found_with_one_node, &node);

			if (node)
				break;
		}

		return (void *) node;
	} else {
		// Init collection for results
		myhtml_collection_t *collection = myhtml_collection_create(4096, &status);
		if (status || !collection) {
			*status_out = MODEST_STATUS_ERROR_MEMORY_ALLOCATION;
			return NULL;
		}

		// Process selector entries
		for (size_t i = 0; i < list_size; ++i) {
			func(parser->finder, scope, NULL, list[i].entry, &list[i].specificity,
				modest_finder_callback_found_with_collection, collection);
		}

		return (void *) collection;
	}
}
305 
html5_find_selector_func(const char * c,int combo_len)306 modest_finder_selector_combinator_f html5_find_selector_func(const char *c, int combo_len) {
307 	if (combo_len == 2) {
308 		if (c[0] == '|' && c[1] == '|')
309 			return modest_finder_node_combinator_column;
310 		if ((c[0] == '>' && c[1] == '>'))
311 			return modest_finder_node_combinator_descendant;
312 	} else if (combo_len == 1) {
313 		if (c[0] == '>')
314 			return modest_finder_node_combinator_child;
315 		if (c[0] == '+')
316 			return modest_finder_node_combinator_next_sibling;
317 		if (c[0] == '~')
318 			return modest_finder_node_combinator_following_sibling;
319 		if (c[0] == '^')
320 			return modest_finder_node_combinator_begin;
321 	}
322 	return modest_finder_node_combinator_descendant;
323 }
324 
html5_parse_selector(mycss_entry_t * entry,const char * query,size_t query_len,mystatus_t * status_out)325 mycss_selectors_list_t *html5_parse_selector(mycss_entry_t *entry, const char *query, size_t query_len, mystatus_t *status_out) {
326 	mystatus_t status;
327 
328 	*status_out = MyCSS_STATUS_OK;
329 
330 	mycss_selectors_list_t *list = mycss_selectors_parse(mycss_entry_selectors(entry), MyENCODING_UTF_8, query, query_len, &status);
331 	if (status || list == NULL || (list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)) {
332 		if (list)
333 			mycss_selectors_list_destroy(mycss_entry_selectors(entry), list, true);
334 		*status_out = status;
335 		return NULL;
336 	}
337 
338 	return list;
339 }
340 
341 /*
342 	nodes
343 */
html5_dom_tag_id_by_name(myhtml_tree_t * tree,const char * tag_str,size_t tag_len,bool allow_create)344 myhtml_tag_id_t html5_dom_tag_id_by_name(myhtml_tree_t *tree, const char *tag_str, size_t tag_len, bool allow_create) {
345 	const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_name(tree->tags, tag_str, tag_len);
346 	if (tag_ctx) {
347 		return tag_ctx->id;
348 	} else if (allow_create) {
349 		// add custom tag
350 		return myhtml_tag_add(tree->tags, tag_str, tag_len, MyHTML_TOKENIZER_STATE_DATA, true);
351 	}
352 	return MyHTML_TAG__UNDEF;
353 }
354 
355 // Safe copy node from native or foreign tree
html5_dom_copy_foreign_node(myhtml_tree_t * tree,myhtml_tree_node_t * node)356 myhtml_tree_node_t *html5_dom_copy_foreign_node(myhtml_tree_t *tree, myhtml_tree_node_t *node) {
357 	// Create new node
358 	myhtml_tree_node_t *new_node = myhtml_tree_node_create(tree);
359 	new_node->tag_id		= node->tag_id;
360 	new_node->ns			= node->ns;
361 
362 	// Copy custom tag
363 	if (tree != node->tree && node->tag_id >= MyHTML_TAG_LAST_ENTRY) {
364 		new_node->tag_id = MyHTML_TAG__UNDEF;
365 
366 		// Get tag name in foreign tree
367 		const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(node->tree->tags, node->tag_id);
368 		if (tag_ctx) {
369 			// Get same tag in native tree
370 			new_node->tag_id = html5_dom_tag_id_by_name(tree, tag_ctx->name, tag_ctx->name_length, true);
371 		}
372 	}
373 
374 	if (node->token) {
375 		// Wait, if node not yet done
376 		myhtml_token_node_wait_for_done(node->tree->token, node->token);
377 
378 		// Copy node token
379 		new_node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
380 		if (!new_node->token) {
381 			myhtml_tree_node_delete(new_node);
382 			return NULL;
383 		}
384 
385 		new_node->token->tag_id			= node->token->tag_id;
386 		new_node->token->type			= node->token->type;
387 		new_node->token->attr_first		= NULL;
388 		new_node->token->attr_last		= NULL;
389 		new_node->token->raw_begin		= tree != node->tree ? 0 : node->token->raw_begin;
390 		new_node->token->raw_length		= tree != node->tree ? 0 : node->token->raw_length;
391 		new_node->token->element_begin	= tree != node->tree ? 0 : node->token->element_begin;
392 		new_node->token->element_length	= tree != node->tree ? 0 : node->token->element_length;
393 		new_node->token->type			= new_node->token->type | MyHTML_TOKEN_TYPE_DONE;
394 
395 		// Copy text data
396 		if (node->token->str.length) {
397 			mycore_string_init(tree->mchar, tree->mchar_node_id, &new_node->token->str, node->token->str.length + 1);
398 			mycore_string_append(&new_node->token->str, node->token->str.data, node->token->str.length);
399 		} else {
400 			mycore_string_clean_all(&new_node->token->str);
401 		}
402 
403 		// Copy node attributes
404 		myhtml_token_attr_t *attr = node->token->attr_first;
405 		while (attr) {
406 			myhtml_token_attr_copy(tree->token, attr, new_node->token, tree->mcasync_rules_attr_id);
407 			attr = attr->next;
408 		}
409 	}
410 
411 	return new_node;
412 }
413 
/*
	Deep-copy `node` and all of its descendants into `tree`.

	parts - optional; when given, the copies of the source tree's html,
	        head, body and document nodes are recorded in it.

	Returns the copy of `node`, or NULL when the node itself could not be
	copied. A child whose copy fails is skipped, so neither a NULL root nor
	a NULL child is ever handed to myhtml_tree_node_add_child().
*/
myhtml_tree_node_t *html5_dom_recursive_clone_node(myhtml_tree_t *tree, myhtml_tree_node_t *node, html5_fragment_parts_t *parts) {
	myhtml_tree_node_t *new_node = html5_dom_copy_foreign_node(tree, node);

	// html5_dom_copy_foreign_node() returns NULL on allocation failure
	if (!new_node)
		return NULL;

	if (parts) {
		if (node == node->tree->node_html)
			parts->node_html = new_node;
		else if (node == node->tree->node_head)
			parts->node_head = new_node;
		else if (node == node->tree->node_body)
			parts->node_body = new_node;
		else if (node == node->tree->document)
			parts->document = new_node;
	}

	for (myhtml_tree_node_t *child = myhtml_node_child(node); child; child = myhtml_node_next(child)) {
		myhtml_tree_node_t *child_copy = html5_dom_recursive_clone_node(tree, child, parts);
		if (child_copy)
			myhtml_tree_node_add_child(new_node, child_copy);
	}

	return new_node;
}
436 
437 // Try to implements https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute
438 // Using default user-agent box model types for tags instead of real css.
html5_dom_recursive_node_inner_text(myhtml_tree_node_t * node,html5_dom_inner_text_state_t * state)439 void html5_dom_recursive_node_inner_text(myhtml_tree_node_t *node, html5_dom_inner_text_state_t *state) {
440 	if (node->tag_id == MyHTML_TAG__TEXT) {
441 		size_t text_len = 0;
442 		const char *text = myhtml_node_text(node, &text_len);
443 
444 		bool is_empty = true;
445 		for (size_t i = 0; i < text_len; ++i) {
446 			// skip CR
447 			if (text[i] == '\r')
448 				continue;
449 
450 			// collapse spaces
451 			if (isspace(text[i]) && (text[i] != '\xA0' || !i || text[i - 1] != '\xC2') && text[i] != '\xC2') {
452 				bool skip_spaces = (state->value.length > 0 && state->value.data[state->value.length - 1] == ' ') || state->new_line;
453 				if (skip_spaces)
454 					continue;
455 				mycore_string_append_one(&state->value, ' ');
456 			}
457 			// save other chars
458 			else {
459 				mycore_string_append_one(&state->value, text[i]);
460 				is_empty = false;
461 				state->new_line = false;
462 			}
463 		}
464 
465 		if (!is_empty)
466 			state->last_br = false;
467 	} else if (node_is_element(node)) {
468 		// get default box model type for tag
469 		int display = html5_dom_get_ua_display_prop(node);
470 
471 		// skip hidden nodes
472 		if (display == TAG_UA_STYLE_NONE)
473 			return;
474 
475 		// skip some special nodes
476 		switch (node->tag_id) {
477 			case MyHTML_TAG_TEXTAREA:
478 			case MyHTML_TAG_INPUT:
479 			case MyHTML_TAG_AUDIO:
480 			case MyHTML_TAG_VIDEO:
481 				return;
482 		}
483 
484 		// <br> always inserts \n
485 		if (node->tag_id == MyHTML_TAG_BR) {
486 			mycore_string_append_one(&state->value, '\n');
487 			state->new_line = true;
488 			state->last_br = true;
489 		} else {
490 			switch (display) {
491 				case TAG_UA_STYLE_BLOCK:
492 				case TAG_UA_STYLE_TABLE:
493 				case TAG_UA_STYLE_TABLE_CAPTION:
494 					// if last token - line break, then collapse
495 					// if last token - text, then insert new line break
496 					if (!state->last_br) {
497 						html5_dom_rtrim_mystring(&state->value, ' ');
498 						mycore_string_append_one(&state->value, '\n');
499 						state->new_line = true;
500 						state->last_br = true;
501 					}
502 				break;
503 			}
504 
505 			myhtml_tree_node_t *child = myhtml_node_child(node);
506 			while (child) {
507 				html5_dom_recursive_node_inner_text(child, state);
508 				child = myhtml_node_next(child);
509 			}
510 
511 			switch (display) {
512 				case TAG_UA_STYLE_BLOCK:
513 				case TAG_UA_STYLE_TABLE:
514 				case TAG_UA_STYLE_TABLE_CAPTION:
515 					// if last token - line break, then collapse
516 					// if last token - text, then insert new line break
517 					if (!state->last_br) {
518 						html5_dom_rtrim_mystring(&state->value, ' ');
519 						if (node->tag_id == MyHTML_TAG_P) {
520 							// chrome inserts two \n after <p>
521 							mycore_string_append_one(&state->value, '\n');
522 							mycore_string_append_one(&state->value, '\n');
523 						} else {
524 							mycore_string_append_one(&state->value, '\n');
525 						}
526 						state->new_line = true;
527 						state->last_br = true;
528 					}
529 				break;
530 
531 				case TAG_UA_STYLE_TABLE_CELL:
532 				{
533 					bool is_last_cell = false;
534 					myhtml_tree_node_t *cell = myhtml_node_last_child(myhtml_node_parent(node));
535 					while (cell) {
536 						if (html5_dom_get_ua_display_prop(cell) == TAG_UA_STYLE_TABLE_CELL) {
537 							is_last_cell = cell == node;
538 							break;
539 						}
540 						cell = myhtml_node_prev(cell);
541 					}
542 
543 					if (!is_last_cell) {
544 						html5_dom_rtrim_mystring(&state->value, ' ');
545 						mycore_string_append_one(&state->value, '\t');
546 					}
547 
548 					state->new_line = true;
549 				}
550 				break;
551 
552 				case TAG_UA_STYLE_TABLE_ROW:
553 				{
554 					bool is_last_row = false;
555 					myhtml_tree_node_t *row = myhtml_node_last_child(myhtml_node_parent(node));
556 					while (row) {
557 						if (html5_dom_get_ua_display_prop(row) == TAG_UA_STYLE_TABLE_ROW) {
558 							is_last_row = (row == node);
559 							break;
560 						}
561 						row = myhtml_node_prev(row);
562 					}
563 
564 					if (!is_last_row) {
565 						html5_dom_rtrim_mystring(&state->value, ' ');
566 						mycore_string_append_one(&state->value, '\n');
567 						state->last_br = true;
568 					}
569 
570 					state->new_line = true;
571 				}
572 				break;
573 			}
574 		}
575 	}
576 }
577 
578 // Safe delete nodes only if it has not perl object representation
html5_tree_node_delete_recursive(myhtml_tree_node_t * node)579 void html5_tree_node_delete_recursive(myhtml_tree_node_t *node) {
580 	if (!myhtml_node_get_data(node)) {
581 		myhtml_tree_node_t *child = myhtml_node_child(node);
582 		if (child) {
583 			while (child) {
584 				myhtml_tree_node_t *next = myhtml_node_next(child);
585 				myhtml_tree_node_remove(child);
586 				html5_tree_node_delete_recursive(child);
587 				child = next;
588 			}
589 		}
590 		myhtml_tree_node_delete(node);
591 	}
592 }
593 
594 /*
595 	attrs
596 */
/*
	Set attribute `key` of `node` to `val`.

	A missing attribute is added through myhtml_attribute_add(). An existing
	one has its value string destroyed and rebuilt: UTF-8 input is appended
	as is, any other encoding goes through myencoding_string_append().
*/
void html5_dom_replace_attr_value(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *val, size_t val_len, myencoding_t encoding) {
	myhtml_tree_attr_t *existing = myhtml_attribute_by_key(node, key, key_len);

	// add new
	if (!existing) {
		myhtml_attribute_add(node, key, key_len, val, val_len, encoding);
		return;
	}

	// edit: destroy original value and start a new one
	mycore_string_destroy(&existing->value, 0);
	mycore_string_init(node->tree->mchar, node->tree->mchar_node_id, &existing->value, (val_len + 1));

	// apply encoding
	if (encoding != MyENCODING_UTF_8)
		myencoding_string_append(&existing->value, val, val_len, encoding);
	else
		mycore_string_append(&existing->value, val, val_len);
}
616 
617 /*
618 	encoding
619 */
html5_dom_auto_encoding(html5_dom_options_t * opts,const char ** html_str,size_t * html_length)620 myencoding_t html5_dom_auto_encoding(html5_dom_options_t *opts, const char **html_str, size_t *html_length) {
621 	// Try to determine encoding
622 	myencoding_t encoding;
623 	if (opts->encoding == MyENCODING_AUTO) {
624 		encoding = MyENCODING_NOT_DETERMINED;
625 		if (*html_length) {
626 			// Search encoding in meta-tags
627 			if (opts->encoding_use_meta) {
628 				size_t size = opts->encoding_prescan_limit < *html_length ? opts->encoding_prescan_limit : *html_length;
629 				encoding = myencoding_prescan_stream_to_determine_encoding(*html_str, size);
630 			}
631 
632 			if (encoding == MyENCODING_NOT_DETERMINED) {
633 				// Check BOM
634 				if (!opts->encoding_use_bom || !myencoding_detect_and_cut_bom(*html_str, *html_length, &encoding, html_str, html_length)) {
635 					// Check heuristic
636 					if (!myencoding_detect(*html_str, *html_length, &encoding)) {
637 						// Can't determine encoding, use default
638 						encoding = opts->default_encoding;
639 					}
640 				}
641 			}
642 		} else {
643 			encoding = opts->default_encoding;
644 		}
645 	} else {
646 		encoding = opts->encoding;
647 	}
648 	return encoding;
649 }
650