1 /**
2 
3 	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
4 
5 	@file writer.c
6 
7 	@brief Coordinate conversion of token tree to output formats.
8 
9 
10 	@author	Fletcher T. Penney
11 	@bug
12 
13 **/
14 
15 /*
16 
17 	Copyright © 2016 - 2017 Fletcher T. Penney.
18 
19 
	The `MultiMarkdown 6` project is released under the MIT License.
21 
22 	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
23 
24 		https://github.com/fletcher/MultiMarkdown-4/
25 
26 	MMD 4 is released under both the MIT License and GPL.
27 
28 
29 	CuTest is released under the zlib/libpng license. See CuTest.c for the text
30 	of the license.
31 
32 
33 	## The MIT License ##
34 
35 	Permission is hereby granted, free of charge, to any person obtaining a copy
36 	of this software and associated documentation files (the "Software"), to deal
37 	in the Software without restriction, including without limitation the rights
38 	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
39 	copies of the Software, and to permit persons to whom the Software is
40 	furnished to do so, subject to the following conditions:
41 
42 	The above copyright notice and this permission notice shall be included in
43 	all copies or substantial portions of the Software.
44 
45 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
46 	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
47 	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
48 	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
49 	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
50 	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51 	THE SOFTWARE.
52 
53 */
54 
55 #include <ctype.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59 
60 #include "libMultiMarkdown.h"
61 
62 #include "aho-corasick.h"
63 #include "beamer.h"
64 #include "char.h"
65 #include "d_string.h"
66 #include "html.h"
67 #include "itmz.h"
68 #include "i18n.h"
69 #include "latex.h"
70 #include "memoir.h"
71 #include "mmd.h"
72 #include "opendocument-content.h"
73 #include "opml.h"
74 #include "parser.h"
75 #include "scanners.h"
76 #include "stack.h"
77 #include "token.h"
78 #include "uuid.h"
79 #include "writer.h"
80 
81 
/// Forward declarations: `scratch_pad_new()` calls these helpers before
/// their definitions appear later in this file.

/// Index citation `f` in `scratch->citation_hash`.
void store_citation(scratch_pad * scratch, footnote * f);

/// Index footnote `f` in `scratch->footnote_hash`.
void store_footnote(scratch_pad * scratch, footnote * f);

/// Index glossary entry `f` in `scratch->glossary_hash`.
void store_glossary(scratch_pad * scratch, footnote * f);

/// Index shallow copies of link `l` in `scratch->link_hash`.
void store_link(scratch_pad * scratch, link * l);

/// Index metadata entry `m` in `scratch->meta_hash` by its key.
void store_metadata(scratch_pad * scratch, meta * m);

/// Index abbreviation `a` in `scratch->abbreviation_hash` by its label.
void store_abbreviation(scratch_pad * scratch, footnote * a);
93 
94 
/// Portable stand-in for strndup(), which is not available on all platforms.
/// Returns a newly allocated, NUL-terminated copy of at most `n` bytes of
/// `source`, or NULL when `source` is NULL or the allocation fails.  The
/// caller owns (and must free) the result.
static char * my_strndup(const char * source, size_t n) {
	if (!source) {
		return NULL;
	}

	// Scan at most n bytes for the terminator; strlen() would be wasteful
	// when the source is far longer than n.
	size_t count = 0;

	while (count < n && source[count] != '\0') {
		count++;
	}

	char * copy = malloc(count + 1);

	if (!copy) {
		return NULL;
	}

	memcpy(copy, source, count);
	copy[count] = '\0';

	return copy;
}
123 
124 
/// Portable stand-in for strdup(), which is not available on all platforms.
/// Returns a newly allocated copy of `source` (caller frees), or NULL when
/// `source` is NULL or the allocation fails.
static char * my_strdup(const char * source) {
	if (!source) {
		return NULL;
	}

	// Size includes the terminating NUL so one memcpy moves everything
	size_t bytes = strlen(source) + 1;
	char * copy = malloc(bytes);

	if (copy) {
		memcpy(copy, source, bytes);
	}

	return copy;
}
139 
140 
141 /// Temporary storage while exporting parse tree to output format
scratch_pad_new(mmd_engine * e,short format)142 scratch_pad * scratch_pad_new(mmd_engine * e, short format) {
143 	scratch_pad * p = malloc(sizeof(scratch_pad));
144 
145 	if (p) {
146 		p->padded = 2;							// Prevent unnecessary leading space
147 		p->list_is_tight = false;				// Tight vs Loose list
148 		p->skip_token = 0;						// Skip over next n tokens
149 		p->close_para = true;
150 
151 		p->extensions = e->extensions;
152 		p->output_format = format;
153 		p->quotes_lang = e->quotes_lang;
154 		p->language = e->language;
155 
156 		p->header_stack = e->header_stack;
157 
158 		p->outline_stack = stack_new(0);
159 		p->opml_item_closed = 1;
160 
161 		p->recurse_depth = 0;
162 
163 		p->base_header_level = 1;
164 
165 		p->odf_para_type = BLOCK_PARA;
166 
167 		if (e->extensions & EXT_RANDOM_FOOT) {
168 			p->random_seed_base = rand() % 32000;
169 		} else {
170 			p->random_seed_base = 0;
171 		}
172 
173 		if (e->extensions & EXT_RANDOM_LABELS) {
174 			p->random_seed_base_labels = rand() % 32000;
175 		} else {
176 			p->random_seed_base_labels = 0;
177 		}
178 
179 		p->label_counter = 0;
180 
181 		// Store links in a hash for rapid retrieval when exporting
182 		p->link_hash = NULL;
183 		link * l;
184 
185 		for (int i = 0; i < e->link_stack->size; ++i) {
186 			l = stack_peek_index(e->link_stack, i);
187 
188 			store_link(p, l);
189 		}
190 
191 		// Store citations in a hash for rapid retrieval when exporting
192 		footnote * f;
193 
194 		p->used_citations = stack_new(0);
195 		p->inline_citations_to_free = stack_new(0);
196 		p->citation_being_printed = 0;
197 		p->bibtex_file = NULL;
198 
199 		p->citation_hash = NULL;
200 
201 		for (int i = 0; i < e->citation_stack->size; ++i) {
202 			f = stack_peek_index(e->citation_stack, i);
203 
204 			store_citation(p, f);
205 		}
206 
207 		// Store footnotes in a hash for rapid retrieval when exporting
208 		p->used_footnotes = stack_new(0);				// Store footnotes as we use them
209 		p->inline_footnotes_to_free = stack_new(0);		// Inline footnotes need to be freed
210 		p->footnote_being_printed = 0;
211 		p->footnote_para_counter = -1;
212 
213 		p->footnote_hash = NULL;				// Store defined footnotes in a hash
214 
215 		for (int i = 0; i < e->footnote_stack->size; ++i) {
216 			f = stack_peek_index(e->footnote_stack, i);
217 
218 			store_footnote(p, f);
219 		}
220 
221 		// Store glossaries in a hash for rapid retrieval when exporting
222 		p->used_glossaries = stack_new(0);
223 		p->inline_glossaries_to_free = stack_new(0);
224 		p->glossary_being_printed = 0;
225 
226 		p->glossary_hash = NULL;
227 
228 		for (int i = 0; i < e->glossary_stack->size; ++i) {
229 			f = stack_peek_index(e->glossary_stack, i);
230 
231 			store_glossary(p, f);
232 		}
233 
234 		// Store abbreviations in a hash for rapid retrieval when exporting
235 		p->used_abbreviations = stack_new(0);
236 		p->inline_abbreviations_to_free = stack_new(0);
237 
238 		p->abbreviation_hash = NULL;
239 
240 		for (int i = 0; i < e->abbreviation_stack->size; ++i) {
241 			f = stack_peek_index(e->abbreviation_stack, i);
242 
243 			store_abbreviation(p, f);
244 		}
245 
246 		// Store metadata in a hash for rapid retrieval when exporting
247 		p->meta_hash = NULL;
248 		meta * m;
249 
250 		for (int i = 0; i < e->metadata_stack->size; ++i) {
251 			m = stack_peek_index(e->metadata_stack, i);
252 
253 			store_metadata(p, m);
254 		}
255 
256 
257 		// Store used assets in a hash
258 		p->asset_hash = NULL;
259 		p->store_assets = 0;
260 		p->remember_assets = 0;
261 
262 		p->critic_stack = e->critic_stack;
263 	}
264 
265 	return p;
266 }
267 
268 
scratch_pad_free(scratch_pad * scratch)269 void scratch_pad_free(scratch_pad * scratch) {
270 //	HASH_CLEAR(hh, scratch->link_hash);
271 
272 	stack_free(scratch->outline_stack);
273 
274 	link * l, * l_tmp;
275 
276 	// Free link hash
277 	HASH_ITER(hh, scratch->link_hash, l, l_tmp) {
278 		HASH_DEL(scratch->link_hash, l); 	// Remove item from hash
279 		free(l);		// "Shallow" free -- the pointers will be freed
280 		// with the original later.
281 	}
282 
283 	fn_holder * f, * f_tmp;
284 
285 
286 	// Free footnote hash
287 	HASH_ITER(hh, scratch->footnote_hash, f, f_tmp) {
288 		HASH_DEL(scratch->footnote_hash, f);	// Remove item from hash
289 		free(f);		// Free the fn_holder
290 	}
291 	stack_free(scratch->used_footnotes);
292 
293 	while (scratch->inline_footnotes_to_free->size) {
294 		footnote_free(stack_pop(scratch->inline_footnotes_to_free));
295 	}
296 
297 	stack_free(scratch->inline_footnotes_to_free);
298 
299 
300 	// Free citation hash
301 	HASH_ITER(hh, scratch->citation_hash, f, f_tmp) {
302 		HASH_DEL(scratch->citation_hash, f);	// Remove item from hash
303 		free(f);		// Free the fn_holder
304 	}
305 	stack_free(scratch->used_citations);
306 
307 	while (scratch->inline_citations_to_free->size) {
308 		footnote_free(stack_pop(scratch->inline_citations_to_free));
309 	}
310 
311 	stack_free(scratch->inline_citations_to_free);
312 
313 	free(scratch->bibtex_file);
314 
315 	// Free glossary hash
316 	HASH_ITER(hh, scratch->glossary_hash, f, f_tmp) {
317 		HASH_DEL(scratch->glossary_hash, f);	// Remove item from hash
318 		free(f);		// Free the fn_holder
319 	}
320 	stack_free(scratch->used_glossaries);
321 
322 	while (scratch->inline_glossaries_to_free->size) {
323 		footnote_free(stack_pop(scratch->inline_glossaries_to_free));
324 	}
325 
326 	stack_free(scratch->inline_glossaries_to_free);
327 
328 
329 	// Free abbreviation hash
330 	HASH_ITER(hh, scratch->abbreviation_hash, f, f_tmp) {
331 		HASH_DEL(scratch->abbreviation_hash, f);	// Remove item from hash
332 		free(f);		// Free the fn_holder
333 	}
334 	stack_free(scratch->used_abbreviations);
335 
336 	while (scratch->inline_abbreviations_to_free->size) {
337 		footnote_free(stack_pop(scratch->inline_abbreviations_to_free));
338 	}
339 
340 	stack_free(scratch->inline_abbreviations_to_free);
341 
342 
343 	// Free metadata hash
344 	meta * m, * m_tmp;
345 
346 	HASH_ITER(hh, scratch->meta_hash, m, m_tmp) {
347 		HASH_DEL(scratch->meta_hash, m); 	// Remove item from hash
348 		// Don't free meta pointer since it is freed with the mmd_engine
349 		//meta_free(m);
350 	}
351 
352 	free(scratch);
353 }
354 
355 
356 /// Ensure at least num newlines at end of output buffer
pad(DString * d,short num,scratch_pad * scratch)357 void pad(DString * d, short num, scratch_pad * scratch) {
358 	while (num > scratch->padded) {
359 		d_string_append_c(d, '\n');
360 		scratch->padded++;
361 	}
362 }
363 
364 
/// Append the raw source text of token `t` to `out`.  Emphasis/strong
/// markers, empty text and blockquote markers produce nothing; emphasis and
/// strong pairs and HTML blocks are emitted via their children; every other
/// token is copied verbatim from `source`.  A NULL token is ignored.
void print_token_raw(DString * out, const char * source, token * t) {
	if (!t) {
		return;
	}

	switch (t->type) {
		case PAIR_EMPH:
		case PAIR_STRONG:
		case BLOCK_HTML:
			// Containers: descend into the child chain
			print_token_tree_raw(out, source, t->child);
			return;

		case EMPH_START:
		case EMPH_STOP:
		case STRONG_START:
		case STRONG_STOP:
		case TEXT_EMPTY:
		case MARKER_BLOCKQUOTE:
			// Deliberately produce no output
			return;

		default:
			d_string_append_c_array(out, &source[t->start], t->len);
			return;
	}
}
388 
389 
/// Emit the raw text of `t` and of every token that follows it in the chain.
void print_token_tree_raw(DString * out, const char * source, token * t) {
	for (token * walker = t; walker; walker = walker->next) {
		print_token_raw(out, source, walker);
	}
}
397 
398 
/// Return a newly allocated copy of the source text enclosed by the paired
/// token `pair`, without its delimiters.  Returns NULL when `source` or
/// `pair` is NULL or the pair has no child.  Caller frees the result.
char * text_inside_pair(const char * source, token * pair) {
	if (!source || !pair || !pair->child) {
		return NULL;
	}

	token * opener = pair->child;
	token * closer = opener->mate;

	if (closer) {
		// [foo], [^foo], [#foo] should give different strings -- use closer len
		return my_strndup(&source[pair->start + closer->len], pair->len - (closer->len * 2));
	}

	// No mate: skip the opener and allow one character for the closer
	return my_strndup(&source[pair->start + opener->len], pair->len - (opener->len + 1));
}
415 
416 
label_from_string(const char * str)417 char * label_from_string(const char * str) {
418 	const char * next_char;
419 	char * label = NULL;
420 
421 	DString * out = d_string_new("");
422 
423 	while (*str != '\0') {
424 		next_char = str;
425 		next_char++;
426 
427 		if ((*next_char & 0xC0) == 0x80) {
428 			// Allow multibyte characters
429 			d_string_append_c(out, *str);
430 
431 			while ((*next_char & 0xC0) == 0x80) {
432 				str++;
433 				d_string_append_c(out, *str);
434 				next_char++;
435 			}
436 		} else if ((*str >= '0' && *str <= '9') || (*str >= 'A' && *str <= 'Z')
437 				   || (*str >= 'a' && *str <= 'z') || (*str == '.') || (*str == '_')
438 				   || (*str == '-') || (*str == ':')) {
439 			// Allow 0-9, A-Z, a-z, ., _, -, :
440 			d_string_append_c(out, tolower(*str));
441 		}
442 
443 		str++;
444 	}
445 
446 	label = out->str;
447 	d_string_free(out, false);
448 
449 	return label;
450 }
451 
452 
/// Build a label (see `label_from_string()`) from the source text covered
/// by token `t`.  Caller frees the result.
char * label_from_token(const char * source, token * t) {
	// Copy the token's span into a NUL-terminated scratch buffer
	DString * span = d_string_new("");

	d_string_append_c_array(span, &source[t->start], t->len);

	char * result = label_from_string(span->str);

	d_string_free(span, true);

	return result;
}
466 
467 
/// Produce the label for header token `t`.  A manual label found by
/// `manual_label_from_header()` wins; otherwise, with EXT_RANDOM_LABELS set,
/// a pseudo-random numeric label (1..32000) is generated from the scratch
/// pad's seed and label counter; otherwise the label is derived from the
/// header text itself.  Caller frees the result, which may be NULL if
/// allocation fails.
char * label_from_header(const char * source, token * t, scratch_pad * scratch) {
	// Room for "32000" plus the terminating NUL
	enum { RANDOM_LABEL_SIZE = 6 };

	char * result;
	short temp_short;

	token * temp_token = manual_label_from_header(t, source);

	if (temp_token) {
		result = label_from_token(source, temp_token);
	} else {
		if (scratch->extensions & EXT_RANDOM_LABELS) {
			// Reseed so the n-th label is reproducible for a given seed base
			srand(scratch->random_seed_base_labels + scratch->label_counter);
			temp_short = rand() % 32000 + 1;
			result = malloc(RANDOM_LABEL_SIZE);

			if (result) {
				snprintf(result, RANDOM_LABEL_SIZE, "%d", temp_short);
			}

			scratch->label_counter++;
		} else {
			result = label_from_token(source, t);
		}
	}

	return result;
}
491 
492 
493 /// Clean up whitespace in string for standardization
clean_string(const char * str,bool lowercase)494 char * clean_string(const char * str, bool lowercase) {
495 	if (str == NULL) {
496 		return NULL;
497 	}
498 
499 	DString * out = d_string_new("");
500 	char * clean = NULL;
501 	bool block_whitespace = true;
502 
503 	while (*str != '\0') {
504 		switch (*str) {
505 			case '\\':
506 				switch (*(str + 1)) {
507 					case '\n':
508 					case '\r':
509 						d_string_append_c(out, '\n');
510 						block_whitespace = true;
511 						break;
512 
513 					default:
514 						d_string_append_c(out, '\\');
515 						block_whitespace = false;
516 						break;
517 				}
518 
519 				break;
520 
521 			case '\t':
522 			case ' ':
523 			case '\n':
524 			case '\r':
525 				if (!block_whitespace) {
526 					d_string_append_c(out, ' ');
527 					block_whitespace = true;
528 				}
529 
530 				break;
531 
532 			default:
533 				if (lowercase) {
534 					d_string_append_c(out, tolower(*str));
535 				} else {
536 					d_string_append_c(out, *str);
537 				}
538 
539 				block_whitespace = false;
540 				break;
541 		}
542 
543 		str++;
544 	}
545 
546 	clean = out->str;
547 
548 	// Trim trailing whitespace/newlines
549 	while (out->currentStringLength && char_is_whitespace_or_line_ending(clean[out->currentStringLength - 1])) {
550 		out->currentStringLength--;
551 		clean[out->currentStringLength] = '\0';
552 	}
553 
554 	d_string_free(out, false);
555 
556 	// Trim trailing whitespace
557 	return clean;
558 }
559 
560 
clean_string_from_range(const char * source,size_t start,size_t len,bool lowercase)561 char * clean_string_from_range(const char * source, size_t start, size_t len, bool lowercase) {
562 	char * clean = NULL;
563 
564 	DString * raw = d_string_new("");
565 
566 	d_string_append_c_array(raw, &source[start], len);
567 
568 	clean = clean_string(raw->str, lowercase);
569 
570 	d_string_free(raw, true);
571 
572 	return clean;
573 }
574 
575 
/// Run `clean_string()` over the source text covered by token `t`.
/// Caller frees the result.
char * clean_string_from_token(const char * source, token * t, bool lowercase) {
	char * cleaned = clean_string_from_range(source, t->start, t->len, lowercase);

	return cleaned;
}
579 
580 
/// Run `clean_string()` over the text enclosed by paired token `t`
/// (delimiters excluded).  Caller frees the result.
char * clean_inside_pair(const char * source, token * t, bool lowercase) {
	char * inner = text_inside_pair(source, t);
	char * result = clean_string(inner, lowercase);

	// The intermediate copy is no longer needed
	free(inner);

	return result;
}
590 
591 
attr_new(char * key,char * value)592 attr * attr_new(char * key, char * value) {
593 	attr * a = malloc(sizeof(attr));
594 	size_t len = strlen(value);
595 
596 	// Strip quotes if present
597 	if (value[0] == '"') {
598 		value++;
599 		len--;
600 	}
601 
602 	if (value[len - 1] == '"') {
603 		value[len - 1] = '\0';
604 	}
605 
606 	if (a) {
607 		a->key = key;
608 		a->value = my_strdup(value);
609 		a->next = NULL;
610 	}
611 
612 	return a;
613 }
614 
615 
parse_attributes(char * source)616 attr * parse_attributes(char * source) {
617 	attr * attributes = NULL;
618 	attr * a = NULL;
619 	char * key = NULL;
620 	char * value = NULL;
621 	size_t scan_len;
622 	size_t pos = 0;
623 
624 	while (source[pos] != '\0' && scan_attr(&source[pos])) {
625 		pos +=  scan_spnl(&source[pos]);
626 
627 		// Get key
628 		scan_len = scan_key(&source[pos]);
629 		key = my_strndup(&source[pos], scan_len);
630 
631 		// Skip '='
632 		pos += scan_len + 1;
633 
634 		// Get value
635 		scan_len = scan_value(&source[pos]);
636 		value = my_strndup(&source[pos], scan_len);
637 
638 		pos += scan_len;
639 
640 		if (a) {
641 			a->next = attr_new(key, value);
642 			a = a->next;
643 		} else {
644 #ifndef __clang_analyzer__
645 			a = attr_new(key, value);
646 			attributes = a;
647 #endif
648 		}
649 
650 		free(value);	// We stored a modified copy
651 	}
652 
653 	return attributes;
654 }
655 
656 
link_new(const char * source,token * label,char * url,char * title,char * attributes,short flags)657 link * link_new(const char * source, token * label, char * url, char * title, char * attributes, short flags) {
658 	link * l = malloc(sizeof(link));
659 
660 	if (l) {
661 		l->label = label;
662 
663 		if (label) {
664 			l->clean_text = clean_inside_pair(source, label, true);
665 			l->label_text = label_from_token(source, label);
666 		} else {
667 			l->clean_text = NULL;
668 			l->label_text = NULL;
669 		}
670 
671 		l->url = clean_string(url, false);
672 		l->title = (title == NULL) ? NULL : my_strdup(title);
673 		l->attributes = (attributes == NULL) ? NULL : parse_attributes(attributes);
674 
675 		l->flags = flags;
676 	}
677 
678 	return l;
679 }
680 
681 
682 /// Store shallow copies of links in the storage hash.  The link
683 /// itself is new, but references the same data as the original.
684 /// This allows the copied link to simply be `free()`'d without
685 /// freeing the pointers.
link_shallow_copy(link * l)686 link * link_shallow_copy(link * l) {
687 	link * new = malloc(sizeof(link));
688 
689 	if (new) {
690 		new->label = l->label;
691 		new->clean_text = l->clean_text;
692 		new->label_text = l->label_text;
693 		new->url = l->url;
694 		new->title = l->title;
695 		new->attributes = l->attributes;
696 	}
697 
698 	return new;
699 }
700 
701 
702 /// Copy stored links to a hash for quick searching during export.
703 /// Links are stored via a clean version of their text(from
704 /// `clean_string()`) and a label version (`label_from_string()`).
705 /// The first link for each string is stored.
store_link(scratch_pad * scratch,link * l)706 void store_link(scratch_pad * scratch, link * l) {
707 	link * temp_link;
708 
709 	// Add link via `clean_text`?
710 	if (l->clean_text && l->clean_text[0] != '\0') {
711 		HASH_FIND_STR(scratch->link_hash, l->clean_text, temp_link);
712 
713 		if (!temp_link) {
714 			// Only add if another link is not found with clean_text
715 			temp_link = link_shallow_copy(l);
716 			HASH_ADD_KEYPTR(hh, scratch->link_hash, l->clean_text, strlen(l->clean_text), temp_link);
717 		}
718 	}
719 
720 	// Add link via `label_text`?
721 	if (l->label_text && l->label_text[0] != '\0') {
722 		HASH_FIND_STR(scratch->link_hash, l->label_text, temp_link);
723 
724 		if (!temp_link) {
725 			// Only add if another link is not found with label_text
726 			temp_link = link_shallow_copy(l);
727 			HASH_ADD_KEYPTR(hh, scratch->link_hash, l->label_text, strlen(l->label_text), temp_link);
728 		}
729 	}
730 }
731 
retrieve_link(scratch_pad * scratch,const char * key)732 link * retrieve_link(scratch_pad * scratch, const char * key) {
733 	link * l;
734 
735 	HASH_FIND_STR(scratch->link_hash, key, l);
736 
737 	if (l) {
738 		return l;
739 	}
740 
741 	char * clean = clean_string(key, true);
742 
743 	HASH_FIND_STR(scratch->link_hash, clean, l);
744 
745 	free(clean);
746 
747 	return l;
748 }
749 
750 
fn_holder_new(footnote * f)751 fn_holder * fn_holder_new(footnote * f) {
752 	fn_holder * h = malloc(sizeof(fn_holder));
753 
754 	if (h) {
755 		h->note = f;
756 	}
757 
758 	return h;
759 }
760 
761 
/// Index footnote `f` in the footnote hash under its `clean_text` and its
/// `label_text`.  Empty or missing strings are skipped, and a string that
/// is already present keeps its existing entry.
void store_footnote(scratch_pad * scratch, footnote * f) {
	char * keys[2] = { f->clean_text, f->label_text };

	for (int i = 0; i < 2; ++i) {
		char * key = keys[i];
		fn_holder * holder = NULL;

		if (key == NULL || key[0] == '\0') {
			continue;
		}

		HASH_FIND_STR(scratch->footnote_hash, key, holder);

		if (holder == NULL) {
			holder = fn_holder_new(f);
			HASH_ADD_KEYPTR(hh, scratch->footnote_hash, key, strlen(key), holder);
		}
	}
}
785 
786 
/// Index citation `f` in the citation hash under its `clean_text` and its
/// `label_text`.  Empty or missing strings are skipped, and a string that
/// is already present keeps its existing entry.
void store_citation(scratch_pad * scratch, footnote * f) {
	char * keys[2] = { f->clean_text, f->label_text };

	for (int i = 0; i < 2; ++i) {
		char * key = keys[i];
		fn_holder * holder = NULL;

		if (key == NULL || key[0] == '\0') {
			continue;
		}

		HASH_FIND_STR(scratch->citation_hash, key, holder);

		if (holder == NULL) {
			holder = fn_holder_new(f);
			HASH_ADD_KEYPTR(hh, scratch->citation_hash, key, strlen(key), holder);
		}
	}
}
810 
811 
/// Index glossary entry `f` in the glossary hash under its `clean_text` and
/// its `label_text`.  Empty or missing strings are skipped, and a string
/// that is already present keeps its existing entry.
void store_glossary(scratch_pad * scratch, footnote * f) {
	char * keys[2] = { f->clean_text, f->label_text };

	for (int i = 0; i < 2; ++i) {
		char * key = keys[i];
		fn_holder * holder = NULL;

		if (key == NULL || key[0] == '\0') {
			continue;
		}

		HASH_FIND_STR(scratch->glossary_hash, key, holder);

		if (holder == NULL) {
			holder = fn_holder_new(f);
			HASH_ADD_KEYPTR(hh, scratch->glossary_hash, key, strlen(key), holder);
		}
	}
}
835 
836 
/// Index metadata entry `m` in the metadata hash by its key.  The entry
/// itself (not a copy) is inserted.  Entries with an empty or missing key
/// are skipped, and a key that is already present keeps its first entry.
void store_metadata(scratch_pad * scratch, meta * m) {
	if (m->key == NULL || m->key[0] == '\0') {
		return;
	}

	meta * existing = NULL;

	HASH_FIND_STR(scratch->meta_hash, m->key, existing);

	if (existing) {
		return;
	}

	HASH_ADD_KEYPTR(hh, scratch->meta_hash, m->key, strlen(m->key), m);
}
849 
850 
/// Index abbreviation `f` in the abbreviation hash by its `label_text`
/// only.  An empty or missing label is skipped, and a label that is
/// already present keeps its existing entry.
void store_abbreviation(scratch_pad * scratch, footnote * f) {
	if (f->label_text == NULL || f->label_text[0] == '\0') {
		return;
	}

	fn_holder * holder = NULL;

	HASH_FIND_STR(scratch->abbreviation_hash, f->label_text, holder);

	if (holder) {
		return;
	}

	holder = fn_holder_new(f);
	HASH_ADD_KEYPTR(hh, scratch->abbreviation_hash, f->label_text, strlen(f->label_text), holder);
}
864 
865 
/// Free a link together with its strings and its attribute list.  The
/// label token is not freed.  NULL is ignored.
void link_free(link * l) {
	if (!l) {
		return;
	}

	free(l->label_text);
	free(l->clean_text);
	free(l->url);
	free(l->title);

	// Release each attribute node along with its key and value
	attr * node = l->attributes;

	while (node) {
		attr * following = node->next;

		free(node->key);
		free(node->value);
		free(node);

		node = following;
	}

	free(l);
}
888 
889 
/// Keep accepting NON_INDENT_SPACE, INDENT_SPACE and INDENT_TAB tokens from
/// `*remainder` until `token_chain_accept_multiple()` accepts no more.
void whitespace_accept(token ** remainder) {
	token * accepted;

	do {
		accepted = token_chain_accept_multiple(remainder, 3, NON_INDENT_SPACE, INDENT_SPACE, INDENT_TAB);
	} while (accepted);
}
893 
894 
895 /// Find link based on label
extract_link_from_stack(scratch_pad * scratch,const char * target)896 link * extract_link_from_stack(scratch_pad * scratch, const char * target) {
897 	char * key = clean_string(target, true);
898 
899 	link * temp = NULL;
900 
901 	HASH_FIND_STR(scratch->link_hash, key, temp);
902 
903 	free(key);
904 
905 	if (temp) {
906 		return temp;
907 	}
908 
909 	key = label_from_string(target);
910 
911 	HASH_FIND_STR(scratch->link_hash, key, temp);
912 
913 	free(key);
914 
915 	return temp;
916 }
917 
918 
/// Return true only when `scan_url()` matches the whole of `url`, i.e. the
/// match is non-empty and as long as the string.  A NULL `url` is never
/// valid.
bool validate_url(const char * url) {
	if (url == NULL) {
		return false;
	}

	size_t len = scan_url(url);

	return len != 0 && len == strlen(url);
}
924 
925 
/// Extract a link destination from the token chain at `*remainder` and
/// return it as a newly allocated, cleaned string (caller frees).
/// `*remainder` is advanced past the tokens consumed.  Returns NULL when
/// the chain is empty, when no destination text can be extracted, or when
/// `validate` is true and the text is not a valid URL.
char * destination_accept(const char * source, token ** remainder, bool validate) {
	char * url = NULL;
	char * clean = NULL;
	token * t = NULL;
	size_t start;
	size_t scan_len;

	if (*remainder == NULL) {
		return url;
	}

	switch ((*remainder)->type) {
		case PAIR_PAREN:
		case PAIR_ANGLE:
		case PAIR_QUOTE_SINGLE:
		case PAIR_QUOTE_DOUBLE:
			// Only angle and paren pairs are requested here.
			// text_inside_pair() returns NULL when `t` is NULL.
			t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN);
			url = text_inside_pair(source, t);
			break;

		default:
			start = (*remainder)->start;

			// Skip any whitespace
			while (char_is_whitespace(source[start])) {
				start++;
			}

			scan_len = scan_destination(&source[start]);

			// Grab destination string
			url = my_strndup(&source[start], scan_len);

			// Advance remainder to end of destination
			while ((*remainder)->next &&
					(*remainder)->next->start < start + scan_len) {
				*remainder = (*remainder)->next;
			}

			t = (*remainder);	// We need to remember this for below
			// Move remainder beyond destination
			*remainder = (*remainder)->next;

			// Is there a space in a URL concatenated with a title or attribute?
			// e.g. [foo]: http://foo.bar/ class="foo"
			// Since only one space between URL and class, they are joined.

			if (t->type == TEXT_PLAIN) {
				// Trim leading whitespace
				token_trim_leading_whitespace(t, source);
				token_split_on_char(t, source, ' ');
				*remainder = t->next;
			}

			break;
	}

	// Nothing was extracted -- there is nothing to clean or validate
	if (url == NULL) {
		return NULL;
	}

	// Is this a valid URL?
	clean = clean_string(url, false);

	if (clean && validate && !validate_url(clean)) {
		free(clean);
		clean = NULL;
	}

	free(url);
	return clean;
}
994 
995 
/// Scan a destination in `source` beginning at `start`, using at most
/// `max_len` bytes of it, and return it as a newly allocated, cleaned
/// string (caller frees).  A destination wrapped in `<` and `>` has them
/// stripped.  When a destination is matched and `end_pos` is not NULL,
/// `*end_pos` receives the offset just past the (possibly clipped) match.
/// Returns NULL when nothing is matched, or when `validate` is true and
/// the text is not a valid URL.
char * url_accept(const char * source, size_t start, size_t max_len, size_t * end_pos, bool validate) {
	char * url = NULL;
	char * clean = NULL;
	size_t scan_len;

	scan_len = scan_destination(&source[start]);

	if (scan_len) {
		if (scan_len > max_len) {
			scan_len = max_len;
		}

		if (end_pos) {
			*end_pos = start + scan_len;
		}

		// Is this <foo>?  Both delimiters are needed, so require at least
		// two bytes.  Without this, a match clipped to zero length would
		// read source[start - 1] and underflow scan_len.
		if ((scan_len >= 2) &&
				(source[start] == '<') &&
				(source[start + scan_len - 1] == '>')) {
			// Strip '<' and '>'
			start++;
			scan_len -= 2;
		}

		url = my_strndup(&source[start], scan_len);

		clean = clean_string(url, false);

		if (clean && validate && !validate_url(clean)) {
			free(clean);
			clean = NULL;
		}

		free(url);
	}

	return clean;
}
1034 
1035 
1036 /// Extract url string from `(foo)` or `(<foo>)` or `(foo "bar")`
extract_from_paren(token * paren,const char * source,char ** url,char ** title,char ** attributes)1037 void extract_from_paren(token * paren, const char * source, char ** url, char ** title, char ** attributes) {
1038 	size_t scan_len;
1039 	size_t pos = paren->child->next->start;
1040 
1041 
1042 	size_t attr_len;
1043 
1044 	// Skip whitespace
1045 	while (char_is_whitespace(source[pos])) {
1046 		pos++;
1047 	}
1048 
1049 	// Grab URL
1050 	*url = url_accept(source, pos, paren->start + paren->len - 1 - pos, &pos, false);
1051 
1052 	// Skip whitespace
1053 	while (char_is_whitespace(source[pos])) {
1054 		pos++;
1055 	}
1056 
1057 	// Grab title, if present
1058 	scan_len = scan_title(&source[pos]);
1059 
1060 	if (scan_len) {
1061 		*title = my_strndup(&source[pos + 1], scan_len - 2);
1062 		pos += scan_len;
1063 	}
1064 
1065 	// Skip whitespace
1066 	while (char_is_whitespace(source[pos])) {
1067 		pos++;
1068 	}
1069 
1070 	// Grab attributes, if present
1071 	attr_len = scan_attributes(&source[pos]);
1072 
1073 	if (attr_len) {
1074 		*attributes = my_strndup(&source[pos], attr_len);
1075 	}
1076 }
1077 
1078 
1079 /// Create a link from an explicit "inline" link `[foo](bar)`
explicit_link(scratch_pad * scratch,token * bracket,token * paren,const char * source)1080 link * explicit_link(scratch_pad * scratch, token * bracket, token * paren, const char * source) {
1081 	char * url_char = NULL;
1082 	char * title_char = NULL;
1083 	char * attr_char = NULL;
1084 	link * l = NULL;
1085 
1086 	extract_from_paren(paren, source, &url_char, &title_char, &attr_char);
1087 
1088 	if (attr_char) {
1089 		if (!(scratch->extensions & EXT_COMPATIBILITY)) {
1090 			l = link_new(source, NULL, url_char, title_char, attr_char, LINK_INLINE);
1091 		}
1092 	} else {
1093 		l = link_new(source, NULL, url_char, title_char, attr_char, LINK_INLINE);
1094 	}
1095 
1096 	free(url_char);
1097 	free(title_char);
1098 	free(attr_char);
1099 
1100 	return l;
1101 }
1102 
1103 
footnote_new(const char * source,token * label,token * content,bool lowercase)1104 footnote * footnote_new(const char * source, token * label, token * content, bool lowercase) {
1105 	footnote * f = malloc(sizeof(footnote));
1106 	token * walker;
1107 
1108 	if (f) {
1109 		f->label = label;
1110 		f->clean_text = (label == NULL) ? NULL : clean_inside_pair(source, label, lowercase);
1111 		f->label_text = (label == NULL) ? NULL : label_from_token(source, label);
1112 		f->free_para  = false;
1113 		f->count = -1;
1114 
1115 		if (content) {
1116 			switch (content->type) {
1117 				case BLOCK_PARA:
1118 					f->content = content;
1119 					break;
1120 
1121 				case TEXT_PLAIN:
1122 					token_trim_leading_whitespace(content, source);
1123 
1124 				default:
1125 					// Trim trailing newlines
1126 					walker = content->tail;
1127 
1128 					while (walker) {
1129 						switch (walker->type) {
1130 							case TEXT_NL:
1131 							case TEXT_NL_SP:
1132 								content->tail = walker->prev;
1133 								token_free(walker);
1134 								walker = content->tail;
1135 								walker->next = NULL;
1136 								break;
1137 
1138 							default:
1139 								walker = NULL;
1140 								break;
1141 						}
1142 					}
1143 
1144 					f->content = token_new_parent(content, BLOCK_PARA);
1145 					f->free_para = true;
1146 					break;
1147 			}
1148 		} else {
1149 			f->content = NULL;
1150 		}
1151 	}
1152 
1153 	return f;
1154 }
1155 
1156 
/// Release a footnote record created by footnote_new().  NULL is accepted.
///
/// The BLOCK_PARA wrapper created by footnote_new() (free_para set) is only
/// freed here when kUseObjectPool is not defined; with kUseObjectPool this
/// function does nothing for it.
void footnote_free(footnote * f) {
	if (f == NULL) {
		return;
	}

#ifndef kUseObjectPool

	if (f->free_para) {
		free(f->content);
	}

#endif

	free(f->clean_text);
	free(f->label_text);
	free(f);
}
1173 
1174 
meta_new(const char * source,size_t key_start,size_t len)1175 meta * meta_new(const char * source, size_t key_start, size_t len) {
1176 	meta * m = malloc(sizeof(meta));
1177 	char * key;
1178 
1179 	if (m) {
1180 		key = my_strndup(&source[key_start], len);
1181 		m->key = label_from_string(key);
1182 		free(key);
1183 		m->value = NULL;
1184 		m->start = key_start;
1185 	}
1186 
1187 	return m;
1188 }
1189 
1190 
/// Replace the value of `m` with a cleaned copy of `value`.
///
/// A NULL `value` leaves the entry untouched.  Any previous value is freed.
void meta_set_value(meta * m, const char * value) {
	if (value == NULL) {
		return;
	}

	// free(NULL) is a no-op, so no guard is needed
	free(m->value);
	m->value = clean_string(value, false);
}
1200 
1201 
/// Release a metadata entry together with its key and value strings.
/// NULL is accepted.
void meta_free(meta * m) {
	if (m == NULL) {
		return;
	}

	free(m->key);
	free(m->value);
	free(m);
}
1210 
1211 
1212 /// Find metadata based on key
extract_meta_from_stack(scratch_pad * scratch,const char * target)1213 meta * extract_meta_from_stack(scratch_pad * scratch, const char * target) {
1214 	char * key = clean_string(target, true);
1215 
1216 	meta * temp = NULL;
1217 
1218 	HASH_FIND_STR(scratch->meta_hash, key, temp);
1219 
1220 	free(key);
1221 
1222 	return temp;
1223 }
1224 
1225 
/// Return the value stored for metadata key `target`, or NULL if the key is
/// not present.
///
/// The key is first converted with label_from_string().  The returned
/// pointer is the entry's own `value` field, not a copy.
char * extract_metadata(scratch_pad * scratch, const char * target) {
	char * label = label_from_string(target);
	meta * entry = extract_meta_from_stack(scratch, label);

	free(label);

	return entry ? entry->value : NULL;
}
1238 
1239 
abbr_new(const char * source,token * label,token * content)1240 abbr * abbr_new(const char * source, token * label, token * content) {
1241 	abbr * a = malloc(sizeof(abbr));
1242 
1243 	if (a) {
1244 		a->abbr = text_inside_pair(source, label);
1245 		a->abbr_len = strlen(a->abbr);
1246 		a->expansion = clean_string_from_range(source, content->start, content->len, false);
1247 		a->expansion_len = strlen(a->expansion);
1248 	}
1249 
1250 	return a;
1251 }
1252 
/// Release an abbreviation record and the two strings it holds.
/// NULL is accepted.
void abbreviation_free(abbr * a) {
	if (a == NULL) {
		return;
	}

	free(a->abbr);
	free(a->expansion);
	free(a);
}
1260 
1261 
/// Parse one definition starting at `*remainder` and store the result on
/// the appropriate engine stack.
///
/// The first token is taken as the label and its type selects the handling:
///  - citation / footnote / glossary brackets (only when EXT_NOTES is set):
///    a footnote record is pushed onto the matching stack;
///  - plain brackets (and the three note types when EXT_NOTES is not set):
///    treated as a reference link definition `[label]: url "title" attrs`;
///  - variable brackets: a diagnostic is written to stderr;
///  - anything else: not a definition.
///
/// On success `*remainder` is advanced past the next TEXT_NL/TEXT_LINEBREAK
/// (if any) and true is returned.  false is returned when the label type is
/// not handled or the expected colon is missing; in that case `*remainder`
/// has still been moved past the label.
bool definition_extract(mmd_engine * e, token ** remainder) {
	char * source = e->dstr->str;
	char * url_char = NULL;
	char * title_char = NULL;
	char * attr_char = NULL;
	link * l = NULL;
	footnote * f = NULL;

	// The first token is the label; step past it
	token * label = *remainder;
	*remainder = label->next;

	switch (label->type) {
		case PAIR_BRACKET_CITATION:
		case PAIR_BRACKET_FOOTNOTE:
		case PAIR_BRACKET_GLOSSARY:
			if (e->extensions & EXT_NOTES) {
				if (!token_chain_accept(remainder, COLON)) {
					return false;
				}

				// First token of the note content
				token * content = *remainder;

				// Store for later use; only glossary terms keep their case
				if (label->type == PAIR_BRACKET_CITATION) {
					f = footnote_new(source, label, content, true);
					stack_push(e->citation_stack, f);
				} else if (label->type == PAIR_BRACKET_FOOTNOTE) {
					f = footnote_new(source, label, content, true);
					stack_push(e->footnote_stack, f);
				} else {
					f = footnote_new(source, label, content, false);
					stack_push(e->glossary_stack, f);
				}

				break;
			}

		// Notes are disabled -- handle as a reference link definition

		// fall through

		case PAIR_BRACKET: {
			if (!token_chain_accept(remainder, COLON)) {
				return false;
			}

			// Destination, surrounded by optional whitespace
			whitespace_accept(remainder);
			url_char = destination_accept(source, remainder, false);
			whitespace_accept(remainder);

			// Optional title, either on this line or on the following one
			token * rewind = *remainder;
			token * title = token_chain_accept_multiple(remainder, 2, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE);

			if (!title) {
				whitespace_accept(remainder);
				token_chain_accept_multiple(remainder, 2, TEXT_NL, TEXT_LINEBREAK);
				whitespace_accept(remainder);

				title = token_chain_accept_multiple(remainder, 2, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE);

				if (!title) {
					// No title on the next line either -- undo the lookahead
					*remainder = rewind;
				}
			}

			title_char = text_inside_pair(source, title);

			// Anything left before the end of the line is a candidate for attributes
			bool has_trailing = (*remainder) &&
								((*remainder)->type != TEXT_NL) &&
								((*remainder)->type != TEXT_LINEBREAK);

			// Trailing content is not a valid match in compatibility mode,
			// in which case no link is created
			if (!has_trailing || !(e->extensions & EXT_COMPATIBILITY)) {
				if (has_trailing) {
					size_t attr_len = scan_attributes(&source[(*remainder)->start]);

					if (attr_len) {
						attr_char = my_strndup(&source[(*remainder)->start], attr_len);

						// Skip every token that starts inside the attribute text
						size_t attr_end = (*remainder)->start + attr_len;

						while ((*remainder) && (*remainder)->start < attr_end) {
							*remainder = (*remainder)->next;
						}
					}
				}

				l = link_new(source, label, url_char, title_char, attr_char, LINK_REFERENCE);
			}

			// Store link for later use
			if (l) {
				stack_push(e->link_stack, l);
			}

			break;
		}

		case PAIR_BRACKET_VARIABLE:
			fprintf(stderr, "Process variable:\n");
			token_describe(label, source);
			break;

		default:
			// Rest of block is not definitions (or has already been processed)
			return false;
	}

	// Advance to the start of the next line
	token_skip_until_type_multiple(remainder, 2, TEXT_NL, TEXT_LINEBREAK);

	if (*remainder) {
		*remainder = (*remainder)->next;
	}

	free(url_char);
	free(title_char);
	free(attr_char);

	return true;
}
1407 
1408 
/// Convert one definition block into the corresponding stored definition
/// and mark the block as BLOCK_EMPTY afterwards.
///
/// Abbreviation, citation, footnote and glossary blocks each produce a
/// footnote record that is pushed onto the matching engine stack; the label
/// token and its successor are then set to TEXT_EMPTY.  Link definition
/// blocks are delegated to definition_extract().  Any other block type is
/// reported on stderr.
/// NOTE(review): `block->child` is dereferenced without a NULL check, and
/// the record is pushed even if footnote_new() returned NULL.
void process_definition_block(mmd_engine * e, token * block) {
	footnote * note;
	bool blank_label = true;	// cleared for block types that are not notes

	token * label = block->child;

	if (label->type == BLOCK_PARA) {
		label = label->child;
	}

	switch (block->type) {
		case BLOCK_DEF_ABBREVIATION:
			note = footnote_new(e->dstr->str, label, block->child, false);

			if (note) {
				if (note->clean_text) {
					// Drop the leading '>' from the term, then any
					// whitespace that follows it
					char * term = note->clean_text;

					do {
						memmove(term, term + 1, strlen(term));
					} while (char_is_whitespace(term[0]));
				}

				// The stripped term becomes the label text ...
				free(note->label_text);
				note->label_text = note->clean_text;

				// ... and clean_text is replaced by the text running from
				// the third child of the content to the end of the block
				// (presumably the expansion -- confirm against the parser)
				token * expansion = NULL;

				if (note->content &&
						note->content->child &&
						note->content->child->next) {
					expansion = note->content->child->next->next;
				}

				if (expansion) {
					note->clean_text = clean_string_from_range(e->dstr->str, expansion->start, block->start + block->len - expansion->start, false);
				} else {
					note->clean_text = NULL;
				}
			}

			stack_push(e->abbreviation_stack, note);
			break;

		case BLOCK_DEF_CITATION:
			note = footnote_new(e->dstr->str, label, block->child, true);
			stack_push(e->citation_stack, note);
			break;

		case BLOCK_DEF_FOOTNOTE:
			note = footnote_new(e->dstr->str, label, block->child, true);
			stack_push(e->footnote_stack, note);
			break;

		case BLOCK_DEF_GLOSSARY:
			note = footnote_new(e->dstr->str, label, block->child, false);

			// Drop the leading '?' from the term (label_text is left as is)
			if (note && note->clean_text) {
				memmove(note->clean_text, note->clean_text + 1, strlen(note->clean_text));
			}

			stack_push(e->glossary_stack, note);
			break;

		case BLOCK_DEF_LINK:
			definition_extract(e, &label);
			blank_label = false;
			break;

		default:
			fprintf(stderr, "process %d\n", block->type);
			blank_label = false;
			break;
	}

	if (blank_label) {
		// Blank out the label and the token after it
		label->type = TEXT_EMPTY;

		if (label->next) {
			label->next->type = TEXT_EMPTY;
		}

		strip_leading_whitespace(label, e->dstr->str);
	}

	block->type = BLOCK_EMPTY;
}
1498 
1499 
/// Run process_definition_block() on every entry of the engine's
/// definition stack, in index order.
void process_definition_stack(mmd_engine * e) {
	int index = 0;

	while (index < e->definition_stack->size) {
		process_definition_block(e, stack_peek_index(e->definition_stack, index));
		index++;
	}
}
1505 
/// Locate a manual label (`[label]` at the end of a header) for header `h`.
///
/// The header's children are scanned backwards from the tail.  Whitespace,
/// newline, empty and header-marker tokens are skipped, as is a one
/// character TEXT_PLAIN token that is a single space.  The scan ends at the
/// first other token:
///  - MANUAL_LABEL: that token is returned;
///  - PAIR_BRACKET: the length of the run of consecutive PAIR_BRACKET tokens
///    is counted; for an odd length the last bracket is retyped to
///    MANUAL_LABEL and returned, for an even length NULL is returned;
///  - anything else: NULL.
///
/// Returns NULL as well when `h` or its child is NULL.
token * manual_label_from_header(token * h, const char * source) {
	if (!h || !h->child) {
		return NULL;
	}

	token * cursor = h->child->tail;

	while (cursor) {
		switch (cursor->type) {
			case MANUAL_LABEL:
				// Already identified
				return cursor;

			case INDENT_TAB:
			case INDENT_SPACE:
			case NON_INDENT_SPACE:
			case TEXT_NL:
			case TEXT_LINEBREAK:
			case TEXT_EMPTY:
			case MARKER_H1:
			case MARKER_H2:
			case MARKER_H3:
			case MARKER_H4:
			case MARKER_H5:
			case MARKER_H6:
				// Ignorable trailing token
				cursor = cursor->prev;
				break;

			case TEXT_PLAIN:
				if ((cursor->len == 1) && (source[cursor->start] == ' ')) {
					// A lone space is ignorable too
					cursor = cursor->prev;
					break;
				}

				return NULL;

			case PAIR_BRACKET: {
				token * candidate = cursor;
				short run = 0;

				while (cursor && cursor->type == PAIR_BRACKET) {
					cursor = cursor->prev;
					run++;
				}

				if (run % 2 == 0) {
					// Even count -- no manual label
					return NULL;
				}

				// Odd count -- the last bracket is the label
				candidate->type = MANUAL_LABEL;
				return candidate;
			}

			default:
				return NULL;
		}
	}

	return NULL;
}
1572 
1573 
/// Register header `h` as a cross-reference target.
///
/// The label is derived from the header's manual label if one is found by
/// manual_label_from_header(), otherwise from the header token itself.  A
/// LINK_AUTO link with url "#" + label is pushed onto the link stack.
void process_header_to_links(mmd_engine * e, token * h) {
	// Prefer a manual label when the header has one
	token * manual = manual_label_from_header(h, e->dstr->str);
	token * target = manual ? manual : h;

	char * label = label_from_token(e->dstr->str, target);

	DString * url = d_string_new("#");
	d_string_append(url, label);

	// Store link for later use
	stack_push(e->link_stack, link_new(e->dstr->str, target, url->str, NULL, NULL, LINK_AUTO));

	d_string_free(url, true);
	free(label);
}
1599 
1600 
/// Create cross-reference links for every header on the header stack.
/// Nothing is done when EXT_NO_LABELS is set.
void process_header_stack(mmd_engine * e) {
	if (!(e->extensions & EXT_NO_LABELS)) {
		int index = 0;

		while (index < e->header_stack->size) {
			process_header_to_links(e, stack_peek_index(e->header_stack, index));
			index++;
		}
	}
}
1611 
1612 
/// Register table `t` as a cross-reference target if it has a caption.
///
/// The label comes from the first child of the token following the table,
/// or from that child's successor when the successor is a PAIR_BRACKET.
/// A LINK_AUTO link with url "#" + label is pushed onto the link stack.
void process_table_to_link(mmd_engine * e, token * t) {
	if (!table_has_caption(t)) {
		return;
	}

	token * source_token = t->next->child;

	// Use the second bracket when the caption has the form `[caption][label]`
	// (presumed form -- the code only checks that the next token is a PAIR_BRACKET)
	if (source_token->next && (source_token->next->type == PAIR_BRACKET)) {
		source_token = source_token->next;
	}

	char * label = label_from_token(e->dstr->str, source_token);

	DString * url = d_string_new("#");
	d_string_append(url, label);

	stack_push(e->link_stack, link_new(e->dstr->str, source_token, url->str, NULL, NULL, LINK_AUTO));

	d_string_free(url, true);
	free(label);
}
1636 
1637 
/// Run process_table_to_link() on every entry of the engine's table stack,
/// in index order.
void process_table_stack(mmd_engine * e) {
	int index = 0;

	while (index < e->table_stack->size) {
		process_table_to_link(e, stack_peek_index(e->table_stack, index));
		index++;
	}
}
1643 
1644 
1645 /// Parse metadata
process_metadata_stack(mmd_engine * e,scratch_pad * scratch)1646 void process_metadata_stack(mmd_engine * e, scratch_pad * scratch) {
1647 	if ((scratch->extensions & EXT_NO_METADATA) ||
1648 			(scratch->extensions & EXT_COMPATIBILITY)) {
1649 		return;
1650 	}
1651 
1652 	meta * m;
1653 	short header_level = -10;
1654 	char * temp_char = NULL;
1655 
1656 	for (int i = 0; i < e->metadata_stack->size; ++i) {
1657 		// Check for certain metadata keys
1658 		m = stack_peek_index(e->metadata_stack, i);
1659 
1660 		if (strcmp(m->key, "baseheaderlevel") == 0) {
1661 			if (header_level == -10) {
1662 				header_level = atoi(m->value);
1663 			}
1664 		} else if (strcmp(m->key, "epubheaderlevel") == 0) {
1665 			if (scratch->output_format == FORMAT_EPUB) {
1666 				header_level = atoi(m->value);
1667 			}
1668 		} else if (strcmp(m->key, "htmlheaderlevel") == 0) {
1669 			if (scratch->output_format == FORMAT_HTML) {
1670 				header_level = atoi(m->value);
1671 			}
1672 		} else if (strcmp(m->key, "xhtmlheaderlevel") == 0) {
1673 			if (scratch->output_format == FORMAT_HTML) {
1674 				header_level = atoi(m->value);
1675 			}
1676 		} else if (strcmp(m->key, "latexheaderlevel") == 0) {
1677 			if ((scratch->output_format == FORMAT_LATEX) ||
1678 					(scratch->output_format == FORMAT_BEAMER) ||
1679 					(scratch->output_format == FORMAT_MEMOIR)) {
1680 				header_level = atoi(m->value);
1681 			}
1682 		} else if (strcmp(m->key, "odfheaderlevel") == 0) {
1683 			if ((scratch->output_format == FORMAT_ODT) ||
1684 					(scratch->output_format == FORMAT_FODT)) {
1685 				header_level = atoi(m->value);
1686 			}
1687 		} else if (strcmp(m->key, "language") == 0) {
1688 			temp_char = label_from_string(m->value);
1689 
1690 			if (strcmp(temp_char, "de") == 0) {
1691 				scratch->language = LC_DE;
1692 				scratch->quotes_lang = GERMAN;
1693 			} else if (strcmp(temp_char, "es") == 0) {
1694 				scratch->language = LC_ES;
1695 				scratch->quotes_lang = SPANISH;
1696 			} else if (strcmp(temp_char, "fr") == 0) {
1697 				scratch->language = LC_FR;
1698 				scratch->quotes_lang = FRENCH;
1699 			} else if (strcmp(temp_char, "he") == 0) {
1700 				scratch->language = LC_HE;
1701 				scratch->quotes_lang = ENGLISH;
1702 			} else if (strcmp(temp_char, "nl") == 0) {
1703 				scratch->language = LC_NL;
1704 				scratch->quotes_lang = DUTCH;
1705 			} else if (strcmp(temp_char, "sv") == 0) {
1706 				scratch->language = LC_SV;
1707 				scratch->quotes_lang = SWEDISH;
1708 			} else {
1709 				scratch->language = LC_EN;
1710 				scratch->quotes_lang = ENGLISH;
1711 			}
1712 
1713 			free(temp_char);
1714 		} else if (strcmp(m->key, "latexmode") == 0) {
1715 			if (scratch->output_format == FORMAT_LATEX) {
1716 				temp_char = label_from_string(m->value);
1717 
1718 				if (strcmp(temp_char, "beamer") == 0) {
1719 					scratch->output_format = FORMAT_BEAMER;
1720 				} else if (strcmp(temp_char, "memoir") == 0) {
1721 					scratch->output_format = FORMAT_MEMOIR;
1722 				}
1723 
1724 				free(temp_char);
1725 			}
1726 		} else if (strcmp(m->key, "quoteslanguage") == 0) {
1727 			temp_char = label_from_string(m->value);
1728 
1729 			if ((strcmp(temp_char, "dutch") == 0) ||
1730 					(strcmp(temp_char, "nl") == 0)) {
1731 				scratch->quotes_lang = DUTCH;
1732 			} else if ((strcmp(temp_char, "french") == 0) ||
1733 					   (strcmp(temp_char, "fr") == 0)) {
1734 				scratch->quotes_lang = FRENCH;
1735 			} else if ((strcmp(temp_char, "german") == 0) ||
1736 					   (strcmp(temp_char, "de") == 0)) {
1737 				scratch->quotes_lang = GERMAN;
1738 			} else if (strcmp(temp_char, "germanguillemets") == 0) {
1739 				scratch->quotes_lang = GERMANGUILL;
1740 			} else if ((strcmp(temp_char, "spanish") == 0) ||
1741 					   (strcmp(temp_char, "es") == 0)) {
1742 				scratch->quotes_lang = SPANISH;
1743 			} else if ((strcmp(temp_char, "swedish") == 0) ||
1744 					   (strcmp(temp_char, "sv") == 0)) {
1745 				scratch->quotes_lang = SWEDISH;
1746 			} else {
1747 				scratch->quotes_lang = ENGLISH;
1748 			}
1749 
1750 			free(temp_char);
1751 		} else if (strcmp(m->key, "bibtex") == 0) {
1752 			scratch->bibtex_file = my_strdup(m->value);
1753 
1754 			// Trigger complete document unless explicitly denied
1755 			if (!(scratch->extensions & EXT_SNIPPET)) {
1756 				scratch->extensions |= EXT_COMPLETE;
1757 			}
1758 		} else {
1759 			// Any other key triggers complete document
1760 			if (!(scratch->extensions & EXT_SNIPPET)) {
1761 				scratch->extensions |= EXT_COMPLETE;
1762 			}
1763 		}
1764 
1765 	}
1766 
1767 	if (header_level != -10) {
1768 		scratch->base_header_level = header_level;
1769 	}
1770 }
1771 
1772 
/// Search the text covered by token `t` for the terms stored in `ac` and
/// split the token at every match.
///
/// Matches are read starting at the second node of the returned list (the
/// first node appears to be a list head -- confirm against
/// ac_trie_leftmost_longest_search()).  Each match is split out of the
/// current token with the match's own type.
void automatic_search_text(mmd_engine * e, token * t, trie * ac) {
	match * results = ac_trie_leftmost_longest_search(ac, e->dstr->str, t->start, t->len);
	token * current = t;

	if (results) {
		for (match * hit = results->next; hit; hit = hit->next) {
			token_split(current, hit->start, hit->len, hit->match_type);

			// Move on to the first token that starts after this match
			while (current && (current->start < hit->start + hit->len)) {
				current = current->next;
			}
		}
	}

	match_free(results);
}
1798 
1799 
1800 /// Determine which nodes to descend into to search for abbreviations
automatic_search(mmd_engine * e,token * t,trie * ac)1801 void automatic_search(mmd_engine * e, token * t, trie * ac) {
1802 	while (t) {
1803 		switch (t->type) {
1804 			case TEXT_PLAIN:
1805 				automatic_search_text(e, t, ac);
1806 				break;
1807 
1808 			case DOC_START_TOKEN:
1809 			case BLOCK_BLOCKQUOTE:
1810 			case BLOCK_DEFINITION:
1811 			case BLOCK_DEFLIST:
1812 			case BLOCK_LIST_BULLETED:
1813 			case BLOCK_LIST_BULLETED_LOOSE:
1814 			case BLOCK_LIST_ENUMERATED:
1815 			case BLOCK_LIST_ENUMERATED_LOOSE:
1816 			case BLOCK_LIST_ITEM_TIGHT:
1817 			case BLOCK_LIST_ITEM:
1818 			case BLOCK_PARA:
1819 			case BLOCK_TABLE:
1820 			case BLOCK_TABLE_HEADER:
1821 			case BLOCK_TABLE_SECTION:
1822 			case BLOCK_TERM:
1823 			case LINE_LIST_BULLETED:
1824 			case LINE_LIST_ENUMERATED:
1825 			case PAIR_BRACKET:
1826 			case PAIR_BRACKET_FOOTNOTE:
1827 			case PAIR_BRACKET_GLOSSARY:
1828 			case PAIR_BRACKET_IMAGE:
1829 			case PAIR_QUOTE_DOUBLE:
1830 			case PAIR_QUOTE_SINGLE:
1831 			case PAIR_STAR:
1832 			case PAIR_UL:
1833 			case TABLE_CELL:
1834 			case TABLE_ROW:
1835 				automatic_search(e, t->child, ac);
1836 				break;
1837 
1838 //			case PAIR_PAREN:
1839 			default:
1840 				break;
1841 		}
1842 
1843 		t = t->next;
1844 	}
1845 }
1846 
1847 
/// Find occurrences of the defined abbreviations and glossary terms in the
/// document's token tree.
///
/// A trie is built from the label_text of every abbreviation and the
/// clean_text of every glossary entry, and the tree rooted at e->root is
/// searched with it.  Nothing is done when both stacks are empty.
/// `scratch` is not used by this function.
void identify_global_search_terms(mmd_engine * e, scratch_pad * scratch) {
	// Only search if we have a target
	if (e->abbreviation_stack->size + e->glossary_stack->size == 0) {
		return;
	}

	trie * terms = trie_new(0);
	footnote * entry;
	int index;

	// Abbreviations are matched on their label text
	for (index = 0; index < e->abbreviation_stack->size; ++index) {
		entry = stack_peek_index(e->abbreviation_stack, index);
		trie_insert(terms, entry->label_text, PAIR_BRACKET_ABBREVIATION);
	}

	// Glossary terms are matched on their clean text (without leading '?')
	for (index = 0; index < e->glossary_stack->size; ++index) {
		entry = stack_peek_index(e->glossary_stack, index);
		trie_insert(terms, entry->clean_text, PAIR_BRACKET_GLOSSARY);
	}

	ac_trie_prepare(terms);
	automatic_search(e, e->root, terms);
	trie_free(terms);
}
1875 
1876 
/// Export the engine's parsed token tree to `out` in the requested format.
///
/// Steps, in order:
///  1. process the definition, header and table stacks;
///  2. create a scratch pad for `format` and apply the metadata to it (this
///     may change the scratch pad's output format and extensions);
///  3. unless EXT_COMPATIBILITY is set, search for abbreviation and
///     glossary terms;
///  4. run the exporter for the scratch pad's output format;
///  5. copy asset_hash and random_seed_base_labels back to the engine and
///     free the scratch pad.
///
/// A format without a case in the switch produces no output.
void mmd_engine_export_token_tree(DString * out, mmd_engine * e, short format) {
	// Process potential reference definitions, then headers and tables
	// as potential cross-reference targets
	process_definition_stack(e);
	process_header_stack(e);
	process_table_stack(e);

	scratch_pad * scratch = scratch_pad_new(e, format);

	process_metadata_stack(e, scratch);

	// Process abbreviations, glossary, etc.
	if (!(e->extensions & EXT_COMPATIBILITY)) {
		identify_global_search_terms(e, scratch);
	}

	// Two formats differ from a sibling format only by an asset flag;
	// set the flag here so they can share the sibling's case below
	if (scratch->output_format == FORMAT_HTML_WITH_ASSETS) {
		scratch->remember_assets = true;
		scratch->output_format = FORMAT_HTML;
	} else if (scratch->output_format == FORMAT_ODT) {
		scratch->store_assets = true;
	}

	switch (scratch->output_format) {
		case FORMAT_BEAMER:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_latex(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_beamer(out, e->dstr->str, e->root, scratch);

			// Close out any existing outline levels
			mmd_outline_add_beamer(out, NULL, scratch);

			mmd_export_citation_list_beamer(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_beamer(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_EPUB:
		case FORMAT_TEXTBUNDLE:
		case FORMAT_TEXTBUNDLE_COMPRESSED:
			// Always written as a complete HTML document
			scratch->store_assets = true;

			mmd_start_complete_html(out, e->dstr->str, scratch);

			mmd_export_token_tree_html(out, e->dstr->str, e->root, scratch);
			mmd_export_footnote_list_html(out, e->dstr->str, scratch);
			mmd_export_glossary_list_html(out, e->dstr->str, scratch);
			mmd_export_citation_list_html(out, e->dstr->str, scratch);

			mmd_end_complete_html(out, e->dstr->str, scratch);

			break;

		case FORMAT_HTML:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_html(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_html(out, e->dstr->str, e->root, scratch);
			mmd_export_footnote_list_html(out, e->dstr->str, scratch);
			mmd_export_glossary_list_html(out, e->dstr->str, scratch);
			mmd_export_citation_list_html(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_html(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_LATEX:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_latex(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_latex(out, e->dstr->str, e->root, scratch);
			mmd_export_citation_list_latex(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_latex(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_MEMOIR:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_latex(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_memoir(out, e->dstr->str, e->root, scratch);
			mmd_export_citation_list_latex(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_latex(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_ODT:
		case FORMAT_FODT:
			// No separate start/end of a complete document is emitted here
			mmd_export_token_tree_opendocument(out, e->dstr->str, e->root, scratch);
			break;

		case FORMAT_OPML:
			mmd_export_token_tree_opml(out, e->dstr->str, e->root, scratch);
			break;

		case FORMAT_ITMZ:
			mmd_export_token_tree_itmz(out, e->dstr->str, e->root, scratch);
			break;

		default:
			break;
	}

	// Preserve asset_hash for possible use in export
	e->asset_hash = scratch->asset_hash;

	// Preserve random label seed
	e->random_seed_base_labels = scratch->random_seed_base_labels;

	scratch_pad_free(scratch);
}
2011 
2012 
/// Work out which link, if any, a bracket pair refers to.
///
/// Three forms are tried:
///  1. explicit link `[foo](bar)` -- bracket followed by PAIR_PAREN;
///  2. reference link `[foo][bar]` (or `[foo][]`, which uses the text of the
///     first bracket) -- bracket followed by PAIR_BRACKET;
///  3. simplified implicit link `[foo]` -- only when the bracket contains no
///     nested bracket pair.
///
/// @param source      text that the tokens index into
/// @param scratch     scratch pad holding the known link definitions
/// @param bracket     the bracket pair being examined
/// @param final_link  receives the link, or NULL when there is none
/// @param skip_token  receives the number of following tokens the caller
///                    should skip (1 when the paren/second bracket was used,
///                    otherwise 0); only written when a link is found
/// @param free_link   receives true when `*final_link` was created here (an
///                    explicit link) and must be freed by the caller, false
///                    otherwise
///
/// When a link is found the opening and closing bracket tokens are set to
/// TEXT_EMPTY so that they are not output.
void parse_brackets(const char * source, scratch_pad * scratch, token * bracket, link ** final_link, short * skip_token, bool * free_link) {
	link * temp_link = NULL;
	char * temp_char = NULL;
	short temp_short = 0;

	// What is next?
	token * next = bracket->next;

	if (next) {
		temp_short = 1;
	}

	// Do not free this link after using it
	*free_link = false;

	if (next && next->type == PAIR_PAREN) {
		// We have `[foo](bar)` or `![foo](bar)`

		temp_link = explicit_link(scratch, bracket, next, source);

		if (temp_link) {
			// Don't output brackets (guarded the same way as for
			// reference links below)
			if (bracket->child) {
				bracket->child->type = TEXT_EMPTY;

				if (bracket->child->mate) {
					bracket->child->mate->type = TEXT_EMPTY;
				}
			}

			// This was an explicit link
			*final_link = temp_link;

			// Skip over parentheses
			*skip_token = temp_short;

			// Free this link
			*free_link = true;
			return;
		}
	}

	if (next && next->type == PAIR_BRACKET) {
		// Is this a reference link? `[foo][bar]` or `![foo][bar]`
		temp_char = text_inside_pair(source, next);

		if (temp_char[0] == '\0') {
			// Empty label, use first bracket (e.g. implicit link `[foo][]`)
			free(temp_char);
			temp_char = text_inside_pair(source, bracket);
		}
	} else {
		// This may be a simplified implicit link, e.g. `[foo]`

		// But not if it's nested brackets, since it would not
		// end up being a valid reference
		token * walker = bracket->child;

		while (walker) {
			switch (walker->type) {
				case PAIR_BRACKET:
				case PAIR_BRACKET_CITATION:
				case PAIR_BRACKET_FOOTNOTE:
				case PAIR_BRACKET_GLOSSARY:
				case PAIR_BRACKET_VARIABLE:
				case PAIR_BRACKET_ABBREVIATION:
					*final_link = NULL;
					return;

				default:
					break;
			}

			walker = walker->next;
		}

		temp_char = text_inside_pair(source, bracket);
		// Don't skip tokens
		temp_short = 0;
	}

	temp_link = extract_link_from_stack(scratch, temp_char);

	// free(NULL) is a no-op, so no guard is needed
	free(temp_char);

	if (temp_link) {
		// Don't output brackets
		if (bracket->child) {
			bracket->child->type = TEXT_EMPTY;

			if (bracket->child->mate) {
				bracket->child->mate->type = TEXT_EMPTY;
			}
		}

		*final_link = temp_link;

		// Skip over second bracket if present
		*skip_token = temp_short;
		return;
	}

	// No existing links, so nothing to do
	*final_link = NULL;
}
2112 
2113 
/// Give citation `c` a number the first time it is used.
///
/// A count of -1 means "not used yet": the citation is pushed onto
/// scratch->used_citations and its count becomes the new stack size.
/// A citation that already has a count is left unchanged.
void mark_citation_as_used(scratch_pad * scratch, footnote * c) {
	if (c->count != -1) {
		// Already numbered
		return;
	}

	stack_push(scratch->used_citations, c);
	c->count = scratch->used_citations->size;
}
2123 
2124 
/// Give footnote `f` a number the first time it is used.
///
/// A count of -1 means "not used yet": the footnote is pushed onto
/// scratch->used_footnotes and its count becomes the new stack size.
/// A footnote that already has a count is left unchanged.
void mark_footnote_as_used(scratch_pad * scratch, footnote * f) {
	if (f->count != -1) {
		// Already numbered
		return;
	}

	stack_push(scratch->used_footnotes, f);
	f->count = scratch->used_footnotes->size;
}
2134 
2135 
/// Give glossary entry `c` a number the first time it is used.
///
/// A count of -1 means "not used yet": the entry is pushed onto
/// scratch->used_glossaries and its count becomes the new stack size.
/// An entry that already has a count is left unchanged.
void mark_glossary_as_used(scratch_pad * scratch, footnote * c) {
	if (c->count != -1) {
		// Already numbered
		return;
	}

	stack_push(scratch->used_glossaries, c);
	c->count = scratch->used_glossaries->size;
}
2145 
2146 
/// Give abbreviation `c` a number the first time it is used.
///
/// A count of -1 means "not used yet": the abbreviation is pushed onto
/// scratch->used_abbreviations and its count becomes the new stack size.
/// An abbreviation that already has a count is left unchanged.
void mark_abbreviation_as_used(scratch_pad * scratch, footnote * c) {
	if (c->count != -1) {
		// Already numbered
		return;
	}

	stack_push(scratch->used_abbreviations, c);
	c->count = scratch->used_abbreviations->size;
}
2156 
2157 
/// Look up the citation named `target` and return its number.
///
/// Two keys are tried in order against scratch->citation_hash:
/// clean_string(target, true), then label_from_string(target).  On a hit
/// the citation is marked as used and its count is returned.
///
/// Returns -1 (converted to size_t) when neither key matches.
size_t extract_citation_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder = NULL;
	char * key = clean_string(target, true);

	HASH_FIND_STR(scratch->citation_hash, key, holder);
	free(key);

	if (!holder) {
		// Retry with the label form of the target
		key = label_from_string(target);
		HASH_FIND_STR(scratch->citation_hash, key, holder);
		free(key);
	}

	if (!holder) {
		// None found
		return -1;
	}

	mark_citation_as_used(scratch, holder->note);
	return holder->note->count;
}
2186 
2187 
/// Look up the footnote named `target` and return its number.
///
/// Two keys are tried in order against scratch->footnote_hash:
/// clean_string(target, true), then label_from_string(target).  On a hit
/// the footnote is marked as used and its count is returned.
///
/// Returns -1 (converted to size_t) when neither key matches.
size_t extract_footnote_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder = NULL;
	char * key = clean_string(target, true);

	HASH_FIND_STR(scratch->footnote_hash, key, holder);
	free(key);

	if (!holder) {
		// Retry with the label form of the target
		key = label_from_string(target);
		HASH_FIND_STR(scratch->footnote_hash, key, holder);
		free(key);
	}

	if (!holder) {
		// None found
		return -1;
	}

	mark_footnote_as_used(scratch, holder->note);
	return holder->note->count;
}
2216 
2217 
/// Look up the abbreviation named `target` and return its number.
///
/// Two keys are tried in order against scratch->abbreviation_hash:
/// clean_string(target, false), then label_from_string(target).  On a hit
/// the abbreviation is marked as used and its count is returned.
///
/// Returns -1 (converted to size_t) when neither key matches.
size_t extract_abbreviation_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder = NULL;
	char * key = clean_string(target, false);

	HASH_FIND_STR(scratch->abbreviation_hash, key, holder);
	free(key);

	if (!holder) {
		// Retry with the label form of the target
		key = label_from_string(target);
		HASH_FIND_STR(scratch->abbreviation_hash, key, holder);
		free(key);
	}

	if (!holder) {
		// None found
		return -1;
	}

	mark_abbreviation_as_used(scratch, holder->note);
	return holder->note->count;
}
2246 
2247 
/// Look up `target` in the glossary hash and mark the matching note as used.
///
/// Two keys are tried in order: the result of `clean_string(target, false)`,
/// and, only if that misses, the result of `label_from_string(target)`.
///
/// Returns the `count` of the matched note (read after it has been marked as
/// used), or -1 converted to `size_t` when neither key is present.
size_t extract_glossary_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * match = NULL;

	// First attempt: cleaned form of the target
	char * lookup = clean_string(target, false);
	HASH_FIND_STR(scratch->glossary_hash, lookup, match);
	free(lookup);

	if (match == NULL) {
		// Second attempt: label form of the target
		lookup = label_from_string(target);
		HASH_FIND_STR(scratch->glossary_hash, lookup, match);
		free(lookup);
	}

	if (match == NULL) {
		// None found
		return -1;
	}

	mark_glossary_as_used(scratch, match->note);
	return match->note->count;
}
2276 
2277 
/// Resolve the footnote referenced by bracket token `t` and store its number
/// in `*num`.
///
/// The text inside the bracket is looked up among the known footnotes. If it
/// matches, `*num` receives the value returned by the lookup. Otherwise the
/// bracket is treated as an inline footnote: a new footnote is built from the
/// bracket's children, appended to the used footnotes, and numbered by the
/// resulting size of that stack.
void footnote_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Look up the bracket's contents among the known footnotes
	char * inner = text_inside_pair(source, t);
	short known_id = extract_footnote_from_stack(scratch, inner);

	free(inner);

	if (known_id != -1) {
		// Footnote in stack
		*num = known_id;
		return;
	}

	// No match, this is an inline footnote -- blank the bracket markers
	t->child->type = TEXT_EMPTY;
	t->child->mate->type = TEXT_EMPTY;

	// Create a new footnote from the bracket contents
	footnote * inline_note = footnote_new(source, NULL, t->child, true);

	// Store as used; its number is its position in the used stack
	stack_push(scratch->used_footnotes, inline_note);

	short position = scratch->used_footnotes->size;

	*num = position;
	inline_note->count = position;

	// The engine's stack does not hold this footnote, so also record it
	// on the scratch_pad stack of inline footnotes to free later
	stack_push(scratch->inline_footnotes_to_free, inline_note);
}
2306 
2307 
/// Resolve the citation referenced by bracket token `t` and store its number
/// in `*num`.
///
/// The text inside the bracket is looked up among the known citations. If it
/// matches, `*num` receives the value returned by the lookup. Otherwise the
/// bracket markers are blanked and, unless a BibTeX file is configured (in
/// which case `*num` is set to -1), a new inline citation is created,
/// appended to the used citations, and numbered by the resulting stack size.
void citation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Look up the bracket's contents among the known citations
	char * inner = text_inside_pair(source, t);
	short known_id = extract_citation_from_stack(scratch, inner);

	free(inner);

	if (known_id != -1) {
		// Citation in stack
		*num = known_id;
		return;
	}

	// No match, this is an inline citation -- blank the bracket markers
	t->child->type = TEXT_EMPTY;
	t->child->mate->type = TEXT_EMPTY;

	if (scratch->bibtex_file) {
		// *UNLESS* we are using BibTeX, in which case we leave them alone
		*num = -1;
		return;
	}

	// Create a new citation from the bracket
	footnote * inline_cite = footnote_new(source, t, t->child, true);

	// Store as used; its number is its position in the used stack
	stack_push(scratch->used_citations, inline_cite);

	short position = scratch->used_citations->size;

	*num = position;
	inline_cite->count = position;

	// The engine's stack does not hold this citation, so also record it
	// on the scratch_pad stack of inline citations to free later
	stack_push(scratch->inline_citations_to_free, inline_cite);
}
2343 
2344 
/// Resolve the glossary entry referenced by token `t` and store its number
/// in `*num`.
///
/// The lookup text is taken from inside the pair (minus its first character)
/// when `t` has children, or from the token's own source range otherwise.
/// If the text matches a known glossary entry, `*num` receives the value
/// returned by the lookup. Otherwise an inline entry is created from the
/// first `PAIR_PAREN` child and the token following it, appended to the used
/// glossaries, and numbered by the resulting stack size.
///
/// `*num` is set to -1 when no `PAIR_PAREN` child exists (improperly
/// formatted entry) or when the lookup text cannot be allocated.
void glossary_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Get text inside bracket
	char * text;

	if (t->child) {
		text = text_inside_pair(source, t);

		if (text == NULL) {
			// Could not obtain the text; report as unresolved
			*num = -1;
			return;
		}

		// Drop the first character. strlen(text) bytes starting at text[1]
		// include the terminator (and is 0 bytes for an empty string).
		memmove(text, &text[1], strlen(text));
	} else {
		text = malloc(t->len + 1);

		if (text == NULL) {
			// Out of memory; report as unresolved rather than writing to NULL
			*num = -1;
			return;
		}

		memcpy(text, &source[t->start], t->len);
		text[t->len] = '\0';
	}

	short glossary_id = extract_glossary_from_stack(scratch, text);

	free(text);

	if (glossary_id == -1) {
		// No match, this is an inline glossary -- create a new glossary entry
		if (t->child) {
			t->child->type = TEXT_EMPTY;
			t->child->mate->type = TEXT_EMPTY;
		}

		// Create glossary: the label is the first PAIR_PAREN child
		token * label = t->child;

		while (label && label->type != PAIR_PAREN) {
			label = label->next;
		}

		if (label) {
			footnote * temp = footnote_new(source, label, label->next, false);

			// Store as used
			stack_push(scratch->used_glossaries, temp);
			*num = scratch->used_glossaries->size;
			temp->count = *num;

			// We need to free this one later since it doesn't exist
			// in the engine's stack, on the scratch_pad stack
			stack_push(scratch->inline_glossaries_to_free, temp);
		} else {
			// Improperly formatted glossary
			*num = -1;
		}
	} else {
		// Glossary in stack
		*num = glossary_id;
	}
}
2396 
2397 
/// Resolve the abbreviation referenced by token `t` and store its number
/// in `*num`.
///
/// The lookup text is the text inside the pair when `t` has children, or the
/// token's own source range prefixed with '>' otherwise; in both cases the
/// first character is skipped before the lookup. If the text matches a known
/// abbreviation, `*num` receives the value returned by the lookup. Otherwise
/// an inline abbreviation is created from the first `PAIR_PAREN` child and the
/// token following it, appended to the used abbreviations, and numbered by
/// the resulting stack size.
///
/// `*num` is set to -1 when no `PAIR_PAREN` child exists (improperly
/// formatted entry) or when the lookup text cannot be allocated.
void abbreviation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Get text inside bracket
	char * text;

	if (t->child) {
		text = text_inside_pair(source, t);
	} else {
		text = malloc(t->len + 2);

		if (text) {
			// Prefix with '>' so both branches can skip one leading character
			text[0] = '>';
			memcpy(&text[1], &source[t->start], t->len);
			text[t->len + 1] = '\0';
		}
	}

	if (text == NULL) {
		// Could not obtain the text; report as unresolved
		*num = -1;
		return;
	}

	// Skip the leading character, but never step past the terminator
	const char * lookup = (text[0] != '\0') ? &text[1] : text;

	short abbr_id = extract_abbreviation_from_stack(scratch, lookup);

	free(text);

	if (abbr_id == -1) {
		// No match, this is an inline abbreviation -- create a new entry
		if (t->child) {
			t->child->type = TEXT_EMPTY;
			t->child->mate->type = TEXT_EMPTY;
		}

		// Create abbreviation: the label is the first PAIR_PAREN child
		token * label = t->child;

		while (label && label->type != PAIR_PAREN) {
			label = label->next;
		}

		if (label) {
			footnote * temp = footnote_new(source, label, label->next, false);

			// Adjust the properties: the clean text becomes the label text
			free(temp->label_text);
			temp->label_text = temp->clean_text;

			if (temp->content && temp->content->child) {
				// Clean text is rebuilt from the range starting at the first
				// content child and ending before the closing marker of `t`
				temp->clean_text = clean_string_from_range(source,
								   temp->content->child->start,
								   t->start + t->len - t->child->mate->len - temp->content->child->start,
								   false);
			} else {
				// Ownership of the buffer moved to label_text above; do not
				// leave a second owning pointer to the same allocation
				temp->clean_text = NULL;
			}

			// Store as used
			stack_push(scratch->used_abbreviations, temp);
			*num = scratch->used_abbreviations->size;
			temp->count = *num;

			// We need to free this one later since it doesn't exist
			// in the engine's stack, on the scratch_pad stack
			stack_push(scratch->inline_abbreviations_to_free, temp);
		} else {
			// Improperly formatted abbreviation
			*num = -1;
		}
	} else {
		// Abbreviation in stack
		*num = abbr_id;
	}
}
2457 
2458 
/// Build the column alignment string for `table` and record the column count.
///
/// The last child of the table's first child is taken as the separator line;
/// it is marked `TEXT_EMPTY`, and each of its `TABLE_CELL` children adds one
/// character to `scratch->table_alignment`:
///   'l' / 'r' / 'c' for left / right / center,
///   'L' / 'R' / 'C' for the same combined with ALIGN_WRAP,
///   'N' for ALIGN_WRAP alone, and 'n' for anything else.
/// The string is NUL-terminated and `scratch->table_column_count` is set to
/// the number of cells read. If the first child has no children, the string
/// is left empty and the count is 0.
///
/// NOTE(review): nothing here bounds the number of cells against the capacity
/// of `scratch->table_alignment` -- confirm the buffer size at its declaration.
void read_table_column_alignments(const char * source, token * table, scratch_pad * scratch) {
	token * row = table->child->child;

	scratch->table_alignment[0] = '\0';
	scratch->table_column_count = 0;

	if (row == NULL) {
		return;
	}

	// Find the separator line (last row in this section)
	while (row->next) {
		row = row->next;
	}

	row->type = TEXT_EMPTY;

	// Iterate through cells to create alignment string
	short column = 0;

	for (token * cell = row->child; cell != NULL; cell = cell->next) {
		if (cell->type != TABLE_CELL) {
			continue;
		}

		short align = scan_alignment_string(&source[cell->start]);
		char code;

		switch (align) {
			case ALIGN_LEFT:
				code = 'l';
				break;

			case ALIGN_RIGHT:
				code = 'r';
				break;

			case ALIGN_CENTER:
				code = 'c';
				break;

			case ALIGN_LEFT | ALIGN_WRAP:
				code = 'L';
				break;

			case ALIGN_RIGHT | ALIGN_WRAP:
				code = 'R';
				break;

			case ALIGN_CENTER | ALIGN_WRAP:
				code = 'C';
				break;

			case ALIGN_WRAP:
				code = 'N';
				break;

			default:
				code = 'n';
				break;
		}

		scratch->table_alignment[column] = code;
		column++;
	}

	scratch->table_alignment[column] = '\0';
	scratch->table_column_count = column;
}
2530 
2531 
/// Blank out leading whitespace tokens at the start of `chain`.
///
/// Walks the chain from its head: indent/space tokens are converted to
/// `TEXT_EMPTY`, existing `TEXT_EMPTY` tokens are passed over, and the walk
/// stops at the first token of any other type. If that token is `TEXT_PLAIN`,
/// its leading whitespace is trimmed via `token_trim_leading_whitespace`
/// before returning.
///
/// Each token is visited exactly once. (The cursor used to be advanced both
/// inside the whitespace case and again after the switch, which skipped every
/// second token.)
void strip_leading_whitespace(token * chain, const char * source) {
	while (chain) {
		switch (chain->type) {
			case INDENT_TAB:
			case INDENT_SPACE:
			case NON_INDENT_SPACE:
				chain->type = TEXT_EMPTY;
				break;

			case TEXT_EMPTY:
				// Already blank; keep looking
				break;

			case TEXT_PLAIN:
				// First real text: trim it and stop
				token_trim_leading_whitespace(chain, source);
				return;

			default:
				// Any other token ends the leading whitespace run
				return;
		}

		chain = chain->next;
	}
}
2556 
2557 
/// Remove trailing whitespace from `d` in place.
///
/// While the last character satisfies `char_is_whitespace`, it is overwritten
/// with 0 and `currentStringLength` is decremented. A NULL `d` or an empty
/// string is left untouched.
///
/// The string is indexed by its current length on every step, so no pointer
/// to `str[-1]` is ever formed when the length is (or reaches) zero.
void trim_trailing_whitespace_d_string(DString * d) {
	if (d == NULL) {
		return;
	}

	while (d->currentStringLength &&
			char_is_whitespace(d->str[d->currentStringLength - 1])) {
		d->currentStringLength--;
		d->str[d->currentStringLength] = 0;
	}
}
2568 
2569 
/// Report whether the block following table token `t` looks like a caption.
///
/// Returns true only when the next token is a `BLOCK_PARA` whose first child
/// is a `PAIR_BRACKET`, and what follows that bracket is one of:
///   - nothing at all, or
///   - a token, optionally followed by a `PAIR_BRACKET`, optionally followed
///     by a `TEXT_NL` / `TEXT_LINEBREAK`, with nothing after that.
/// Returns false in every other case.
bool table_has_caption(token * t) {
	token * para = t->next;

	if (para == NULL || para->type != BLOCK_PARA) {
		return false;
	}

	token * cursor = para->child;

	if (cursor->type != PAIR_BRACKET) {
		return false;
	}

	cursor = cursor->next;

	// Step onto a following bracket pair, if there is one
	if (cursor && cursor->next && cursor->next->type == PAIR_BRACKET) {
		cursor = cursor->next;
	}

	if (cursor == NULL) {
		// End of file
		return true;
	}

	// Step onto a trailing newline / linebreak, if there is one
	if (cursor->next &&
			(cursor->next->type == TEXT_NL || cursor->next->type == TEXT_LINEBREAK)) {
		cursor = cursor->next;
	}

	// Caption only if nothing else remains in the paragraph
	return cursor->next == NULL;
}
2602 
2603 
2604 /// Grab the first "word" after the end of the fence marker:
2605 /// ````perl
2606 /// or
2607 /// ```` perl
get_fence_language_specifier(token * fence,const char * source)2608 char * get_fence_language_specifier(token * fence, const char * source) {
2609 	if (fence == NULL) {
2610 		return NULL;
2611 	}
2612 
2613 	char * result = NULL;
2614 	size_t start = fence->start + fence->len;
2615 	size_t len = 0;
2616 
2617 	while (char_is_whitespace(source[start])) {
2618 		start++;
2619 	}
2620 
2621 	while (!char_is_whitespace_or_line_ending(source[start + len])) {
2622 		len++;
2623 	}
2624 
2625 	if (len) {
2626 		result = my_strndup(&source[start], len);
2627 	}
2628 
2629 	return result;
2630 }
2631 
2632 
/// Map a header token's type to its heading level.
///
/// `BLOCK_H1`..`BLOCK_H6` give 1..6; `BLOCK_SETEXT_1` and `BLOCK_SETEXT_2`
/// give 1 and 2. Any other token type gives 0.
short raw_level_for_header(token * header) {
	short level;

	switch (header->type) {
		case BLOCK_H1:
		case BLOCK_SETEXT_1:
			level = 1;
			break;

		case BLOCK_H2:
		case BLOCK_SETEXT_2:
			level = 2;
			break;

		case BLOCK_H3:
			level = 3;
			break;

		case BLOCK_H4:
			level = 4;
			break;

		case BLOCK_H5:
			level = 5;
			break;

		case BLOCK_H6:
			level = 6;
			break;

		default:
			// Not a header type
			level = 0;
			break;
	}

	return level;
}
2658 
2659 
asset_new(char * url,scratch_pad * scratch)2660 asset * asset_new(char * url, scratch_pad * scratch) {
2661 	asset * a = malloc(sizeof(asset));
2662 
2663 	if (a) {
2664 		a->url = my_strdup(url);
2665 
2666 		// Create a unique local asset path
2667 		a->asset_path = uuid_new();
2668 	}
2669 
2670 	return a;
2671 }
2672 
2673 
/// Release an asset record along with its `url` and `asset_path` strings.
/// A NULL `a` is ignored.
void asset_free(asset * a) {
	if (a == NULL) {
		return;
	}

	free(a->asset_path);
	free(a->url);
	free(a);
}
2682 
2683 
extract_asset(scratch_pad * scratch,char * url)2684 asset * extract_asset(scratch_pad * scratch, char * url) {
2685 	asset * a;
2686 
2687 	HASH_FIND_STR(scratch->asset_hash, url, a);
2688 
2689 	return a;
2690 }
2691 
2692 
/// Record `url` in `scratch->asset_hash` unless it is already present.
///
/// A new asset is created with `asset_new` and added to the hash keyed by
/// the asset's own copy of the url. Nothing is stored when `url` is NULL,
/// when the url is already in the hash, or when the asset (or its url copy)
/// could not be allocated.
void store_asset(scratch_pad * scratch, char * url) {
	if (url == NULL) {
		// The hash lookup takes the length of the key; NULL is not a valid key
		return;
	}

	// Only store if this url has not already been stored
	if (extract_asset(scratch, url)) {
		return;
	}

	// Asset not found - create new one
	asset * a = asset_new(url, scratch);

	if (a == NULL) {
		return;
	}

	if (a->url == NULL) {
		// No key to hash under; discard the partial record
		asset_free(a);
		return;
	}

	HASH_ADD_KEYPTR(hh, scratch->asset_hash, a->url, strlen(a->url), a);
}
2703 
2704 
raw_filter_text_matches(char * pattern,short format)2705 bool raw_filter_text_matches(char * pattern, short format) {
2706 	if (!pattern) {
2707 		return false;
2708 	}
2709 
2710 	if (strcmp("*", pattern) == 0) {
2711 		return true;
2712 	} else if (strcmp("{=*}", pattern) == 0) {
2713 		return true;
2714 	} else {
2715 		switch (format) {
2716 			case FORMAT_HTML:
2717 			case FORMAT_HTML_WITH_ASSETS:
2718 				if (strstr(pattern, "html")) {
2719 					return true;
2720 				}
2721 
2722 				break;
2723 
2724 			case FORMAT_ODT:
2725 			case FORMAT_FODT:
2726 				if (strstr(pattern, "odt")) {
2727 					return true;
2728 				}
2729 
2730 				break;
2731 
2732 			case FORMAT_EPUB:
2733 				if (strstr(pattern, "epub")) {
2734 					return true;
2735 				}
2736 
2737 				break;
2738 
2739 			case FORMAT_MEMOIR:
2740 			case FORMAT_BEAMER:
2741 			case FORMAT_LATEX:
2742 				if (strstr(pattern, "latex")) {
2743 					return true;
2744 				}
2745 
2746 				break;
2747 		}
2748 	}
2749 
2750 	return false;
2751 }
2752 
2753 
2754 /// Determine whether raw filter matches specified format
raw_filter_matches(token * t,const char * source,short format)2755 bool raw_filter_matches(token * t, const char * source, short format) {
2756 	bool result = false;
2757 
2758 	if (t->type != PAIR_RAW_FILTER) {
2759 		return result;
2760 	}
2761 
2762 	char * pattern = my_strndup(&source[t->child->start + 2], t->child->mate->start - t->child->start - 2);
2763 
2764 	result = raw_filter_text_matches(pattern, format);
2765 
2766 	free(pattern);
2767 
2768 	return result;
2769 }
2770 
2771