1 /**
2
3 MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
4
5 @file writer.c
6
7 @brief Coordinate conversion of token tree to output formats.
8
9
10 @author Fletcher T. Penney
11 @bug
12
13 **/
14
15 /*
16
17 Copyright © 2016 - 2017 Fletcher T. Penney.
18
19
20 The `MultiMarkdown 6` project is released under the MIT License..
21
22 GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
23
24 https://github.com/fletcher/MultiMarkdown-4/
25
26 MMD 4 is released under both the MIT License and GPL.
27
28
29 CuTest is released under the zlib/libpng license. See CuTest.c for the text
30 of the license.
31
32
33 ## The MIT License ##
34
35 Permission is hereby granted, free of charge, to any person obtaining a copy
36 of this software and associated documentation files (the "Software"), to deal
37 in the Software without restriction, including without limitation the rights
38 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
39 copies of the Software, and to permit persons to whom the Software is
40 furnished to do so, subject to the following conditions:
41
42 The above copyright notice and this permission notice shall be included in
43 all copies or substantial portions of the Software.
44
45 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
46 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
47 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
48 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
49 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
50 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
51 THE SOFTWARE.
52
53 */
54
55 #include <ctype.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <string.h>
59
60 #include "libMultiMarkdown.h"
61
62 #include "aho-corasick.h"
63 #include "beamer.h"
64 #include "char.h"
65 #include "d_string.h"
66 #include "html.h"
67 #include "itmz.h"
68 #include "i18n.h"
69 #include "latex.h"
70 #include "memoir.h"
71 #include "mmd.h"
72 #include "opendocument-content.h"
73 #include "opml.h"
74 #include "parser.h"
75 #include "scanners.h"
76 #include "stack.h"
77 #include "token.h"
78 #include "uuid.h"
79 #include "writer.h"
80
81
// Forward declarations for the hash-storage helpers defined further down in
// this file.  scratch_pad_new() calls each of them before its definition
// appears.

// Index a citation in scratch->citation_hash.
void store_citation(scratch_pad * scratch, footnote * f);

// Index a footnote in scratch->footnote_hash.
void store_footnote(scratch_pad * scratch, footnote * f);

// Index a glossary entry in scratch->glossary_hash.
void store_glossary(scratch_pad * scratch, footnote * f);

// Index a shallow copy of a link in scratch->link_hash.
void store_link(scratch_pad * scratch, link * l);

// Index a metadata entry (by key) in scratch->meta_hash.
void store_metadata(scratch_pad * scratch, meta * m);

// Index an abbreviation (by label) in scratch->abbreviation_hash.
void store_abbreviation(scratch_pad * scratch, footnote * a);
93
94
/// Portable stand-in for strndup(), which is not available on all platforms.
/// Copies at most `n` bytes of `source` (stopping early at a NUL) into a newly
/// allocated, NUL-terminated buffer.  Returns NULL if `source` is NULL or the
/// allocation fails.  The caller must free() the result.
static char * my_strndup(const char * source, size_t n) {
	if (source == NULL) {
		return NULL;
	}

	// Bounded scan: strlen() would be too slow when strlen(source) >> n
	size_t copy_len = 0;

	while (copy_len < n && source[copy_len] != '\0') {
		copy_len++;
	}

	char * duplicate = malloc(copy_len + 1);

	if (duplicate == NULL) {
		return NULL;
	}

	memcpy(duplicate, source, copy_len);
	duplicate[copy_len] = '\0';

	return duplicate;
}
123
124
/// Portable stand-in for strdup(), which is not available on all platforms.
/// Returns a newly allocated copy of `source`, or NULL if `source` is NULL or
/// the allocation fails.  The caller must free() the result.
static char * my_strdup(const char * source) {
	if (source == NULL) {
		return NULL;
	}

	size_t bytes = strlen(source) + 1;	// Include the terminating NUL
	char * copy = malloc(bytes);

	if (copy != NULL) {
		memcpy(copy, source, bytes);
	}

	return copy;
}
139
140
141 /// Temporary storage while exporting parse tree to output format
scratch_pad_new(mmd_engine * e,short format)142 scratch_pad * scratch_pad_new(mmd_engine * e, short format) {
143 scratch_pad * p = malloc(sizeof(scratch_pad));
144
145 if (p) {
146 p->padded = 2; // Prevent unnecessary leading space
147 p->list_is_tight = false; // Tight vs Loose list
148 p->skip_token = 0; // Skip over next n tokens
149 p->close_para = true;
150
151 p->extensions = e->extensions;
152 p->output_format = format;
153 p->quotes_lang = e->quotes_lang;
154 p->language = e->language;
155
156 p->header_stack = e->header_stack;
157
158 p->outline_stack = stack_new(0);
159 p->opml_item_closed = 1;
160
161 p->recurse_depth = 0;
162
163 p->base_header_level = 1;
164
165 p->odf_para_type = BLOCK_PARA;
166
167 if (e->extensions & EXT_RANDOM_FOOT) {
168 p->random_seed_base = rand() % 32000;
169 } else {
170 p->random_seed_base = 0;
171 }
172
173 if (e->extensions & EXT_RANDOM_LABELS) {
174 p->random_seed_base_labels = rand() % 32000;
175 } else {
176 p->random_seed_base_labels = 0;
177 }
178
179 p->label_counter = 0;
180
181 // Store links in a hash for rapid retrieval when exporting
182 p->link_hash = NULL;
183 link * l;
184
185 for (int i = 0; i < e->link_stack->size; ++i) {
186 l = stack_peek_index(e->link_stack, i);
187
188 store_link(p, l);
189 }
190
191 // Store citations in a hash for rapid retrieval when exporting
192 footnote * f;
193
194 p->used_citations = stack_new(0);
195 p->inline_citations_to_free = stack_new(0);
196 p->citation_being_printed = 0;
197 p->bibtex_file = NULL;
198
199 p->citation_hash = NULL;
200
201 for (int i = 0; i < e->citation_stack->size; ++i) {
202 f = stack_peek_index(e->citation_stack, i);
203
204 store_citation(p, f);
205 }
206
207 // Store footnotes in a hash for rapid retrieval when exporting
208 p->used_footnotes = stack_new(0); // Store footnotes as we use them
209 p->inline_footnotes_to_free = stack_new(0); // Inline footnotes need to be freed
210 p->footnote_being_printed = 0;
211 p->footnote_para_counter = -1;
212
213 p->footnote_hash = NULL; // Store defined footnotes in a hash
214
215 for (int i = 0; i < e->footnote_stack->size; ++i) {
216 f = stack_peek_index(e->footnote_stack, i);
217
218 store_footnote(p, f);
219 }
220
221 // Store glossaries in a hash for rapid retrieval when exporting
222 p->used_glossaries = stack_new(0);
223 p->inline_glossaries_to_free = stack_new(0);
224 p->glossary_being_printed = 0;
225
226 p->glossary_hash = NULL;
227
228 for (int i = 0; i < e->glossary_stack->size; ++i) {
229 f = stack_peek_index(e->glossary_stack, i);
230
231 store_glossary(p, f);
232 }
233
234 // Store abbreviations in a hash for rapid retrieval when exporting
235 p->used_abbreviations = stack_new(0);
236 p->inline_abbreviations_to_free = stack_new(0);
237
238 p->abbreviation_hash = NULL;
239
240 for (int i = 0; i < e->abbreviation_stack->size; ++i) {
241 f = stack_peek_index(e->abbreviation_stack, i);
242
243 store_abbreviation(p, f);
244 }
245
246 // Store metadata in a hash for rapid retrieval when exporting
247 p->meta_hash = NULL;
248 meta * m;
249
250 for (int i = 0; i < e->metadata_stack->size; ++i) {
251 m = stack_peek_index(e->metadata_stack, i);
252
253 store_metadata(p, m);
254 }
255
256
257 // Store used assets in a hash
258 p->asset_hash = NULL;
259 p->store_assets = 0;
260 p->remember_assets = 0;
261
262 p->critic_stack = e->critic_stack;
263 }
264
265 return p;
266 }
267
268
/// Release a scratch pad created by scratch_pad_new().
///
/// Frees the stacks and hash containers owned by the scratch pad, the inline
/// notes queued on the `inline_*_to_free` stacks, and `bibtex_file`.  Hashed
/// links are shallow copies, so only the copy is freed here; hashed metadata
/// entries belong to the mmd_engine and are only unlinked.
/// Passing NULL is a no-op, since scratch_pad_new() can return NULL.
/// NOTE(review): `asset_hash` is not released here -- presumably handled
/// elsewhere; confirm.
void scratch_pad_free(scratch_pad * scratch) {
	if (scratch == NULL) {
		return;
	}

	stack_free(scratch->outline_stack);

	link * l, * l_tmp;

	// Free link hash
	HASH_ITER(hh, scratch->link_hash, l, l_tmp) {
		HASH_DEL(scratch->link_hash, l);	// Remove item from hash
		free(l);							// "Shallow" free -- the pointers will be
		// freed with the original later.
	}

	fn_holder * f, * f_tmp;

	// Free footnote hash
	HASH_ITER(hh, scratch->footnote_hash, f, f_tmp) {
		HASH_DEL(scratch->footnote_hash, f);	// Remove item from hash
		free(f);								// Free the fn_holder
	}
	stack_free(scratch->used_footnotes);

	while (scratch->inline_footnotes_to_free->size) {
		footnote_free(stack_pop(scratch->inline_footnotes_to_free));
	}

	stack_free(scratch->inline_footnotes_to_free);

	// Free citation hash
	HASH_ITER(hh, scratch->citation_hash, f, f_tmp) {
		HASH_DEL(scratch->citation_hash, f);	// Remove item from hash
		free(f);								// Free the fn_holder
	}
	stack_free(scratch->used_citations);

	while (scratch->inline_citations_to_free->size) {
		footnote_free(stack_pop(scratch->inline_citations_to_free));
	}

	stack_free(scratch->inline_citations_to_free);

	free(scratch->bibtex_file);

	// Free glossary hash
	HASH_ITER(hh, scratch->glossary_hash, f, f_tmp) {
		HASH_DEL(scratch->glossary_hash, f);	// Remove item from hash
		free(f);								// Free the fn_holder
	}
	stack_free(scratch->used_glossaries);

	while (scratch->inline_glossaries_to_free->size) {
		footnote_free(stack_pop(scratch->inline_glossaries_to_free));
	}

	stack_free(scratch->inline_glossaries_to_free);

	// Free abbreviation hash
	HASH_ITER(hh, scratch->abbreviation_hash, f, f_tmp) {
		HASH_DEL(scratch->abbreviation_hash, f);	// Remove item from hash
		free(f);									// Free the fn_holder
	}
	stack_free(scratch->used_abbreviations);

	while (scratch->inline_abbreviations_to_free->size) {
		footnote_free(stack_pop(scratch->inline_abbreviations_to_free));
	}

	stack_free(scratch->inline_abbreviations_to_free);

	// Free metadata hash
	meta * m, * m_tmp;

	HASH_ITER(hh, scratch->meta_hash, m, m_tmp) {
		HASH_DEL(scratch->meta_hash, m);	// Remove item from hash
		// Don't free the meta itself -- it is freed with the mmd_engine
	}

	free(scratch);
}
354
355
356 /// Ensure at least num newlines at end of output buffer
pad(DString * d,short num,scratch_pad * scratch)357 void pad(DString * d, short num, scratch_pad * scratch) {
358 while (num > scratch->padded) {
359 d_string_append_c(d, '\n');
360 scratch->padded++;
361 }
362 }
363
364
/// Append the raw source text of a single token to `out`.
/// Emphasis/strong markers, empty text and blockquote markers emit nothing;
/// emphasis/strong pairs and HTML blocks emit their children instead; every
/// other token is copied verbatim from `source`.  A NULL token is ignored.
void print_token_raw(DString * out, const char * source, token * t) {
	if (t == NULL) {
		return;
	}

	switch (t->type) {
		// Markers that produce no output
		case EMPH_START:
		case EMPH_STOP:
		case STRONG_START:
		case STRONG_STOP:
		case TEXT_EMPTY:
		case MARKER_BLOCKQUOTE:
			return;

		// Containers: descend into the children
		case PAIR_EMPH:
		case PAIR_STRONG:
		case BLOCK_HTML:
			print_token_tree_raw(out, source, t->child);
			return;

		// Everything else: copy the token's span of the source
		default:
			d_string_append_c_array(out, &source[t->start], t->len);
			return;
	}
}
388
389
/// Append the raw source text of `t` and every token that follows it in the
/// chain (via `next`) to `out`.
void print_token_tree_raw(DString * out, const char * source, token * t) {
	for (token * walker = t; walker != NULL; walker = walker->next) {
		print_token_raw(out, source, walker);
	}
}
397
398
/// Return a newly allocated copy of the text between the delimiters of a
/// paired token, or NULL if `source` or `pair` is NULL or the pair has no
/// child.  The caller must free() the result.
char * text_inside_pair(const char * source, token * pair) {
	if (source == NULL || pair == NULL || pair->child == NULL) {
		return NULL;
	}

	token * opener = pair->child;
	token * closer = opener->mate;

	if (closer) {
		// [foo], [^foo], [#foo] should give different strings -- use closer len
		return my_strndup(&source[pair->start + closer->len], pair->len - (closer->len * 2));
	}

	return my_strndup(&source[pair->start + opener->len], pair->len - (opener->len + 1));
}
415
416
label_from_string(const char * str)417 char * label_from_string(const char * str) {
418 const char * next_char;
419 char * label = NULL;
420
421 DString * out = d_string_new("");
422
423 while (*str != '\0') {
424 next_char = str;
425 next_char++;
426
427 if ((*next_char & 0xC0) == 0x80) {
428 // Allow multibyte characters
429 d_string_append_c(out, *str);
430
431 while ((*next_char & 0xC0) == 0x80) {
432 str++;
433 d_string_append_c(out, *str);
434 next_char++;
435 }
436 } else if ((*str >= '0' && *str <= '9') || (*str >= 'A' && *str <= 'Z')
437 || (*str >= 'a' && *str <= 'z') || (*str == '.') || (*str == '_')
438 || (*str == '-') || (*str == ':')) {
439 // Allow 0-9, A-Z, a-z, ., _, -, :
440 d_string_append_c(out, tolower(*str));
441 }
442
443 str++;
444 }
445
446 label = out->str;
447 d_string_free(out, false);
448
449 return label;
450 }
451
452
/// Build a label (see label_from_string()) from the span of `source` covered
/// by token `t`.  The caller must free() the result.
char * label_from_token(const char * source, token * t) {
	// Copy the token's text so it is NUL-terminated for label_from_string()
	DString * span = d_string_new("");
	d_string_append_c_array(span, &source[t->start], t->len);

	char * result = label_from_string(span->str);

	d_string_free(span, true);

	return result;
}
466
467
/// Determine the label for a header token.
///
/// A manual label found by manual_label_from_header() always wins.  Otherwise,
/// with EXT_RANDOM_LABELS enabled, a pseudo-random numeric label (1..32000) is
/// generated from the scratch pad's label seed and counter; without it, the
/// label is derived from the header token's own text.
/// Returns a newly allocated string the caller must free(); may be NULL if
/// allocation of a random label fails.
char * label_from_header(const char * source, token * t, scratch_pad * scratch) {
	// Largest random label is "32000": five digits plus the terminator
	enum { RANDOM_LABEL_SIZE = 6 };

	token * manual = manual_label_from_header(t, source);

	if (manual) {
		return label_from_token(source, manual);
	}

	if (!(scratch->extensions & EXT_RANDOM_LABELS)) {
		return label_from_token(source, t);
	}

	// Re-seed per label so each header's label depends only on the base
	// seed and its position
	srand(scratch->random_seed_base_labels + scratch->label_counter);
	short random_id = rand() % 32000 + 1;

	char * result = malloc(RANDOM_LABEL_SIZE);

	if (result) {
		snprintf(result, RANDOM_LABEL_SIZE, "%d", random_id);
	}

	scratch->label_counter++;

	return result;
}
491
492
493 /// Clean up whitespace in string for standardization
clean_string(const char * str,bool lowercase)494 char * clean_string(const char * str, bool lowercase) {
495 if (str == NULL) {
496 return NULL;
497 }
498
499 DString * out = d_string_new("");
500 char * clean = NULL;
501 bool block_whitespace = true;
502
503 while (*str != '\0') {
504 switch (*str) {
505 case '\\':
506 switch (*(str + 1)) {
507 case '\n':
508 case '\r':
509 d_string_append_c(out, '\n');
510 block_whitespace = true;
511 break;
512
513 default:
514 d_string_append_c(out, '\\');
515 block_whitespace = false;
516 break;
517 }
518
519 break;
520
521 case '\t':
522 case ' ':
523 case '\n':
524 case '\r':
525 if (!block_whitespace) {
526 d_string_append_c(out, ' ');
527 block_whitespace = true;
528 }
529
530 break;
531
532 default:
533 if (lowercase) {
534 d_string_append_c(out, tolower(*str));
535 } else {
536 d_string_append_c(out, *str);
537 }
538
539 block_whitespace = false;
540 break;
541 }
542
543 str++;
544 }
545
546 clean = out->str;
547
548 // Trim trailing whitespace/newlines
549 while (out->currentStringLength && char_is_whitespace_or_line_ending(clean[out->currentStringLength - 1])) {
550 out->currentStringLength--;
551 clean[out->currentStringLength] = '\0';
552 }
553
554 d_string_free(out, false);
555
556 // Trim trailing whitespace
557 return clean;
558 }
559
560
clean_string_from_range(const char * source,size_t start,size_t len,bool lowercase)561 char * clean_string_from_range(const char * source, size_t start, size_t len, bool lowercase) {
562 char * clean = NULL;
563
564 DString * raw = d_string_new("");
565
566 d_string_append_c_array(raw, &source[start], len);
567
568 clean = clean_string(raw->str, lowercase);
569
570 d_string_free(raw, true);
571
572 return clean;
573 }
574
575
/// Apply clean_string() to the span of `source` covered by token `t`.
/// The caller must free() the result.
char * clean_string_from_token(const char * source, token * t, bool lowercase) {
	char * result = clean_string_from_range(source, t->start, t->len, lowercase);

	return result;
}
579
580
/// Apply clean_string() to the text between the delimiters of paired token
/// `t`.  Returns NULL when text_inside_pair() yields nothing.  The caller
/// must free() the result.
char * clean_inside_pair(const char * source, token * t, bool lowercase) {
	char * inner = text_inside_pair(source, t);
	char * result = clean_string(inner, lowercase);

	free(inner);	// Only the cleaned copy is returned

	return result;
}
590
591
attr_new(char * key,char * value)592 attr * attr_new(char * key, char * value) {
593 attr * a = malloc(sizeof(attr));
594 size_t len = strlen(value);
595
596 // Strip quotes if present
597 if (value[0] == '"') {
598 value++;
599 len--;
600 }
601
602 if (value[len - 1] == '"') {
603 value[len - 1] = '\0';
604 }
605
606 if (a) {
607 a->key = key;
608 a->value = my_strdup(value);
609 a->next = NULL;
610 }
611
612 return a;
613 }
614
615
parse_attributes(char * source)616 attr * parse_attributes(char * source) {
617 attr * attributes = NULL;
618 attr * a = NULL;
619 char * key = NULL;
620 char * value = NULL;
621 size_t scan_len;
622 size_t pos = 0;
623
624 while (source[pos] != '\0' && scan_attr(&source[pos])) {
625 pos += scan_spnl(&source[pos]);
626
627 // Get key
628 scan_len = scan_key(&source[pos]);
629 key = my_strndup(&source[pos], scan_len);
630
631 // Skip '='
632 pos += scan_len + 1;
633
634 // Get value
635 scan_len = scan_value(&source[pos]);
636 value = my_strndup(&source[pos], scan_len);
637
638 pos += scan_len;
639
640 if (a) {
641 a->next = attr_new(key, value);
642 a = a->next;
643 } else {
644 #ifndef __clang_analyzer__
645 a = attr_new(key, value);
646 attributes = a;
647 #endif
648 }
649
650 free(value); // We stored a modified copy
651 }
652
653 return attributes;
654 }
655
656
link_new(const char * source,token * label,char * url,char * title,char * attributes,short flags)657 link * link_new(const char * source, token * label, char * url, char * title, char * attributes, short flags) {
658 link * l = malloc(sizeof(link));
659
660 if (l) {
661 l->label = label;
662
663 if (label) {
664 l->clean_text = clean_inside_pair(source, label, true);
665 l->label_text = label_from_token(source, label);
666 } else {
667 l->clean_text = NULL;
668 l->label_text = NULL;
669 }
670
671 l->url = clean_string(url, false);
672 l->title = (title == NULL) ? NULL : my_strdup(title);
673 l->attributes = (attributes == NULL) ? NULL : parse_attributes(attributes);
674
675 l->flags = flags;
676 }
677
678 return l;
679 }
680
681
682 /// Store shallow copies of links in the storage hash. The link
683 /// itself is new, but references the same data as the original.
684 /// This allows the copied link to simply be `free()`'d without
685 /// freeing the pointers.
link_shallow_copy(link * l)686 link * link_shallow_copy(link * l) {
687 link * new = malloc(sizeof(link));
688
689 if (new) {
690 new->label = l->label;
691 new->clean_text = l->clean_text;
692 new->label_text = l->label_text;
693 new->url = l->url;
694 new->title = l->title;
695 new->attributes = l->attributes;
696 }
697
698 return new;
699 }
700
701
702 /// Copy stored links to a hash for quick searching during export.
703 /// Links are stored via a clean version of their text(from
704 /// `clean_string()`) and a label version (`label_from_string()`).
705 /// The first link for each string is stored.
store_link(scratch_pad * scratch,link * l)706 void store_link(scratch_pad * scratch, link * l) {
707 link * temp_link;
708
709 // Add link via `clean_text`?
710 if (l->clean_text && l->clean_text[0] != '\0') {
711 HASH_FIND_STR(scratch->link_hash, l->clean_text, temp_link);
712
713 if (!temp_link) {
714 // Only add if another link is not found with clean_text
715 temp_link = link_shallow_copy(l);
716 HASH_ADD_KEYPTR(hh, scratch->link_hash, l->clean_text, strlen(l->clean_text), temp_link);
717 }
718 }
719
720 // Add link via `label_text`?
721 if (l->label_text && l->label_text[0] != '\0') {
722 HASH_FIND_STR(scratch->link_hash, l->label_text, temp_link);
723
724 if (!temp_link) {
725 // Only add if another link is not found with label_text
726 temp_link = link_shallow_copy(l);
727 HASH_ADD_KEYPTR(hh, scratch->link_hash, l->label_text, strlen(l->label_text), temp_link);
728 }
729 }
730 }
731
retrieve_link(scratch_pad * scratch,const char * key)732 link * retrieve_link(scratch_pad * scratch, const char * key) {
733 link * l;
734
735 HASH_FIND_STR(scratch->link_hash, key, l);
736
737 if (l) {
738 return l;
739 }
740
741 char * clean = clean_string(key, true);
742
743 HASH_FIND_STR(scratch->link_hash, clean, l);
744
745 free(clean);
746
747 return l;
748 }
749
750
fn_holder_new(footnote * f)751 fn_holder * fn_holder_new(footnote * f) {
752 fn_holder * h = malloc(sizeof(fn_holder));
753
754 if (h) {
755 h->note = f;
756 }
757
758 return h;
759 }
760
761
/// Index footnote `f` in the scratch pad's footnote hash, once by its clean
/// text and once by its label text (each only when non-empty).  The first
/// footnote stored for a given key wins.  The keys point at the footnote's
/// own strings, so the footnote must outlive the hash.  If a holder cannot be
/// allocated, that key is skipped rather than handing NULL to the hash.
void store_footnote(scratch_pad * scratch, footnote * f) {
	fn_holder * temp_holder;

	// Store by `clean_text`?
	if (f->clean_text && f->clean_text[0] != '\0') {
		HASH_FIND_STR(scratch->footnote_hash, f->clean_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->footnote_hash, f->clean_text, strlen(f->clean_text), temp_holder);
			}
		}
	}

	// Store by `label_text`?
	if (f->label_text && f->label_text[0] != '\0') {
		HASH_FIND_STR(scratch->footnote_hash, f->label_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->footnote_hash, f->label_text, strlen(f->label_text), temp_holder);
			}
		}
	}
}
785
786
/// Index citation `f` in the scratch pad's citation hash, once by its clean
/// text and once by its label text (each only when non-empty).  The first
/// citation stored for a given key wins.  The keys point at the citation's
/// own strings, so the citation must outlive the hash.  If a holder cannot be
/// allocated, that key is skipped rather than handing NULL to the hash.
void store_citation(scratch_pad * scratch, footnote * f) {
	fn_holder * temp_holder;

	// Store by `clean_text`?
	if (f->clean_text && f->clean_text[0] != '\0') {
		HASH_FIND_STR(scratch->citation_hash, f->clean_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->citation_hash, f->clean_text, strlen(f->clean_text), temp_holder);
			}
		}
	}

	// Store by `label_text`?
	if (f->label_text && f->label_text[0] != '\0') {
		HASH_FIND_STR(scratch->citation_hash, f->label_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->citation_hash, f->label_text, strlen(f->label_text), temp_holder);
			}
		}
	}
}
810
811
/// Index glossary entry `f` in the scratch pad's glossary hash, once by its
/// clean text and once by its label text (each only when non-empty).  The
/// first entry stored for a given key wins.  The keys point at the entry's
/// own strings, so the entry must outlive the hash.  If a holder cannot be
/// allocated, that key is skipped rather than handing NULL to the hash.
void store_glossary(scratch_pad * scratch, footnote * f) {
	fn_holder * temp_holder;

	// Store by `clean_text`?
	if (f->clean_text && f->clean_text[0] != '\0') {
		HASH_FIND_STR(scratch->glossary_hash, f->clean_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->glossary_hash, f->clean_text, strlen(f->clean_text), temp_holder);
			}
		}
	}

	// Store by `label_text`?
	if (f->label_text && f->label_text[0] != '\0') {
		HASH_FIND_STR(scratch->glossary_hash, f->label_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->glossary_hash, f->label_text, strlen(f->label_text), temp_holder);
			}
		}
	}
}
835
836
/// Index metadata entry `m` in the scratch pad's metadata hash by its key.
/// Entries with a missing or empty key are ignored, and the first entry
/// stored for a given key wins.  The entry itself (not a copy) is placed in
/// the hash.
void store_metadata(scratch_pad * scratch, meta * m) {
	if (m->key == NULL || m->key[0] == '\0') {
		return;
	}

	meta * existing;

	HASH_FIND_STR(scratch->meta_hash, m->key, existing);

	if (existing) {
		return;
	}

	HASH_ADD_KEYPTR(hh, scratch->meta_hash, m->key, strlen(m->key), m);
}
849
850
/// Index abbreviation `f` in the scratch pad's abbreviation hash by its label
/// text (only when non-empty).  The first abbreviation stored for a given
/// label wins.  The key points at the abbreviation's own string, so it must
/// outlive the hash.  If a holder cannot be allocated, the abbreviation is
/// skipped rather than handing NULL to the hash.
void store_abbreviation(scratch_pad * scratch, footnote * f) {
	fn_holder * temp_holder;

	// Store by `label_text`
	if (f->label_text && f->label_text[0] != '\0') {
		HASH_FIND_STR(scratch->abbreviation_hash, f->label_text, temp_holder);

		if (!temp_holder) {
			temp_holder = fn_holder_new(f);

			if (temp_holder) {
				HASH_ADD_KEYPTR(hh, scratch->abbreviation_hash, f->label_text, strlen(f->label_text), temp_holder);
			}
		}
	}
}
864
865
/// Deep-free a link: its label/clean/url/title strings, every node of its
/// attribute list (including each key and value), and the link itself.
/// The label token is not freed.  A NULL link is ignored.
void link_free(link * l) {
	if (l == NULL) {
		return;
	}

	free(l->label_text);
	free(l->clean_text);
	free(l->url);
	free(l->title);

	// Walk the attribute list, grabbing `next` before each node goes away
	attr * node = l->attributes;

	while (node != NULL) {
		attr * following = node->next;

		free(node->key);
		free(node->value);
		free(node);

		node = following;
	}

	free(l);
}
888
889
/// Advance `*remainder` past any run of space/indent tokens
/// (NON_INDENT_SPACE, INDENT_SPACE, INDENT_TAB).
void whitespace_accept(token ** remainder) {
	for (;;) {
		// Keep consuming until no whitespace token is accepted
		if (!token_chain_accept_multiple(remainder, 3, NON_INDENT_SPACE, INDENT_SPACE, INDENT_TAB)) {
			break;
		}
	}
}
893
894
895 /// Find link based on label
extract_link_from_stack(scratch_pad * scratch,const char * target)896 link * extract_link_from_stack(scratch_pad * scratch, const char * target) {
897 char * key = clean_string(target, true);
898
899 link * temp = NULL;
900
901 HASH_FIND_STR(scratch->link_hash, key, temp);
902
903 free(key);
904
905 if (temp) {
906 return temp;
907 }
908
909 key = label_from_string(target);
910
911 HASH_FIND_STR(scratch->link_hash, key, temp);
912
913 free(key);
914
915 return temp;
916 }
917
918
/// Return true when scan_url() matches a non-empty prefix of `url` that
/// spans the entire string.
bool validate_url(const char * url) {
	size_t matched = scan_url(url);

	if (matched == 0) {
		return false;
	}

	return matched == strlen(url);
}
924
925
/// Consume a link destination from the token chain at `*remainder`.
///
/// A paired token (paren, angle or quote) is accepted via
/// token_chain_accept_multiple() for PAIR_ANGLE/PAIR_PAREN and its inner text
/// is used.  Otherwise the destination is scanned from the source text with
/// scan_destination() and `*remainder` is advanced past the tokens it covers.
///
/// Returns a newly allocated, cleaned URL the caller must free(), or NULL if
/// there is nothing to read, no text could be extracted, or `validate` is
/// true and the text is not a valid URL.
char * destination_accept(const char * source, token ** remainder, bool validate) {
	char * url = NULL;
	char * clean = NULL;
	token * t = NULL;
	size_t start;
	size_t scan_len;

	if (*remainder == NULL) {
		return NULL;
	}

	switch ((*remainder)->type) {
		case PAIR_PAREN:
		case PAIR_ANGLE:
		case PAIR_QUOTE_SINGLE:
		case PAIR_QUOTE_DOUBLE:
			t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN);
			url = text_inside_pair(source, t);
			break;

		default:
			start = (*remainder)->start;

			// Skip any whitespace
			while (char_is_whitespace(source[start])) {
				start++;
			}

			scan_len = scan_destination(&source[start]);

			// Grab destination string
			url = my_strndup(&source[start], scan_len);

			// Advance remainder to end of destination
			while ((*remainder)->next &&
					(*remainder)->next->start < start + scan_len) {
				*remainder = (*remainder)->next;
			}

			t = (*remainder);	// We need to remember this for below
			// Move remainder beyond destination
			*remainder = (*remainder)->next;

			// Is there a space in a URL concatenated with a title or attribute?
			// e.g. [foo]: http://foo.bar/ class="foo"
			// Since only one space between URL and class, they are joined.

			if (t->type == TEXT_PLAIN) {
				// Trim leading whitespace
				token_trim_leading_whitespace(t, source);
				token_split_on_char(t, source, ' ');
				*remainder = t->next;
			}

			break;
	}

	// Is this a valid URL?
	clean = clean_string(url, false);

	// `clean` is NULL when no text was extracted (clean_string(NULL) returns
	// NULL); never hand that to validate_url().
	if (clean && validate && !validate_url(clean)) {
		free(clean);
		clean = NULL;
	}

	free(url);
	return clean;
}
994
995
/// Read a URL from `source` beginning at `start`.
///
/// The destination is measured with scan_destination() and capped at
/// `max_len` bytes.  When something was matched, `*end_pos` (if non-NULL)
/// receives the position just past it, and enclosing `<` `>` are stripped.
/// Returns a newly allocated, cleaned URL the caller must free(), or NULL if
/// nothing was matched or `validate` is true and the URL is not valid.
/// `*end_pos` is left untouched when nothing was matched.
char * url_accept(const char * source, size_t start, size_t max_len, size_t * end_pos, bool validate) {
	size_t scan_len = scan_destination(&source[start]);

	if (scan_len == 0) {
		return NULL;
	}

	if (scan_len > max_len) {
		scan_len = max_len;
	}

	if (end_pos) {
		*end_pos = start + scan_len;
	}

	// Is this <foo>?  If so, strip '<' and '>'
	if (source[start] == '<' && source[start + scan_len - 1] == '>') {
		start++;
		scan_len -= 2;
	}

	char * raw = my_strndup(&source[start], scan_len);
	char * clean = clean_string(raw, false);

	free(raw);

	if (validate && !validate_url(clean)) {
		free(clean);
		clean = NULL;
	}

	return clean;
}
1034
1035
1036 /// Extract url string from `(foo)` or `(<foo>)` or `(foo "bar")`
extract_from_paren(token * paren,const char * source,char ** url,char ** title,char ** attributes)1037 void extract_from_paren(token * paren, const char * source, char ** url, char ** title, char ** attributes) {
1038 size_t scan_len;
1039 size_t pos = paren->child->next->start;
1040
1041
1042 size_t attr_len;
1043
1044 // Skip whitespace
1045 while (char_is_whitespace(source[pos])) {
1046 pos++;
1047 }
1048
1049 // Grab URL
1050 *url = url_accept(source, pos, paren->start + paren->len - 1 - pos, &pos, false);
1051
1052 // Skip whitespace
1053 while (char_is_whitespace(source[pos])) {
1054 pos++;
1055 }
1056
1057 // Grab title, if present
1058 scan_len = scan_title(&source[pos]);
1059
1060 if (scan_len) {
1061 *title = my_strndup(&source[pos + 1], scan_len - 2);
1062 pos += scan_len;
1063 }
1064
1065 // Skip whitespace
1066 while (char_is_whitespace(source[pos])) {
1067 pos++;
1068 }
1069
1070 // Grab attributes, if present
1071 attr_len = scan_attributes(&source[pos]);
1072
1073 if (attr_len) {
1074 *attributes = my_strndup(&source[pos], attr_len);
1075 }
1076 }
1077
1078
1079 /// Create a link from an explicit "inline" link `[foo](bar)`
explicit_link(scratch_pad * scratch,token * bracket,token * paren,const char * source)1080 link * explicit_link(scratch_pad * scratch, token * bracket, token * paren, const char * source) {
1081 char * url_char = NULL;
1082 char * title_char = NULL;
1083 char * attr_char = NULL;
1084 link * l = NULL;
1085
1086 extract_from_paren(paren, source, &url_char, &title_char, &attr_char);
1087
1088 if (attr_char) {
1089 if (!(scratch->extensions & EXT_COMPATIBILITY)) {
1090 l = link_new(source, NULL, url_char, title_char, attr_char, LINK_INLINE);
1091 }
1092 } else {
1093 l = link_new(source, NULL, url_char, title_char, attr_char, LINK_INLINE);
1094 }
1095
1096 free(url_char);
1097 free(title_char);
1098 free(attr_char);
1099
1100 return l;
1101 }
1102
1103
footnote_new(const char * source,token * label,token * content,bool lowercase)1104 footnote * footnote_new(const char * source, token * label, token * content, bool lowercase) {
1105 footnote * f = malloc(sizeof(footnote));
1106 token * walker;
1107
1108 if (f) {
1109 f->label = label;
1110 f->clean_text = (label == NULL) ? NULL : clean_inside_pair(source, label, lowercase);
1111 f->label_text = (label == NULL) ? NULL : label_from_token(source, label);
1112 f->free_para = false;
1113 f->count = -1;
1114
1115 if (content) {
1116 switch (content->type) {
1117 case BLOCK_PARA:
1118 f->content = content;
1119 break;
1120
1121 case TEXT_PLAIN:
1122 token_trim_leading_whitespace(content, source);
1123
1124 default:
1125 // Trim trailing newlines
1126 walker = content->tail;
1127
1128 while (walker) {
1129 switch (walker->type) {
1130 case TEXT_NL:
1131 case TEXT_NL_SP:
1132 content->tail = walker->prev;
1133 token_free(walker);
1134 walker = content->tail;
1135 walker->next = NULL;
1136 break;
1137
1138 default:
1139 walker = NULL;
1140 break;
1141 }
1142 }
1143
1144 f->content = token_new_parent(content, BLOCK_PARA);
1145 f->free_para = true;
1146 break;
1147 }
1148 } else {
1149 f->content = NULL;
1150 }
1151 }
1152
1153 return f;
1154 }
1155
1156
/// Free a footnote and the strings it owns.  The BLOCK_PARA wrapper created
/// by footnote_new() (`free_para`) is freed as well, unless tokens come from
/// the object pool (kUseObjectPool).  A NULL footnote is ignored.
void footnote_free(footnote * f) {
	if (f == NULL) {
		return;
	}

#ifndef kUseObjectPool

	// With the object pool there is nothing to do for the wrapper
	if (f->free_para) {
		free(f->content);
	}

#endif

	free(f->clean_text);
	free(f->label_text);

	free(f);
}
1173
1174
meta_new(const char * source,size_t key_start,size_t len)1175 meta * meta_new(const char * source, size_t key_start, size_t len) {
1176 meta * m = malloc(sizeof(meta));
1177 char * key;
1178
1179 if (m) {
1180 key = my_strndup(&source[key_start], len);
1181 m->key = label_from_string(key);
1182 free(key);
1183 m->value = NULL;
1184 m->start = key_start;
1185 }
1186
1187 return m;
1188 }
1189
1190
/// Replace the value of metadata entry `m` with a cleaned copy of `value`.
/// A NULL `value` leaves the entry untouched.
void meta_set_value(meta * m, const char * value) {
	if (value == NULL) {
		return;
	}

	free(m->value);		// free(NULL) is a no-op, so no check is needed

	m->value = clean_string(value, false);
}
1200
1201
/// Free a metadata entry together with its key and value.
/// A NULL entry is ignored.
void meta_free(meta * m) {
	if (m == NULL) {
		return;
	}

	free(m->key);
	free(m->value);

	free(m);
}
1210
1211
1212 /// Find metadata based on key
extract_meta_from_stack(scratch_pad * scratch,const char * target)1213 meta * extract_meta_from_stack(scratch_pad * scratch, const char * target) {
1214 char * key = clean_string(target, true);
1215
1216 meta * temp = NULL;
1217
1218 HASH_FIND_STR(scratch->meta_hash, key, temp);
1219
1220 free(key);
1221
1222 return temp;
1223 }
1224
1225
/// Look up the value stored for metadata key `target`.
/// The key is converted to label form before the lookup.  Returns the entry's
/// value pointer itself (not a copy -- the caller must not free it), or NULL
/// if there is no such entry.
char * extract_metadata(scratch_pad * scratch, const char * target) {
	char * label = label_from_string(target);
	meta * entry = extract_meta_from_stack(scratch, label);

	free(label);

	return entry ? entry->value : NULL;
}
1238
1239
abbr_new(const char * source,token * label,token * content)1240 abbr * abbr_new(const char * source, token * label, token * content) {
1241 abbr * a = malloc(sizeof(abbr));
1242
1243 if (a) {
1244 a->abbr = text_inside_pair(source, label);
1245 a->abbr_len = strlen(a->abbr);
1246 a->expansion = clean_string_from_range(source, content->start, content->len, false);
1247 a->expansion_len = strlen(a->expansion);
1248 }
1249
1250 return a;
1251 }
1252
/// Release an abbreviation record and both of its strings.
/// Passing NULL is allowed and does nothing.
void abbreviation_free(abbr * a) {
	if (a == NULL) {
		return;
	}

	free(a->abbr);
	free(a->expansion);
	free(a);
}
1260
1261
/// Try to consume one definition from a token chain.
///
/// On entry `*remainder` points at the label token; it is advanced as
/// tokens are consumed.
///
/// * Citation / footnote / glossary labels, when EXT_NOTES is enabled:
///   a COLON must follow; a footnote object is created and pushed onto
///   the matching engine stack.
/// * The same labels when EXT_NOTES is NOT enabled fall through and are
///   handled exactly like PAIR_BRACKET.
/// * PAIR_BRACKET: parsed as a reference link definition (destination,
///   optional title, optional attributes) and pushed onto link_stack if
///   a link object was created.
/// * PAIR_BRACKET_VARIABLE: only prints a diagnostic to stderr.
///
/// Returns false when the required COLON is missing or the label type is
/// none of the above; returns true in every other case (including when
/// no link was created).
bool definition_extract(mmd_engine * e, token ** remainder) {
	char * source = e->dstr->str;
	token * label = NULL;
	token * title = NULL;
	char * url_char = NULL;
	char * title_char = NULL;
	char * attr_char = NULL;
	token * temp = NULL;
	size_t attr_len;

	link * l = NULL;
	footnote * f = NULL;

	// Store label
	label = *remainder;

	*remainder = (*remainder)->next;

	// Prepare for parsing

	// Account for settings

	switch (label->type) {
		case PAIR_BRACKET_CITATION:
		case PAIR_BRACKET_FOOTNOTE:
		case PAIR_BRACKET_GLOSSARY:
			if (e->extensions & EXT_NOTES) {
				if (!token_chain_accept(remainder, COLON)) {
					return false;
				}

				title = *remainder;		// Track first token of content in 'title'

				// Store for later use
				switch (label->type) {
					case PAIR_BRACKET_CITATION:
						f = footnote_new(e->dstr->str, label, title, true);
						stack_push(e->citation_stack, f);
						break;

					case PAIR_BRACKET_FOOTNOTE:
						f = footnote_new(e->dstr->str, label, title, true);
						stack_push(e->footnote_stack, f);
						break;

					case PAIR_BRACKET_GLOSSARY:
						f = footnote_new(e->dstr->str, label, title, false);
						stack_push(e->glossary_stack, f);
						break;
				}

				break;
			}

		// Without EXT_NOTES there is no `break` above, so control
		// falls through and the label is treated as a reference link.

		case PAIR_BRACKET:

			// Reference Link Definition

			if (!token_chain_accept(remainder, COLON)) {
				// Nothing has been allocated yet, so returning here leaks nothing
				return false;
			}

			// Skip space
			whitespace_accept(remainder);

			// Grab destination
			url_char = destination_accept(e->dstr->str, remainder, false);

			whitespace_accept(remainder);

			// Grab title, if present
			// (remember the position so it can be restored if no title is found)
			temp = *remainder;

			title = token_chain_accept_multiple(remainder, 2, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE);

			if (!title) {
				// See if there's a title on next line
				whitespace_accept(remainder);
				token_chain_accept_multiple(remainder, 2, TEXT_NL, TEXT_LINEBREAK);
				whitespace_accept(remainder);

				title = token_chain_accept_multiple(remainder, 2, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE);

				if (!title) {
					// Still no title -- undo the look-ahead
					*remainder = temp;
				}
			}

			// `title` may be NULL here; text_inside_pair() is called regardless
			title_char = text_inside_pair(e->dstr->str, title);

			// Get attributes
			if ((*remainder) && (((*remainder)->type != TEXT_NL) && ((*remainder)->type != TEXT_LINEBREAK))) {
				// Something other than a line ending follows the destination/title
				if (!(e->extensions & EXT_COMPATIBILITY)) {
					attr_len = scan_attributes(&source[(*remainder)->start]);

					if (attr_len) {
						attr_char = my_strndup(&source[(*remainder)->start], attr_len);

						// Skip forward
						// (attr_len now becomes the absolute offset just past the attributes)
						attr_len += (*remainder)->start;

						while ((*remainder) && (*remainder)->start < attr_len) {
							*remainder = (*remainder)->next;
						}
					}

					l = link_new(e->dstr->str, label, url_char, title_char, attr_char, LINK_REFERENCE);
				} else {
					// Not valid match
					// (in compatibility mode no link is created; `l` stays NULL)
				}
			} else {
				l = link_new(e->dstr->str, label, url_char, title_char, attr_char, LINK_REFERENCE);
			}

			// Store link for later use
			if (l) {
				stack_push(e->link_stack, l);
			}

			break;

		case PAIR_BRACKET_VARIABLE:
			// Diagnostic output only; nothing is stored for variables here
			fprintf(stderr, "Process variable:\n");
			token_describe(label, e->dstr->str);
			break;

		default:
			// Rest of block is not definitions (or has already been processed)
			return false;
	}

	// Advance to next line
	token_skip_until_type_multiple(remainder, 2, TEXT_NL, TEXT_LINEBREAK);

	if (*remainder) {
		*remainder = (*remainder)->next;
	}

	// Clean up
	// (the temporary strings are released here, after link_new() has been called with them)
	free(url_char);
	free(title_char);
	free(attr_char);

	return true;
}
1407
1408
/// Turn one definition block into an entry on the matching engine stack.
///
/// Abbreviation, citation, footnote and glossary blocks each create a
/// footnote object via footnote_new() and push it onto the corresponding
/// stack; link definition blocks are handed to definition_extract().
/// Any other block type is reported on stderr.  In every case the block's
/// type is finally set to BLOCK_EMPTY.
///
/// NOTE(review): `block->child` is dereferenced without a NULL check, and
/// the result of footnote_new() is pushed even when it is NULL -- confirm
/// that callers and stack consumers tolerate this.
void process_definition_block(mmd_engine * e, token * block) {
	footnote * f;

	token * label = block->child;

	// If the first child is a paragraph, the label is the paragraph's first child
	if (label->type == BLOCK_PARA) {
		label = label->child;
	}

	switch (block->type) {
		case BLOCK_DEF_ABBREVIATION:
		case BLOCK_DEF_CITATION:
		case BLOCK_DEF_FOOTNOTE:
		case BLOCK_DEF_GLOSSARY:
			switch (block->type) {
				case BLOCK_DEF_ABBREVIATION:
					// Strip leading '>' from term
					f = footnote_new(e->dstr->str, label, block->child, false);

					if (f && f->clean_text) {
						// Shift the string left by one character; copying strlen()
						// bytes from index 1 also moves the terminating NUL
						memmove(f->clean_text, &(f->clean_text)[1], strlen(f->clean_text));

						// Then drop leading whitespace, one character per pass
						while (char_is_whitespace((f->clean_text)[0])) {
							memmove(f->clean_text, &(f->clean_text)[1], strlen(f->clean_text));
						}
					}

					// Adjust the properties
					// (the stripped term becomes the label; clean_text is then
					// rebuilt from the third child token of the content onward,
					// up to the end of the block)
					if (f) {
						free(f->label_text);
						f->label_text = f->clean_text;

						if (f->content &&
								f->content->child &&
								f->content->child->next &&
								f->content->child->next->next) {
							f->clean_text = clean_string_from_range(e->dstr->str, f->content->child->next->next->start, block->start + block->len - f->content->child->next->next->start, false);
						} else {
							f->clean_text = NULL;
						}
					}

					stack_push(e->abbreviation_stack, f);
					break;

				case BLOCK_DEF_CITATION:
					f = footnote_new(e->dstr->str, label, block->child, true);
					stack_push(e->citation_stack, f);
					break;

				case BLOCK_DEF_FOOTNOTE:
					f = footnote_new(e->dstr->str, label, block->child, true);
					stack_push(e->footnote_stack, f);
					break;

				case BLOCK_DEF_GLOSSARY:
					// Strip leading '?' from term
					f = footnote_new(e->dstr->str, label, block->child, false);

					if (f && f->clean_text) {
						// Shift left by one character (terminating NUL included)
						memmove(f->clean_text, &(f->clean_text)[1], strlen(f->clean_text));
					}

					//if (f && f->label_text)
					//	memmove(f->label_text, &(f->label_text)[1],strlen(f->label_text));

					stack_push(e->glossary_stack, f);
					break;
			}

			// Blank out the label token and the token that follows it
			label->type = TEXT_EMPTY;

			if (label->next) {
				label->next->type = TEXT_EMPTY;
			}

			strip_leading_whitespace(label, e->dstr->str);
			break;

		case BLOCK_DEF_LINK:
			// Return value is ignored; `label` is a local, so the advanced
			// position is not used afterwards
			definition_extract(e, &(label));
			break;

		default:
			// Unexpected block type -- report it
			fprintf(stderr, "process %d\n", block->type);
	}

	block->type = BLOCK_EMPTY;
}
1498
1499
/// Run process_definition_block() on every entry of the engine's
/// definition stack, from index 0 upward.
void process_definition_stack(mmd_engine * e) {
	int idx = 0;

	while (idx < e->definition_stack->size) {
		process_definition_block(e, stack_peek_index(e->definition_stack, idx));
		++idx;
	}
}
1505
/// Locate a manual label at the end of a header.
///
/// The header's child chain is scanned backwards from its tail.
/// Whitespace-like tokens, line endings, empty tokens, header markers and
/// single-space plain text are skipped.  A token already typed
/// MANUAL_LABEL is returned as is.  When a PAIR_BRACKET is reached, the
/// run of consecutive PAIR_BRACKET tokens ending there is counted: an odd
/// count retypes the last bracket as MANUAL_LABEL and returns it, an even
/// count yields NULL.  Any other token ends the scan with NULL.
///
/// Returns NULL as well when `h` or `h->child` is NULL.
token * manual_label_from_header(token * h, const char * source) {
	if (!h || !h->child) {
		return NULL;
	}

	token * found = NULL;
	token * cur = h->child->tail;
	short brackets = 0;

	while (cur) {
		switch (cur->type) {
			case MANUAL_LABEL:
				// Already identified
				found = cur;
				cur = NULL;
				break;

			case INDENT_TAB:
			case INDENT_SPACE:
			case NON_INDENT_SPACE:
			case TEXT_NL:
			case TEXT_LINEBREAK:
			case TEXT_EMPTY:
			case MARKER_H1:
			case MARKER_H2:
			case MARKER_H3:
			case MARKER_H4:
			case MARKER_H5:
			case MARKER_H6:
				// Ignorable trailing token -- keep moving backwards
				cur = cur->prev;
				break;

			case TEXT_PLAIN:

				// A lone space is ignorable; any other text stops the scan
				if ((cur->len == 1) && (source[cur->start] == ' ')) {
					cur = cur->prev;
				} else {
					cur = NULL;
				}

				break;

			case PAIR_BRACKET:
				found = cur;

				// Count the run of adjacent brackets ending at this one
				for (; cur && cur->type == PAIR_BRACKET; cur = cur->prev) {
					brackets++;
				}

				if (brackets % 2 == 0) {
					// Even count
					found = NULL;
				} else {
					// Odd count
					found->type = MANUAL_LABEL;
				}

				// The scan always ends after a bracket run
				cur = NULL;
				break;

			default:
				cur = NULL;
				break;
		}
	}

	return found;
}
1572
1573
/// Register a header as a link target.
///
/// If the header carries a manual label, that label token is used both to
/// derive the label text and as the token passed to link_new(); otherwise
/// the header token itself is used.  The link URL is "#" followed by the
/// label text, and the link is pushed onto the engine's link stack.
void process_header_to_links(mmd_engine * e, token * h) {
	// See if we have a manual label
	token * manual = manual_label_from_header(h, e->dstr->str);
	token * target = manual ? manual : h;

	char * anchor = label_from_token(e->dstr->str, target);

	DString * href = d_string_new("#");
	d_string_append(href, anchor);

	link * entry = link_new(e->dstr->str, target, href->str, NULL, NULL, LINK_AUTO);

	// Store link for later use
	stack_push(e->link_stack, entry);

	// The temporaries are released once the link has been created and stored
	d_string_free(href, true);
	free(anchor);
}
1599
1600
/// Register every header on the engine's header stack as a link target.
/// Does nothing when the EXT_NO_LABELS extension flag is set.
void process_header_stack(mmd_engine * e) {
	if (!(e->extensions & EXT_NO_LABELS)) {
		int idx = 0;

		while (idx < e->header_stack->size) {
			process_header_to_links(e, stack_peek_index(e->header_stack, idx));
			++idx;
		}
	}
}
1611
1612
/// Register a captioned table as a link target.
///
/// Tables for which table_has_caption() is false are ignored.  Otherwise
/// the label is derived from the first child of the token following the
/// table, or from that child's successor when the successor is a
/// PAIR_BRACKET.  The link URL is "#" followed by the label text.
void process_table_to_link(mmd_engine * e, token * t) {
	// Is there a caption
	if (!table_has_caption(t)) {
		return;
	}

	token * anchor_tok = t->next->child;

	// Prefer a bracket that directly follows the first child
	if (anchor_tok->next && (anchor_tok->next->type == PAIR_BRACKET)) {
		anchor_tok = anchor_tok->next;
	}

	char * anchor = label_from_token(e->dstr->str, anchor_tok);

	DString * href = d_string_new("#");
	d_string_append(href, anchor);

	link * entry = link_new(e->dstr->str, anchor_tok, href->str, NULL, NULL, LINK_AUTO);
	stack_push(e->link_stack, entry);

	d_string_free(href, true);
	free(anchor);
}
1636
1637
/// Run process_table_to_link() on every entry of the engine's table
/// stack, from index 0 upward.
void process_table_stack(mmd_engine * e) {
	int idx = 0;

	while (idx < e->table_stack->size) {
		process_table_to_link(e, stack_peek_index(e->table_stack, idx));
		++idx;
	}
}
1643
1644
/// Parse metadata
///
/// Walks the engine's metadata stack in order and applies the keys that
/// influence output settings on `scratch`:
///
/// * `*headerlevel` keys set the base header level (see below),
/// * `language` sets both `language` and `quotes_lang`,
/// * `latexmode` can switch FORMAT_LATEX to beamer or memoir,
/// * `quoteslanguage` sets `quotes_lang`,
/// * `bibtex` stores a copy of the value in `bibtex_file`,
/// * `bibtex` and every key not listed above also turn on EXT_COMPLETE,
///   unless EXT_SNIPPET is set.
///
/// Nothing is done when EXT_NO_METADATA or EXT_COMPATIBILITY is set.
///
/// NOTE(review): `m->value` is passed to atoi(), label_from_string() and
/// my_strdup() without a NULL check, while meta_new() initialises `value`
/// to NULL -- confirm every entry on the stack has had its value set.
void process_metadata_stack(mmd_engine * e, scratch_pad * scratch) {
	if ((scratch->extensions & EXT_NO_METADATA) ||
			(scratch->extensions & EXT_COMPATIBILITY)) {
		return;
	}

	meta * m;
	short header_level = -10;	// -10 is the "no header level key seen" sentinel
	char * temp_char = NULL;

	for (int i = 0; i < e->metadata_stack->size; ++i) {
		// Check for certain metadata keys
		m = stack_peek_index(e->metadata_stack, i);

		if (strcmp(m->key, "baseheaderlevel") == 0) {
			// The generic key only applies while no level has been set yet,
			// so it never overrides a level set by an earlier key
			if (header_level == -10) {
				header_level = atoi(m->value);
			}
		} else if (strcmp(m->key, "epubheaderlevel") == 0) {
			if (scratch->output_format == FORMAT_EPUB) {
				header_level = atoi(m->value);
			}
		} else if (strcmp(m->key, "htmlheaderlevel") == 0) {
			if (scratch->output_format == FORMAT_HTML) {
				header_level = atoi(m->value);
			}
		} else if (strcmp(m->key, "xhtmlheaderlevel") == 0) {
			// Handled identically to "htmlheaderlevel"
			if (scratch->output_format == FORMAT_HTML) {
				header_level = atoi(m->value);
			}
		} else if (strcmp(m->key, "latexheaderlevel") == 0) {
			if ((scratch->output_format == FORMAT_LATEX) ||
					(scratch->output_format == FORMAT_BEAMER) ||
					(scratch->output_format == FORMAT_MEMOIR)) {
				header_level = atoi(m->value);
			}
		} else if (strcmp(m->key, "odfheaderlevel") == 0) {
			if ((scratch->output_format == FORMAT_ODT) ||
					(scratch->output_format == FORMAT_FODT)) {
				header_level = atoi(m->value);
			}
		} else if (strcmp(m->key, "language") == 0) {
			// Compare against the label form of the value
			temp_char = label_from_string(m->value);

			if (strcmp(temp_char, "de") == 0) {
				scratch->language = LC_DE;
				scratch->quotes_lang = GERMAN;
			} else if (strcmp(temp_char, "es") == 0) {
				scratch->language = LC_ES;
				scratch->quotes_lang = SPANISH;
			} else if (strcmp(temp_char, "fr") == 0) {
				scratch->language = LC_FR;
				scratch->quotes_lang = FRENCH;
			} else if (strcmp(temp_char, "he") == 0) {
				// Hebrew uses the ENGLISH quote setting
				scratch->language = LC_HE;
				scratch->quotes_lang = ENGLISH;
			} else if (strcmp(temp_char, "nl") == 0) {
				scratch->language = LC_NL;
				scratch->quotes_lang = DUTCH;
			} else if (strcmp(temp_char, "sv") == 0) {
				scratch->language = LC_SV;
				scratch->quotes_lang = SWEDISH;
			} else {
				// Anything unrecognised is treated as English
				scratch->language = LC_EN;
				scratch->quotes_lang = ENGLISH;
			}

			free(temp_char);
		} else if (strcmp(m->key, "latexmode") == 0) {
			// Only relevant while the output format is plain LaTeX
			if (scratch->output_format == FORMAT_LATEX) {
				temp_char = label_from_string(m->value);

				if (strcmp(temp_char, "beamer") == 0) {
					scratch->output_format = FORMAT_BEAMER;
				} else if (strcmp(temp_char, "memoir") == 0) {
					scratch->output_format = FORMAT_MEMOIR;
				}

				free(temp_char);
			}
		} else if (strcmp(m->key, "quoteslanguage") == 0) {
			temp_char = label_from_string(m->value);

			if ((strcmp(temp_char, "dutch") == 0) ||
					(strcmp(temp_char, "nl") == 0)) {
				scratch->quotes_lang = DUTCH;
			} else if ((strcmp(temp_char, "french") == 0) ||
					   (strcmp(temp_char, "fr") == 0)) {
				scratch->quotes_lang = FRENCH;
			} else if ((strcmp(temp_char, "german") == 0) ||
					   (strcmp(temp_char, "de") == 0)) {
				scratch->quotes_lang = GERMAN;
			} else if (strcmp(temp_char, "germanguillemets") == 0) {
				scratch->quotes_lang = GERMANGUILL;
			} else if ((strcmp(temp_char, "spanish") == 0) ||
					   (strcmp(temp_char, "es") == 0)) {
				scratch->quotes_lang = SPANISH;
			} else if ((strcmp(temp_char, "swedish") == 0) ||
					   (strcmp(temp_char, "sv") == 0)) {
				scratch->quotes_lang = SWEDISH;
			} else {
				scratch->quotes_lang = ENGLISH;
			}

			free(temp_char);
		} else if (strcmp(m->key, "bibtex") == 0) {
			// NOTE(review): a previous bibtex_file is overwritten without
			// being freed -- confirm the key cannot occur more than once
			scratch->bibtex_file = my_strdup(m->value);

			// Trigger complete document unless explicitly denied
			if (!(scratch->extensions & EXT_SNIPPET)) {
				scratch->extensions |= EXT_COMPLETE;
			}
		} else {
			// Any other key triggers complete document
			if (!(scratch->extensions & EXT_SNIPPET)) {
				scratch->extensions |= EXT_COMPLETE;
			}
		}

	}

	// Only override the scratch pad's base level if some key supplied one
	if (header_level != -10) {
		scratch->base_header_level = header_level;
	}
}
1771
1772
/// Search the source span covered by token `t` with the trie `ac` and
/// split the token at every reported match.
///
/// Iteration begins at the second node of the list returned by
/// ac_trie_leftmost_longest_search(); the first node is not used as a
/// match.  The whole list is released with match_free() before returning.
void automatic_search_text(mmd_engine * e, token * t, trie * ac) {
	match * results = ac_trie_leftmost_longest_search(ac, e->dstr->str, t->start, t->len);

	if (results) {
		token * cursor = t;

		for (match * hit = results->next; hit != NULL; hit = hit->next) {
			token_split(cursor, hit->start, hit->len, hit->match_type);

			// Move on to the first token that starts at or after the end
			// of the current match
			while (cursor && (cursor->start < hit->start + hit->len)) {
				cursor = cursor->next;
			}
		}
	}

	match_free(results);
}
1798
1799
1800 /// Determine which nodes to descend into to search for abbreviations
automatic_search(mmd_engine * e,token * t,trie * ac)1801 void automatic_search(mmd_engine * e, token * t, trie * ac) {
1802 while (t) {
1803 switch (t->type) {
1804 case TEXT_PLAIN:
1805 automatic_search_text(e, t, ac);
1806 break;
1807
1808 case DOC_START_TOKEN:
1809 case BLOCK_BLOCKQUOTE:
1810 case BLOCK_DEFINITION:
1811 case BLOCK_DEFLIST:
1812 case BLOCK_LIST_BULLETED:
1813 case BLOCK_LIST_BULLETED_LOOSE:
1814 case BLOCK_LIST_ENUMERATED:
1815 case BLOCK_LIST_ENUMERATED_LOOSE:
1816 case BLOCK_LIST_ITEM_TIGHT:
1817 case BLOCK_LIST_ITEM:
1818 case BLOCK_PARA:
1819 case BLOCK_TABLE:
1820 case BLOCK_TABLE_HEADER:
1821 case BLOCK_TABLE_SECTION:
1822 case BLOCK_TERM:
1823 case LINE_LIST_BULLETED:
1824 case LINE_LIST_ENUMERATED:
1825 case PAIR_BRACKET:
1826 case PAIR_BRACKET_FOOTNOTE:
1827 case PAIR_BRACKET_GLOSSARY:
1828 case PAIR_BRACKET_IMAGE:
1829 case PAIR_QUOTE_DOUBLE:
1830 case PAIR_QUOTE_SINGLE:
1831 case PAIR_STAR:
1832 case PAIR_UL:
1833 case TABLE_CELL:
1834 case TABLE_ROW:
1835 automatic_search(e, t->child, ac);
1836 break;
1837
1838 // case PAIR_PAREN:
1839 default:
1840 break;
1841 }
1842
1843 t = t->next;
1844 }
1845 }
1846
1847
/// Search the whole token tree for occurrences of defined abbreviations
/// and glossary terms.
///
/// A trie is built from the `label_text` of every abbreviation and the
/// `clean_text` of every glossary entry, prepared, run over the tree
/// rooted at `e->root`, and then freed.  Nothing happens when both stacks
/// are empty.
///
/// Stack entries that are NULL, or whose search text is NULL, are
/// skipped: process_definition_block() pushes the result of
/// footnote_new() even when that result is NULL, and an abbreviation's
/// `label_text` may legitimately be NULL.
///
/// `scratch` is currently unused.
void identify_global_search_terms(mmd_engine * e, scratch_pad * scratch) {
	// Only search if we have a target
	size_t count = e->abbreviation_stack->size + e->glossary_stack->size;

	if (count == 0) {
		return;
	}

	trie * ac = trie_new(0);
	footnote * f;

	// Add abbreviations to search trie
	for (int i = 0; i < e->abbreviation_stack->size; ++i) {
		f = stack_peek_index(e->abbreviation_stack, i);

		if (f && f->label_text) {
			trie_insert(ac, f->label_text, PAIR_BRACKET_ABBREVIATION);
		}
	}

	// Add glossary to search trie (without leading '?')
	for (int i = 0; i < e->glossary_stack->size; ++i) {
		f = stack_peek_index(e->glossary_stack, i);

		if (f && f->clean_text) {
			trie_insert(ac, f->clean_text, PAIR_BRACKET_GLOSSARY);
		}
	}

	ac_trie_prepare(ac);
	automatic_search(e, e->root, ac);
	trie_free(ac);
}
1875
1876
/// Convert the engine's parsed token tree to the requested output format,
/// appending the result to `out`.
///
/// Steps, in order: process definitions, headers and tables into the
/// engine's stacks; create a scratch pad for `format`; apply metadata
/// (which may change the scratch pad's output format and extensions);
/// unless EXT_COMPATIBILITY is set, search for abbreviation/glossary
/// terms; run the exporter selected by `scratch->output_format`; copy the
/// asset hash and random label seed back to the engine; free the scratch
/// pad.
void mmd_engine_export_token_tree(DString * out, mmd_engine * e, short format) {

	// Process potential reference definitions
	process_definition_stack(e);

	// Process headers for potential cross-reference targets
	process_header_stack(e);

	// Process tables for potential cross-reference targets
	process_table_stack(e);

	// Create scratch pad
	scratch_pad * scratch = scratch_pad_new(e, format);

	// Process metadata
	process_metadata_stack(e, scratch);

	// Process abbreviations, glossary, etc.
	if (!(e->extensions & EXT_COMPATIBILITY)) {
		identify_global_search_terms(e, scratch);
	}


	// Dispatch on the scratch pad's format (not the `format` argument),
	// since metadata processing above may have changed it
	switch (scratch->output_format) {
		case FORMAT_BEAMER:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_latex(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_beamer(out, e->dstr->str, e->root, scratch);

			// Close out any existing outline levels
			mmd_outline_add_beamer(out, NULL, scratch);

			mmd_export_citation_list_beamer(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_beamer(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_EPUB:
		case FORMAT_TEXTBUNDLE:
		case FORMAT_TEXTBUNDLE_COMPRESSED:
			scratch->store_assets = true;

			// These formats always emit the complete HTML wrapper,
			// regardless of EXT_COMPLETE
			mmd_start_complete_html(out, e->dstr->str, scratch);

			mmd_export_token_tree_html(out, e->dstr->str, e->root, scratch);
			mmd_export_footnote_list_html(out, e->dstr->str, scratch);
			mmd_export_glossary_list_html(out, e->dstr->str, scratch);
			mmd_export_citation_list_html(out, e->dstr->str, scratch);

			mmd_end_complete_html(out, e->dstr->str, scratch);

			break;

		case FORMAT_HTML_WITH_ASSETS:
			scratch->remember_assets = true;
			scratch->output_format = FORMAT_HTML;

		// No `break` above: control continues into the FORMAT_HTML case

		case FORMAT_HTML:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_html(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_html(out, e->dstr->str, e->root, scratch);
			mmd_export_footnote_list_html(out, e->dstr->str, scratch);
			mmd_export_glossary_list_html(out, e->dstr->str, scratch);
			mmd_export_citation_list_html(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_html(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_LATEX:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_latex(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_latex(out, e->dstr->str, e->root, scratch);
			mmd_export_citation_list_latex(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_latex(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_MEMOIR:
			if (scratch->extensions & EXT_COMPLETE) {
				mmd_start_complete_latex(out, e->dstr->str, scratch);
			}

			mmd_export_token_tree_memoir(out, e->dstr->str, e->root, scratch);
			mmd_export_citation_list_latex(out, e->dstr->str, scratch);

			if (scratch->extensions & EXT_COMPLETE) {
				mmd_end_complete_latex(out, e->dstr->str, scratch);
			}

			break;

		case FORMAT_ODT:
			scratch->store_assets = true;

		// No `break` above: control continues into the FORMAT_FODT case

		case FORMAT_FODT:
			//	mmd_start_complete_odf(out, e->dstr->str, scratch);

			mmd_export_token_tree_opendocument(out, e->dstr->str, e->root, scratch);

			//	mmd_end_complete_odf(out, e->dstr->str, scratch);
			break;

		case FORMAT_OPML:
			mmd_export_token_tree_opml(out, e->dstr->str, e->root, scratch);
			break;

		case FORMAT_ITMZ:
			mmd_export_token_tree_itmz(out, e->dstr->str, e->root, scratch);
			break;
	}

	// Any format not listed above produces no output at all

	// Preserve asset_hash for possible use in export
	e->asset_hash = scratch->asset_hash;

	// Preserve random label seed
	e->random_seed_base_labels = scratch->random_seed_base_labels;

	scratch_pad_free(scratch);
}
2011
2012
/// Work out which link, if any, a bracket token refers to.
///
/// Three forms are tried in order:
///
/// 1. `[foo](bar)` -- an explicit link built by explicit_link().  On
///    success `*free_link` is set to true.
/// 2. `[foo][bar]` / `[foo][]` -- a reference link; an empty second label
///    means the text of the first bracket is used as the label.
/// 3. `[foo]` -- a simplified implicit link, but only when the bracket
///    contains no nested bracket-type child.
///
/// For forms 2 and 3 the label is looked up with
/// extract_link_from_stack().
///
/// Outputs:
/// * `*final_link` -- the link found, or NULL when there is none.
/// * `*skip_token` -- written only when a link is found: 1 if the token
///   following the bracket belongs to the link, otherwise 0.
/// * `*free_link`  -- true only for an explicit link (form 1).
///
/// When a link is found the bracket's opening child token and that
/// token's mate are retyped as TEXT_EMPTY so they are not output.  Both
/// are checked for NULL on every path, and a NULL result from
/// text_inside_pair() for the second bracket is treated like an empty
/// label instead of being dereferenced.
void parse_brackets(const char * source, scratch_pad * scratch, token * bracket, link ** final_link, short * skip_token, bool * free_link) {
	link * temp_link = NULL;
	char * temp_char = NULL;
	short temp_short = 0;

	// What is next?
	token * next = bracket->next;

	if (next) {
		temp_short = 1;
	}

	// Do not free this link after using it
	*free_link = false;

	if (next && next->type == PAIR_PAREN) {
		// We have `[foo](bar)` or `![foo](bar)`

		temp_link = explicit_link(scratch, bracket, next, source);

		if (temp_link) {
			// Don't output brackets
			if (bracket->child) {
				bracket->child->type = TEXT_EMPTY;

				if (bracket->child->mate) {
					bracket->child->mate->type = TEXT_EMPTY;
				}
			}

			// This was an explicit link
			*final_link = temp_link;

			// Skip over parentheses
			*skip_token = temp_short;

			// Free this link
			*free_link = true;
			return;
		}
	}

	if (next && next->type == PAIR_BRACKET) {
		// Is this a reference link? `[foo][bar]` or `![foo][bar]`
		temp_char = text_inside_pair(source, next);

		if (!temp_char || temp_char[0] == '\0') {
			// Empty label, use first bracket (e.g. implicit link `[foo][]`)
			free(temp_char);
			temp_char = text_inside_pair(source, bracket);
		}
	} else {
		// This may be a simplified implicit link, e.g. `[foo]`

		// But not if it's nested brackets, since it would not
		// end up being a valid reference
		token * walker = bracket->child;

		while (walker) {
			switch (walker->type) {
				case PAIR_BRACKET:
				case PAIR_BRACKET_CITATION:
				case PAIR_BRACKET_FOOTNOTE:
				case PAIR_BRACKET_GLOSSARY:
				case PAIR_BRACKET_VARIABLE:
				case PAIR_BRACKET_ABBREVIATION:
					*final_link = NULL;
					return;

				default:
					break;
			}

			walker = walker->next;
		}

		temp_char = text_inside_pair(source, bracket);
		// Don't skip tokens
		temp_short = 0;
	}

	temp_link = extract_link_from_stack(scratch, temp_char);

	// free(NULL) is a no-op, so no guard is required
	free(temp_char);

	if (temp_link) {
		// Don't output brackets
		if (bracket->child) {
			bracket->child->type = TEXT_EMPTY;

			if (bracket->child->mate) {
				bracket->child->mate->type = TEXT_EMPTY;
			}
		}

		*final_link = temp_link;

		// Skip over second bracket if present
		*skip_token = temp_short;
		return;
	}

	// No existing links, so nothing to do
	*final_link = NULL;
}
2112
2113
/// Record a citation as used, once.
///
/// A `count` of -1 marks a citation that has not been recorded yet.  Such
/// a citation is pushed onto scratch->used_citations and its `count` is
/// set to that stack's size after the push.  Otherwise nothing changes.
void mark_citation_as_used(scratch_pad * scratch, footnote * c) {
	if (c->count != -1) {
		// Already recorded
		return;
	}

	stack_push(scratch->used_citations, c);
	c->count = scratch->used_citations->size;
}
2123
2124
/// Record a footnote as used, once.
///
/// A `count` of -1 marks a footnote that has not been recorded yet.  Such
/// a footnote is pushed onto scratch->used_footnotes and its `count` is
/// set to that stack's size after the push.  Otherwise nothing changes.
void mark_footnote_as_used(scratch_pad * scratch, footnote * f) {
	if (f->count != -1) {
		// Already recorded
		return;
	}

	stack_push(scratch->used_footnotes, f);
	f->count = scratch->used_footnotes->size;
}
2134
2135
/// Record a glossary entry as used, once.
///
/// A `count` of -1 marks an entry that has not been recorded yet.  Such
/// an entry is pushed onto scratch->used_glossaries and its `count` is
/// set to that stack's size after the push.  Otherwise nothing changes.
void mark_glossary_as_used(scratch_pad * scratch, footnote * c) {
	if (c->count != -1) {
		// Already recorded
		return;
	}

	stack_push(scratch->used_glossaries, c);
	c->count = scratch->used_glossaries->size;
}
2145
2146
/// Record an abbreviation as used, once.
///
/// A `count` of -1 marks an entry that has not been recorded yet.  Such
/// an entry is pushed onto scratch->used_abbreviations and its `count` is
/// set to that stack's size after the push.  Otherwise nothing changes.
void mark_abbreviation_as_used(scratch_pad * scratch, footnote * c) {
	if (c->count != -1) {
		// Already recorded
		return;
	}

	stack_push(scratch->used_abbreviations, c);
	c->count = scratch->used_abbreviations->size;
}
2156
2157
/// Look up a citation by `target` and mark it as used.
///
/// Two keys are tried against scratch->citation_hash, in this order:
/// clean_string(target, true), then label_from_string(target).  On a hit
/// the citation is marked as used and its `count` is returned.  When
/// neither key matches, -1 converted to size_t is returned.
size_t extract_citation_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder;
	char * lookup;

	// First attempt: cleaned string
	lookup = clean_string(target, true);
	HASH_FIND_STR(scratch->citation_hash, lookup, holder);
	free(lookup);

	if (!holder) {
		// Second attempt: label form
		lookup = label_from_string(target);
		HASH_FIND_STR(scratch->citation_hash, lookup, holder);
		free(lookup);
	}

	if (!holder) {
		// None found
		return (size_t) -1;
	}

	mark_citation_as_used(scratch, holder->note);
	return holder->note->count;
}
2186
2187
/// Look up a footnote by `target` and mark it as used.
///
/// Two keys are tried against scratch->footnote_hash, in this order:
/// clean_string(target, true), then label_from_string(target).  On a hit
/// the footnote is marked as used and its `count` is returned.  When
/// neither key matches, -1 converted to size_t is returned.
size_t extract_footnote_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder;
	char * lookup;

	// First attempt: cleaned string
	lookup = clean_string(target, true);
	HASH_FIND_STR(scratch->footnote_hash, lookup, holder);
	free(lookup);

	if (!holder) {
		// Second attempt: label form
		lookup = label_from_string(target);
		HASH_FIND_STR(scratch->footnote_hash, lookup, holder);
		free(lookup);
	}

	if (!holder) {
		// None found
		return (size_t) -1;
	}

	mark_footnote_as_used(scratch, holder->note);
	return holder->note->count;
}
2216
2217
/// Look up an abbreviation by `target` and mark it as used.
///
/// Two keys are tried against scratch->abbreviation_hash, in this order:
/// clean_string(target, false), then label_from_string(target).  On a hit
/// the abbreviation is marked as used and its `count` is returned.  When
/// neither key matches, -1 converted to size_t is returned.
size_t extract_abbreviation_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder;
	char * lookup;

	// First attempt: cleaned string
	lookup = clean_string(target, false);
	HASH_FIND_STR(scratch->abbreviation_hash, lookup, holder);
	free(lookup);

	if (!holder) {
		// Second attempt: label form
		lookup = label_from_string(target);
		HASH_FIND_STR(scratch->abbreviation_hash, lookup, holder);
		free(lookup);
	}

	if (!holder) {
		// None found
		return (size_t) -1;
	}

	mark_abbreviation_as_used(scratch, holder->note);
	return holder->note->count;
}
2246
2247
/// Look up a glossary entry by `target` and mark it as used.
///
/// Two keys are tried against scratch->glossary_hash, in this order:
/// clean_string(target, false), then label_from_string(target).  On a hit
/// the entry is marked as used and its `count` is returned.  When neither
/// key matches, -1 converted to size_t is returned.
size_t extract_glossary_from_stack(scratch_pad * scratch, const char * target) {
	fn_holder * holder;
	char * lookup;

	// First attempt: cleaned string
	lookup = clean_string(target, false);
	HASH_FIND_STR(scratch->glossary_hash, lookup, holder);
	free(lookup);

	if (!holder) {
		// Second attempt: label form
		lookup = label_from_string(target);
		HASH_FIND_STR(scratch->glossary_hash, lookup, holder);
		free(lookup);
	}

	if (!holder) {
		// None found
		return (size_t) -1;
	}

	mark_glossary_as_used(scratch, holder->note);
	return holder->note->count;
}
2276
2277
/// Resolve a footnote bracket to a footnote number, stored in `*num`.
///
/// The text inside the bracket is first looked up among the defined
/// footnotes.  If it is found, its number is used.  Otherwise the bracket
/// is treated as an inline footnote: its delimiters are blanked, a new
/// footnote is created from its contents, appended to the used footnotes
/// and numbered by its position there.
void footnote_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Get text inside bracket
	char * inner = text_inside_pair(source, t);
	short existing = extract_footnote_from_stack(scratch, inner);

	free(inner);

	if (existing != -1) {
		// Footnote in stack
		*num = existing;
		return;
	}

	// No match, this is an inline footnote -- create a new one
	t->child->type = TEXT_EMPTY;
	t->child->mate->type = TEXT_EMPTY;

	footnote * created = footnote_new(source, NULL, t->child, true);

	// Store as used
	stack_push(scratch->used_footnotes, created);
	*num = scratch->used_footnotes->size;
	created->count = *num;

	// This footnote is not on any engine stack, so it is queued on the
	// scratch pad for freeing later
	stack_push(scratch->inline_footnotes_to_free, created);
}
2306
2307
/// Resolve a citation bracket to a citation number, stored in `*num`.
///
/// The text inside the bracket is first looked up among the defined
/// citations.  If it is found, its number is used.  Otherwise the
/// bracket's delimiters are blanked and then:
///
/// * if scratch->bibtex_file is set, `*num` becomes -1 and no citation is
///   created;
/// * otherwise an inline citation is created from the bracket, appended
///   to the used citations and numbered by its position there.
void citation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Get text inside bracket
	char * inner = text_inside_pair(source, t);
	short existing = extract_citation_from_stack(scratch, inner);

	free(inner);

	if (existing != -1) {
		// Citation in stack
		*num = existing;
		return;
	}

	// No match, this is an inline citation
	t->child->type = TEXT_EMPTY;
	t->child->mate->type = TEXT_EMPTY;

	// *UNLESS* we are using BibTeX, in which case no citation is created
	if (scratch->bibtex_file) {
		*num = -1;
		return;
	}

	footnote * created = footnote_new(source, t, t->child, true);

	// Store as used
	stack_push(scratch->used_citations, created);
	*num = scratch->used_citations->size;
	created->count = *num;

	// This citation is not on any engine stack, so it is queued on the
	// scratch pad for freeing later
	stack_push(scratch->inline_citations_to_free, created);
}
2343
2344
/// Resolve a glossary token to a glossary number, stored in `*num`.
///
/// The lookup text is either the text inside the bracket with its first
/// character removed (when the token has children), or a copy of the
/// token's own source span (when it has none).  If a defined glossary
/// entry matches, its number is used.  Otherwise the token is treated as
/// an inline glossary entry: the first PAIR_PAREN child becomes the label
/// and the token after it the start of the content.  If no PAIR_PAREN
/// child exists, `*num` is set to -1.
///
/// `*num` is also set to -1, with the token left untouched, when the
/// lookup text cannot be obtained (allocation failure, or
/// text_inside_pair() returning NULL).
void glossary_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Get text inside bracket
	char * text;

	if (t->child) {
		text = text_inside_pair(source, t);

		if (text) {
			// Drop the first character (presumably the '?' glossary
			// marker); copying strlen() bytes also moves the NUL
			memmove(text, &text[1], strlen(text));
		}
	} else {
		text = malloc(t->len + 1);

		if (text) {
			memcpy(text, &source[t->start], t->len);
			text[t->len] = '\0';
		}
	}

	if (!text) {
		// Could not build the lookup text
		*num = -1;
		return;
	}

	short glossary_id = extract_glossary_from_stack(scratch, text);

	free(text);

	if (glossary_id == -1) {
		// No match, this is an inline glossary -- create a new glossary entry
		if (t->child) {
			t->child->type = TEXT_EMPTY;
			t->child->mate->type = TEXT_EMPTY;
		}

		// Create glossary
		token * label = t->child;

		while (label && label->type != PAIR_PAREN) {
			label = label->next;
		}

		if (label) {
			footnote * temp = footnote_new(source, label, label->next, false);

			// Store as used
			stack_push(scratch->used_glossaries, temp);
			*num = scratch->used_glossaries->size;
			temp->count = *num;

			// We need to free this one later since it doesn't exist
			// in the engine's stack, on the scratch_pad stack
			stack_push(scratch->inline_glossaries_to_free, temp);
		} else {
			// Improperly formatted glossary
			*num = -1;
		}
	} else {
		// Glossary in stack
		*num = glossary_id;
	}
}
2396
2397
/// Resolve the abbreviation referenced by bracket token `t`.
///
/// The lookup key always skips the first character of `text`: when `t` has
/// children that is the first character of the text inside the pair, and
/// when it has none a '>' placeholder is prepended to the raw token text so
/// the same `&text[1]` offset applies.
///
/// On return `*num` is one of:
///   - the id returned by extract_abbreviation_from_stack() when it is not -1,
///   - the size of `scratch->used_abbreviations` after a newly created inline
///     entry has been pushed onto it,
///   - -1 when no PAIR_PAREN child is found to build an inline entry from,
///     or when the lookup text could not be allocated.
void abbreviation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) {
	// Get text inside bracket
	char * text;

	if (t->child) {
		text = text_inside_pair(source, t);
	} else {
		text = malloc(t->len + 2);

		if (text) {
			text[0] = '>';
			memcpy(&text[1], &source[t->start], t->len);
			text[t->len + 1] = '\0';
		}
	}

	if (text == NULL) {
		// Could not build the lookup key -- report "no abbreviation"
		// instead of dereferencing a NULL pointer.
		*num = -1;
		return;
	}

	short abbr_id = extract_abbreviation_from_stack(scratch, &text[1]);

	free(text);

	if (abbr_id != -1) {
		// Abbreviation in stack
		*num = abbr_id;
		return;
	}

	// No match, this is an inline abbreviation -- create a new entry
	if (t->child) {
		t->child->type = TEXT_EMPTY;
		t->child->mate->type = TEXT_EMPTY;
	}

	// The entry is built from the first PAIR_PAREN among the children
	token * label = t->child;

	while (label && label->type != PAIR_PAREN) {
		label = label->next;
	}

	if (label == NULL) {
		// Improperly formatted abbreviation
		*num = -1;
		return;
	}

	footnote * temp = footnote_new(source, label, label->next, false);

	// Adjust the properties: the clean text produced by footnote_new()
	// becomes the label text ...
	free(temp->label_text);
	temp->label_text = temp->clean_text;

	// ... and the clean text is rebuilt from the source range that starts at
	// the content's first child and stops before the closing mate of `t`.
	if (temp->content && temp->content->child) {
		temp->clean_text = clean_string_from_range(source, temp->content->child->start, t->start + t->len - t->child->mate->len - temp->content->child->start, false);
	} else {
		// Without this, label_text and clean_text would alias the same
		// allocation, and any cleanup that frees both would double free.
		// NOTE(review): assumes consumers tolerate a NULL clean_text -- confirm.
		temp->clean_text = NULL;
	}

	// Store as used
	stack_push(scratch->used_abbreviations, temp);
	*num = scratch->used_abbreviations->size;
	temp->count = *num;

	// We need to free this one later since it doesn't exist
	// in the engine's stack, on the scratch_pad stack
	stack_push(scratch->inline_abbreviations_to_free, temp);
}
2457
2458
/// Build the per-column alignment string for `table` in `scratch`.
///
/// The last child of the table's first child (the separator line, per the
/// original comment) is marked TEXT_EMPTY, and each TABLE_CELL in it is
/// scanned with scan_alignment_string().  One character per cell is written
/// to `scratch->table_alignment`:
///   'l' / 'r' / 'c'  left / right / center
///   'L' / 'R' / 'C'  the same, combined with ALIGN_WRAP
///   'N'              ALIGN_WRAP only
///   'n'              anything else
/// The string is NUL-terminated and the number of cells is stored in
/// `scratch->table_column_count`.
void read_table_column_alignments(const char * source, token * table, scratch_pad * scratch) {
	token * row = table->child->child;

	// Start from an empty alignment description
	scratch->table_alignment[0] = '\0';
	scratch->table_column_count = 0;

	if (row == NULL) {
		return;
	}

	// Advance to the last row -- the separator line
	while (row->next) {
		row = row->next;
	}

	row->type = TEXT_EMPTY;

	// One alignment code per TABLE_CELL; other token types are ignored
	short column = 0;

	for (token * cell = row->child; cell != NULL; cell = cell->next) {
		if (cell->type != TABLE_CELL) {
			continue;
		}

		short flags = scan_alignment_string(&source[cell->start]);
		char code;

		switch (flags) {
			case ALIGN_LEFT:
				code = 'l';
				break;

			case ALIGN_RIGHT:
				code = 'r';
				break;

			case ALIGN_CENTER:
				code = 'c';
				break;

			case ALIGN_LEFT | ALIGN_WRAP:
				code = 'L';
				break;

			case ALIGN_RIGHT | ALIGN_WRAP:
				code = 'R';
				break;

			case ALIGN_CENTER | ALIGN_WRAP:
				code = 'C';
				break;

			case ALIGN_WRAP:
				code = 'N';
				break;

			default:
				code = 'n';
				break;
		}

		scratch->table_alignment[column] = code;
		column++;
	}

	scratch->table_alignment[column] = '\0';
	scratch->table_column_count = column;
}
2530
2531
/// Neutralize whitespace at the front of a token chain.
///
/// Walks `chain` from its head: INDENT_TAB, INDENT_SPACE and NON_INDENT_SPACE
/// tokens are retyped to TEXT_EMPTY, existing TEXT_EMPTY tokens are skipped,
/// and the walk stops at the first token of any other type.  If that token
/// is TEXT_PLAIN it is first passed to token_trim_leading_whitespace().
///
/// The previous version advanced the cursor twice per whitespace token (once
/// inside the switch and once after it), so every second token was skipped
/// without being examined.  The cursor now advances exactly once.
void strip_leading_whitespace(token * chain, const char * source) {
	while (chain) {
		switch (chain->type) {
			case INDENT_TAB:
			case INDENT_SPACE:
			case NON_INDENT_SPACE:
				chain->type = TEXT_EMPTY;

			// fall through

			case TEXT_EMPTY:
				chain = chain->next;
				break;

			case TEXT_PLAIN:
				token_trim_leading_whitespace(chain, source);
				return;

			default:
				// First non-whitespace token -- nothing more to strip
				return;
		}
	}
}
2556
2557
/// Remove trailing whitespace from `d` in place.
///
/// Each trailing character for which char_is_whitespace() is true is
/// overwritten with NUL and `d->currentStringLength` is decremented.
/// A NULL `d` or an empty string is left untouched.
///
/// The string is indexed by length rather than through a moving pointer so
/// that no pointer to `str[-1]` is ever formed (neither for an empty string
/// nor after the last character has been removed).
void trim_trailing_whitespace_d_string(DString * d) {
	if (d == NULL) {
		return;
	}

	while (d->currentStringLength > 0 &&
			char_is_whitespace(d->str[d->currentStringLength - 1])) {
		d->currentStringLength--;
		d->str[d->currentStringLength] = '\0';
	}
}
2568
2569
/// Report whether the block following `t` has the shape of a table caption.
///
/// The next block must be a BLOCK_PARA whose first child is a PAIR_BRACKET.
/// After that bracket the paragraph may contain at most one further token,
/// optionally followed by a second PAIR_BRACKET, optionally followed by a
/// single TEXT_NL or TEXT_LINEBREAK; anything beyond that yields false.
bool table_has_caption(token * t) {
	token * para = t->next;

	if (para == NULL || para->type != BLOCK_PARA) {
		return false;
	}

	token * cursor = para->child;

	if (cursor->type != PAIR_BRACKET) {
		return false;
	}

	// Step past the first bracket
	cursor = cursor->next;

	// Move onto a second bracket when one follows the current token
	if (cursor && cursor->next && cursor->next->type == PAIR_BRACKET) {
		cursor = cursor->next;
	}

	if (cursor == NULL) {
		// End of file
		return true;
	}

	// Allow one trailing newline / linebreak token
	token * trailing = cursor->next;

	if (trailing &&
			(trailing->type == TEXT_NL || trailing->type == TEXT_LINEBREAK)) {
		cursor = trailing;
	}

	// Caption only if nothing else remains in the paragraph
	return cursor->next == NULL;
}
2602
2603
2604 /// Grab the first "word" after the end of the fence marker:
2605 /// ````perl
2606 /// or
2607 /// ```` perl
get_fence_language_specifier(token * fence,const char * source)2608 char * get_fence_language_specifier(token * fence, const char * source) {
2609 if (fence == NULL) {
2610 return NULL;
2611 }
2612
2613 char * result = NULL;
2614 size_t start = fence->start + fence->len;
2615 size_t len = 0;
2616
2617 while (char_is_whitespace(source[start])) {
2618 start++;
2619 }
2620
2621 while (!char_is_whitespace_or_line_ending(source[start + len])) {
2622 len++;
2623 }
2624
2625 if (len) {
2626 result = my_strndup(&source[start], len);
2627 }
2628
2629 return result;
2630 }
2631
2632
/// Map a header block token to its level.
///
/// BLOCK_H1..BLOCK_H6 give 1..6, BLOCK_SETEXT_1 and BLOCK_SETEXT_2 give 1
/// and 2, and any other token type gives 0.
short raw_level_for_header(token * header) {
	short level;

	switch (header->type) {
		case BLOCK_H1:
		case BLOCK_SETEXT_1:
			level = 1;
			break;

		case BLOCK_H2:
		case BLOCK_SETEXT_2:
			level = 2;
			break;

		case BLOCK_H3:
			level = 3;
			break;

		case BLOCK_H4:
			level = 4;
			break;

		case BLOCK_H5:
			level = 5;
			break;

		case BLOCK_H6:
			level = 6;
			break;

		default:
			// Not a header block
			level = 0;
			break;
	}

	return level;
}
2658
2659
asset_new(char * url,scratch_pad * scratch)2660 asset * asset_new(char * url, scratch_pad * scratch) {
2661 asset * a = malloc(sizeof(asset));
2662
2663 if (a) {
2664 a->url = my_strdup(url);
2665
2666 // Create a unique local asset path
2667 a->asset_path = uuid_new();
2668 }
2669
2670 return a;
2671 }
2672
2673
/// Release an asset record together with its `url` and `asset_path`
/// strings.  Passing NULL is a no-op.
void asset_free(asset * a) {
	if (a == NULL) {
		return;
	}

	free(a->asset_path);
	free(a->url);
	free(a);
}
2682
2683
extract_asset(scratch_pad * scratch,char * url)2684 asset * extract_asset(scratch_pad * scratch, char * url) {
2685 asset * a;
2686
2687 HASH_FIND_STR(scratch->asset_hash, url, a);
2688
2689 return a;
2690 }
2691
2692
/// Record `url` in `scratch->asset_hash` unless it is already present.
///
/// A new asset is created with asset_new() and keyed by its own copy of the
/// url.  Nothing is stored when `url` is NULL or when the asset (or its url
/// copy) could not be allocated; asset_new() returns NULL on allocation
/// failure, so its result must be checked before it is dereferenced.
void store_asset(scratch_pad * scratch, char * url) {
	if (url == NULL) {
		// The hash lookup measures the key with strlen()
		return;
	}

	// Only store if this url has not already been stored
	if (extract_asset(scratch, url) != NULL) {
		return;
	}

	// Asset not found - create new one
	asset * a = asset_new(url, scratch);

	if (a == NULL) {
		return;
	}

	if (a->url == NULL) {
		// No key to hash on -- discard the partially built asset
		asset_free(a);
		return;
	}

	HASH_ADD_KEYPTR(hh, scratch->asset_hash, a->url, strlen(a->url), a);
}
2703
2704
raw_filter_text_matches(char * pattern,short format)2705 bool raw_filter_text_matches(char * pattern, short format) {
2706 if (!pattern) {
2707 return false;
2708 }
2709
2710 if (strcmp("*", pattern) == 0) {
2711 return true;
2712 } else if (strcmp("{=*}", pattern) == 0) {
2713 return true;
2714 } else {
2715 switch (format) {
2716 case FORMAT_HTML:
2717 case FORMAT_HTML_WITH_ASSETS:
2718 if (strstr(pattern, "html")) {
2719 return true;
2720 }
2721
2722 break;
2723
2724 case FORMAT_ODT:
2725 case FORMAT_FODT:
2726 if (strstr(pattern, "odt")) {
2727 return true;
2728 }
2729
2730 break;
2731
2732 case FORMAT_EPUB:
2733 if (strstr(pattern, "epub")) {
2734 return true;
2735 }
2736
2737 break;
2738
2739 case FORMAT_MEMOIR:
2740 case FORMAT_BEAMER:
2741 case FORMAT_LATEX:
2742 if (strstr(pattern, "latex")) {
2743 return true;
2744 }
2745
2746 break;
2747 }
2748 }
2749
2750 return false;
2751 }
2752
2753
2754 /// Determine whether raw filter matches specified format
raw_filter_matches(token * t,const char * source,short format)2755 bool raw_filter_matches(token * t, const char * source, short format) {
2756 bool result = false;
2757
2758 if (t->type != PAIR_RAW_FILTER) {
2759 return result;
2760 }
2761
2762 char * pattern = my_strndup(&source[t->child->start + 2], t->child->mate->start - t->child->start - 2);
2763
2764 result = raw_filter_text_matches(pattern, format);
2765
2766 free(pattern);
2767
2768 return result;
2769 }
2770
2771