1 /**
2 
3 	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
4 
5 	@file libMultiMarkdown.h
6 
7 	@brief Header file for libMultiMarkdown.
8 
9 
10 	@author	Fletcher T. Penney
11 	@bug
12 
13 
14 	******IMPORTANT******
15 
16 	If you are using libMultiMarkdown in your own project, you need to either:
17 
18 	1. Disable kUseObjectPool in `token.h`
19 
20 	2. Properly manage the `token_pool_init` and `token_pool_free` functions.
21 
22 
23 	I recommend option #1, unless you absolutely need the best performance for
24 	long documents.  Doing #2 properly is tricky in any program that can handle
25 	multiple MMD text strings at overlapping times.
26 
27 **/
28 
29 /*
30 
31 	Copyright © 2016 - 2017 Fletcher T. Penney.
32 
33 
34 	The `MultiMarkdown 6` project is released under the MIT License..
35 
36 	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
37 
38 		https://github.com/fletcher/MultiMarkdown-4/
39 
40 	MMD 4 is released under both the MIT License and GPL.
41 
42 
43 	CuTest is released under the zlib/libpng license. See CuTest.c for the text
44 	of the license.
45 
46 
47 	## The MIT License ##
48 
49 	Permission is hereby granted, free of charge, to any person obtaining a copy
50 	of this software and associated documentation files (the "Software"), to deal
51 	in the Software without restriction, including without limitation the rights
52 	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
53 	copies of the Software, and to permit persons to whom the Software is
54 	furnished to do so, subject to the following conditions:
55 
56 	The above copyright notice and this permission notice shall be included in
57 	all copies or substantial portions of the Software.
58 
59 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
60 	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
61 	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
62 	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
63 	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
64 	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
65 	THE SOFTWARE.
66 
67 */
68 
69 
70 #ifndef MMD6_H
71 #define MMD6_H
72 
73 #include <stdbool.h>
74 #include <stdlib.h>
75 
76 
/// Opaque forward declarations of the library's internal data structures.
/// This header only ever refers to them through pointers.  If your own code
/// needs to work with the structures themselves, you may need to include the
/// header named beside each one.

typedef struct token		token;			//!< From token.h
typedef struct DString		DString;		//!< From d_string.h
typedef struct mmd_engine	mmd_engine;		//!< From mmd.h
typedef struct stack		stack;			//!< From stack.h
91 
92 
/// There are 3 main versions of the primary functions:
///
/// * `mmd_string...` -- start from source text in a C string
/// * `mmd_d_string...` -- start from a DString (useful if you already use DStrings for your text)
/// * `mmd_engine...` -- useful when you are processing the same source multiple times
98 
99 
100 /*
101 	C string variants
102 */
103 
104 /// Convert OPML string to MMD
105 DString * mmd_string_convert_opml_to_text(const char * source);
106 
107 
108 /// Convert ITMZ string to MMD
109 DString * mmd_string_convert_itmz_to_text(const char * source);
110 
111 
112 /// Convert MMD text to specified format, with specified extensions, and language
113 /// Returned char * must be freed
114 char * mmd_string_convert(const char * source, unsigned long extensions, short format, short language);
115 
116 
117 /// Convert MMD text to specified format using DString as a container for block of data
118 /// and length of that block.  Must be used for "complex" output formats such as EPUB.
119 /// Returned DString * must be freed
120 DString * mmd_string_convert_to_data(const char * source, unsigned long extensions, short format, short language, const char * directory);
121 
122 
123 /// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
124 /// multiple documents (e.g. EPUB)
125 void mmd_string_convert_to_file(const char * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath);
126 
127 
128 /// Does the text have metadata?
129 bool mmd_string_has_metadata(char * source, size_t * end);
130 
131 
132 /// Return metadata keys, one per line
133 /// Returned char * must be freed
134 char * mmd_string_metadata_keys(char * source);
135 
136 
137 /// Extract desired metadata as string value
138 /// Returned char * must be freed
139 char * mmd_string_metavalue_for_key(char * source, const char * key);
140 
141 
142 /// Insert/replace metadata in string, returning new string
143 char * mmd_string_update_metavalue_for_key(const char * source, const char * key, const char * value);
144 
145 
146 /// Grab list of all transcluded files, but we need to know directory to search,
147 /// as well as the path to the file
148 /// Returned stack needs to be freed
149 struct stack * mmd_string_transclusion_manifest(const char * source, const char * search_path, const char * source_path);
150 
151 
152 
153 
154 /*
155 	DString variants - DString ("dynamic string") is a mutable string implementation used in this project
156 */
157 
158 /// Convert OPML DString to MMD
159 DString * mmd_d_string_convert_opml_to_text(DString * source);
160 
161 
162 /// Convert ITMZ DString to MMD
163 DString * mmd_d_string_convert_itmz_to_text(DString * source);
164 
165 
166 /// Convert MMD text to specified format, with specified extensions, and language
167 /// Returned char * must be freed
168 char * mmd_d_string_convert(DString * source, unsigned long extensions, short format, short language);
169 
170 
171 /// Convert MMD text to specified format using DString as a container for block of data
172 /// and length of that block.  Must be used for "complex" output formats such as EPUB.
173 /// Returned DString * must be freed
174 DString * mmd_d_string_convert_to_data(DString * source, unsigned long extensions, short format, short language, const char * directory);
175 
176 
177 /// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
178 /// multiple documents (e.g. EPUB)
179 void mmd_d_string_convert_to_file(DString * source, unsigned long extensions, short format, short language, const char * directory, const char * filepath);
180 
181 
182 /// Does the text have metadata?
183 bool mmd_d_string_has_metadata(DString * source, size_t * end);
184 
185 
186 /// Return metadata keys, one per line
187 /// Returned char * must be freed
188 char * mmd_d_string_metadata_keys(DString * source);
189 
190 
191 /// Extract desired metadata as string value
192 /// Returned char * must be freed
193 char * mmd_d_string_metavalue_for_key(DString * source, const char * key);
194 
195 
196 /// Insert/replace metadata value in DString
197 void mmd_d_string_update_metavalue_for_key(DString * source, const char * key, const char * value);
198 
199 
200 /// Grab list of all transcluded files, but we need to know directory to search,
201 /// as well as the path to the file
202 /// Returned stack needs to be freed
203 struct stack * mmd_d_string_transclusion_manifest(DString * source, const char * search_path, const char * source_path);
204 
205 
206 
207 
208 /*
209 	MMD Engine variants
210 */
211 
212 /// Create MMD Engine using an existing DString (A new copy is *not* made)
213 mmd_engine * mmd_engine_create_with_dstring(
214 	DString 	*	d,
215 	unsigned long	extensions
216 );
217 
218 
219 /// Create MMD Engine using a C string (A private copy of the string will be
220 /// made.  The one passed here can be freed by the calling function)
221 mmd_engine * mmd_engine_create_with_string(
222 	const char *	str,
223 	unsigned long	extensions
224 );
225 
226 
227 /// Reset engine when finished parsing. (Usually not necessary to use this.)
228 void mmd_engine_reset(mmd_engine * e);
229 
230 
231 /// Free an existing MMD Engine
232 void mmd_engine_free(
233 	mmd_engine *	e,
234 	bool			freeDString
235 );
236 
237 
238 /// Set language and smart quotes language
239 void mmd_engine_set_language(mmd_engine * e, short language);
240 
241 
242 /// Access DString directly
243 DString * mmd_engine_d_string(mmd_engine * e);
244 
245 /// Return token tree after previous parsing
246 token * mmd_engine_root(mmd_engine * e);
247 
248 
249 /// Parse part of the string into a token tree
250 token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byte_len);
251 
252 
253 /// Parse the entire string into a token tree
254 void mmd_engine_parse_string(mmd_engine * e);
255 
256 
257 /// Export parsed token tree to output format
258 void mmd_engine_export_token_tree(DString * out, mmd_engine * e, short format);
259 
260 
261 /// Convert MMD text to specified format, with specified extensions, and language
262 /// Returned char * must be freed
263 char * mmd_engine_convert(mmd_engine * e, short format);
264 
265 
266 /// Convert MMD text and write results to specified file -- used for "complex" output formats requiring
267 /// multiple documents (e.g. EPUB)
268 void mmd_engine_convert_to_file(mmd_engine * e, short format, const char * directory, const char * filepath);
269 
270 
271 /// Convert OPML to text without modifying original engine source
272 DString  * mmd_engine_convert_opml_to_text(mmd_engine * e);
273 
274 
275 /// Convert ITMZ to text without modifying original engine source
276 DString  * mmd_engine_convert_itmz_to_text(mmd_engine * e);
277 
278 
279 /// Convert MMD text to specified format using DString as a container for block of data
280 /// and length of that block.  Must be used for "complex" output formats such as EPUB.
281 /// Returned DString * must be freed
282 DString * mmd_engine_convert_to_data(mmd_engine * e, short format, const char * directory);
283 
284 
285 /// Does the text have metadata?
286 bool mmd_engine_has_metadata(mmd_engine * e, size_t * end);
287 
288 
289 /// Return metadata keys, one per line
290 /// Returned char * must be freed
291 char * mmd_engine_metadata_keys(mmd_engine * e);
292 
293 
294 /// Extract desired metadata as string value
295 char * mmd_engine_metavalue_for_key(mmd_engine * e, const char * key);
296 
297 
298 /// Insert/replace metadata value in mmd_engine
299 void mmd_engine_update_metavalue_for_key(mmd_engine * e, const char * key, const char * value);
300 
301 
302 /// Grab list of all transcluded files, but we need to know directory to search,
303 /// as well as the path to the file
304 /// Returned stack needs to be freed
305 struct stack * mmd_engine_transclusion_manifest(mmd_engine * e, const char * search_path, const char * source_path);
306 
307 
308 
309 
310 /*
311 	Utility functions
312 */
313 
314 /// Return the version string for this build of libMultiMarkdown
315 /// The returned `char *` will need to be freed after it is no longer needed.
316 char * mmd_version(void);
317 
318 
319 // Read file into memory
320 DString * scan_file(const char * fname);
321 
322 
323 /// Recursively transclude source text, given a search directory.
324 /// Track files to prevent infinite recursive loops
325 void mmd_transclude_source(DString * source, const char * search_path, const char * source_path, short format, struct stack * parsed, struct stack * manifest);
326 
327 
328 /// If MMD Header metadata used, insert it into appropriate place
329 void mmd_prepend_mmd_header(DString * source);
330 
331 
332 /// If MMD Footer metadata used, insert it into appropriate place
333 void mmd_append_mmd_footer(DString * source);
334 
335 
336 /// Accept all CriticMarkup changes in the source string
337 void mmd_critic_markup_accept(DString * d);
338 
339 
340 /// Accept all CriticMarkup changes in the specified range
341 void mmd_critic_markup_accept_range(DString * d, size_t start, size_t len);
342 
343 
344 /// Reject all CriticMarkup changes in the source string
345 void mmd_critic_markup_reject(DString * d);
346 
347 
348 /// Reject all CriticMarkup changes in the specified range
349 void mmd_critic_markup_reject_range(DString * d, size_t start, size_t len);
350 
351 
/// Token types for the parse tree.
///
/// Apart from the two explicit anchors (0 and 50) every value is assigned
/// implicitly, so the order of the enumerators below determines their values.
enum token_types {
	DOC_START_TOKEN = 0,			//!< Must be type 0

	/* BLOCK_* -- numbering must start *after* the largest number in parser.h */
	BLOCK_BLOCKQUOTE = 50,
	BLOCK_CODE_FENCED, BLOCK_CODE_INDENTED,
	BLOCK_DEFLIST, BLOCK_DEFINITION,
	BLOCK_DEF_ABBREVIATION, BLOCK_DEF_CITATION, BLOCK_DEF_GLOSSARY,
	BLOCK_DEF_FOOTNOTE, BLOCK_DEF_LINK,
	BLOCK_EMPTY,
	BLOCK_HEADING,					//!< Placeholder for theme cascading
	BLOCK_H1, BLOCK_H2, BLOCK_H3,	//!< Leave H1, H2, etc. in order
	BLOCK_H4, BLOCK_H5, BLOCK_H6,
	BLOCK_HR,
	BLOCK_HTML,
	BLOCK_LIST_BULLETED, BLOCK_LIST_BULLETED_LOOSE,
	BLOCK_LIST_ENUMERATED, BLOCK_LIST_ENUMERATED_LOOSE,
	BLOCK_LIST_ITEM, BLOCK_LIST_ITEM_TIGHT,
	BLOCK_META,
	BLOCK_PARA,
	BLOCK_SETEXT_1, BLOCK_SETEXT_2,
	BLOCK_TABLE, BLOCK_TABLE_HEADER, BLOCK_TABLE_SECTION,
	BLOCK_TERM,
	BLOCK_TOC,

	/* CRITIC_* */
	CRITIC_ADD_OPEN, CRITIC_ADD_CLOSE,
	CRITIC_DEL_OPEN, CRITIC_DEL_CLOSE,
	CRITIC_COM_OPEN, CRITIC_COM_CLOSE,
	CRITIC_SUB_OPEN, CRITIC_SUB_DIV, CRITIC_SUB_DIV_A, CRITIC_SUB_DIV_B, CRITIC_SUB_CLOSE,
	CRITIC_HI_OPEN, CRITIC_HI_CLOSE,

	/* PAIR_CRITIC_* */
	PAIR_CRITIC_ADD, PAIR_CRITIC_DEL, PAIR_CRITIC_COM,
	PAIR_CRITIC_SUB_ADD, PAIR_CRITIC_SUB_DEL,
	PAIR_CRITIC_HI,

	/* PAIR_* */
	PAIRS,							//!< Placeholder for theme cascading
	PAIR_ANGLE,
	PAIR_BACKTICK,
	PAIR_BRACKET,
	PAIR_BRACKET_ABBREVIATION, PAIR_BRACKET_FOOTNOTE, PAIR_BRACKET_GLOSSARY,
	PAIR_BRACKET_CITATION, PAIR_BRACKET_IMAGE, PAIR_BRACKET_VARIABLE,
	PAIR_BRACE,
	PAIR_EMPH,
	PAIR_MATH,
	PAIR_PAREN,
	PAIR_QUOTE_SINGLE, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_ALT,
	PAIR_RAW_FILTER,
	PAIR_SUBSCRIPT, PAIR_SUPERSCRIPT,
	PAIR_STAR,
	PAIR_STRONG,
	PAIR_UL,
	PAIR_BRACES,

	/* Emphasis / strong markers */
	MARKUP,
	STAR, UL,
	EMPH_START, EMPH_STOP,
	STRONG_START, STRONG_STOP,

	/* Brackets, parentheses, angles, double braces */
	BRACKET_LEFT, BRACKET_RIGHT,
	BRACKET_ABBREVIATION_LEFT, BRACKET_FOOTNOTE_LEFT, BRACKET_GLOSSARY_LEFT,
	BRACKET_CITATION_LEFT, BRACKET_IMAGE_LEFT, BRACKET_VARIABLE_LEFT,

	PAREN_LEFT, PAREN_RIGHT,

	ANGLE_LEFT, ANGLE_RIGHT,

	BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT,

	/* Punctuation and typography */
	AMPERSAND, AMPERSAND_LONG,
	APOSTROPHE,
	BACKTICK,
	CODE_FENCE,
	COLON,
	DASH_M, DASH_N,
	ELLIPSIS,
	QUOTE_SINGLE, QUOTE_DOUBLE,
	QUOTE_LEFT_SINGLE, QUOTE_RIGHT_SINGLE,
	QUOTE_LEFT_DOUBLE, QUOTE_RIGHT_DOUBLE,
	QUOTE_RIGHT_ALT,

	ESCAPED_CHARACTER,

	/* HTML_* */
	HTML_ENTITY,
	HTML_COMMENT_START, HTML_COMMENT_STOP,
	PAIR_HTML_COMMENT,

	/* MATH_* */
	MATH_PAREN_OPEN, MATH_PAREN_CLOSE,
	MATH_BRACKET_OPEN, MATH_BRACKET_CLOSE,
	MATH_DOLLAR_SINGLE, MATH_DOLLAR_DOUBLE,

	EQUAL, PIPE, PLUS, SLASH,

	SUPERSCRIPT, SUBSCRIPT,

	/* Indentation */
	INDENT_TAB, INDENT_SPACE, NON_INDENT_SPACE,

	/* HASHn and MARKER_* */
	HASH1, HASH2, HASH3,			//!< Leave HASH1, HASH2, etc. in order
	HASH4, HASH5, HASH6,
	MARKER_BLOCKQUOTE,
	MARKER_H1, MARKER_H2, MARKER_H3,	//!< Leave MARKER_H1, MARKER_H2, etc. in order
	MARKER_H4, MARKER_H5, MARKER_H6,
	MARKER_LIST_BULLET, MARKER_LIST_ENUMERATOR,

	/* TABLE_* */
	TABLE_ROW, TABLE_CELL, TABLE_DIVIDER,

	/* TOC* */
	TOC, TOC_SINGLE, TOC_RANGE,

	/* TEXT_* (RAW_FILTER_LEFT sits inside this run and must stay here) */
	TEXT_BACKSLASH,
	RAW_FILTER_LEFT,
	TEXT_BRACE_LEFT, TEXT_BRACE_RIGHT,
	TEXT_EMPTY,
	TEXT_HASH,
	TEXT_LINEBREAK, TEXT_LINEBREAK_SP,
	TEXT_NL, TEXT_NL_SP,
	TEXT_NUMBER_POSS_LIST,
	TEXT_PERCENT,
	TEXT_PERIOD,
	TEXT_PLAIN,

	MANUAL_LABEL,
};
548 
549 
/// Languages available for smart typography.
/// The first entry in the list (ENGLISH) is the default.
enum smart_quotes_language {
	ENGLISH		= 0,
	DUTCH		= 1,
	FRENCH		= 2,
	GERMAN		= 3,
	GERMANGUILL	= 4,
	SPANISH		= 5,
	SWEDISH		= 6,
};
560 
561 
/// Output format identifiers.
enum output_format {
	FORMAT_HTML						= 0,
	FORMAT_EPUB						= 1,
	FORMAT_LATEX					= 2,
	FORMAT_BEAMER					= 3,
	FORMAT_MEMOIR					= 4,
	FORMAT_FODT						= 5,
	FORMAT_ODT						= 6,
	FORMAT_TEXTBUNDLE				= 7,
	FORMAT_TEXTBUNDLE_COMPRESSED	= 8,
	FORMAT_OPML						= 9,
	FORMAT_ITMZ						= 10,
	FORMAT_MMD						= 11,
	FORMAT_HTML_WITH_ASSETS			= 12
};
577 
578 
/// Parser extension flags.  Every enumerator occupies its own single bit, so
/// the values do not overlap.
/// NOTE(review): presumably these are OR-ed together to form the
/// `unsigned long extensions` argument taken by the conversion functions --
/// confirm against the callers.
enum parser_extensions {
	EXT_COMPATIBILITY       = 1 << 0,    //!< Markdown compatibility mode
	EXT_COMPLETE            = 1 << 1,    //!< Create complete document
	EXT_SNIPPET             = 1 << 2,    //!< Create snippet only
	EXT_SMART               = 1 << 3,    //!< Enable Smart quotes
	EXT_NOTES               = 1 << 4,    //!< Enable Footnotes
	EXT_NO_LABELS           = 1 << 5,    //!< Don't add anchors to headers, etc.
	EXT_PROCESS_HTML        = 1 << 6,    //!< Process Markdown inside HTML
	EXT_NO_METADATA         = 1 << 7,    //!< Don't parse Metadata
	EXT_OBFUSCATE           = 1 << 8,    //!< Mask email addresses
	EXT_CRITIC              = 1 << 9,    //!< Critic Markup Support
	EXT_CRITIC_ACCEPT       = 1 << 10,   //!< Accept all proposed changes
	EXT_CRITIC_REJECT       = 1 << 11,   //!< Reject all proposed changes
	EXT_RANDOM_FOOT         = 1 << 12,   //!< Use random numbers for footnote links
	EXT_TRANSCLUDE          = 1 << 13,   //!< Perform transclusion(s)
	EXT_PARSE_OPML          = 1 << 14,   //!< Convert from OPML before processing source text
	EXT_PARSE_ITMZ          = 1 << 15,   //!< Convert from ITMZ (iThoughts) before processing source text
	EXT_RANDOM_LABELS       = 1 << 16,   //!< Use random numbers for header labels (unless manually defined)

	// `1 << 31` would shift a signed int into its sign bit, which is undefined
	// behavior.  Shift an unsigned 1 instead and convert back to int; on the
	// usual two's-complement targets this yields the same bit pattern that
	// compilers produced for the old expression.
	EXT_FAKE                = (int)(1U << 31),   //!< 31 is highest number allowed
};
599 
600 
601 #endif
602