1 /* markdown: a C implementation of John Gruber's Markdown markup language.
2  *
3  * Copyright (C) 2007 David L Parsons.
4  * The redistribution terms are provided in the COPYRIGHT file that must
5  * be distributed with this source code.
6  */
7 #include <stdio.h>
8 #include <string.h>
9 #include <stdarg.h>
10 #include <stdlib.h>
11 #include <time.h>
12 #include <ctype.h>
13 
14 #include "config.h"
15 
16 #include "cstring.h"
17 #include "markdown.h"
18 #include "amalloc.h"
19 
/* generic comparison-callback type; __mkd_footsort() is cast to this
 * when it is handed to bsearch() further down in this file.
 */
typedef int (*stfu)(const void*,const void*);
/* NOTE(review): spanhandler is not referenced in the part of the file
 * visible here -- confirm it is still used before removing it.
 */
typedef void (*spanhandler)(MMIOT*,int);

/* forward declarations */
static void text(MMIOT *f);
static Paragraph *display(Paragraph*, MMIOT*);

/* externals from markdown.c */
int __mkd_footsort(Footnote *, Footnote *);
29 
30 /*
31  * push text into the generator input buffer
32  */
33 static void
push(char * bfr,int size,MMIOT * f)34 push(char *bfr, int size, MMIOT *f)
35 {
36     while ( size-- > 0 )
37 	EXPAND(f->in) = *bfr++;
38 }
39 
40 
41 /*
42  * push a character into the generator input buffer
43  */
44 static void
pushc(char c,MMIOT * f)45 pushc(char c, MMIOT *f)
46 {
47     EXPAND(f->in) = c;
48 }
49 
50 
51 /* look <i> characters ahead of the cursor.
52  */
53 static inline int
peek(MMIOT * f,int i)54 peek(MMIOT *f, int i)
55 {
56 
57     i += (f->isp-1);
58 
59     return (i >= 0) && (i < S(f->in)) ? (unsigned char)T(f->in)[i] : EOF;
60 }
61 
62 
63 /* pull a byte from the input buffer
64  */
65 static inline unsigned int
pull(MMIOT * f)66 pull(MMIOT *f)
67 {
68     return ( f->isp < S(f->in) ) ? (unsigned char)T(f->in)[f->isp++] : EOF;
69 }
70 
71 
72 /* return a pointer to the current position in the input buffer.
73  */
74 static inline char*
cursor(MMIOT * f)75 cursor(MMIOT *f)
76 {
77     return T(f->in) + f->isp;
78 }
79 
80 
81 static inline int
isthisspace(MMIOT * f,int i)82 isthisspace(MMIOT *f, int i)
83 {
84     int c = peek(f, i);
85 
86     if ( c == EOF )
87 	return 1;
88     if ( c & 0x80 )
89 	return 0;
90     return isspace(c) || (c < ' ');
91 }
92 
93 
94 static inline int
isthisalnum(MMIOT * f,int i)95 isthisalnum(MMIOT *f, int i)
96 {
97     int c = peek(f, i);
98 
99     return (c != EOF) && isalnum(c);
100 }
101 
102 
103 static inline int
isthisnonword(MMIOT * f,int i)104 isthisnonword(MMIOT *f, int i)
105 {
106     return isthisspace(f, i) || ispunct(peek(f,i));
107 }
108 
109 
/* return/set the current cursor position
 * (when setting the current cursor position we also need to flush the
 * last character written cache, f->last)
 *
 * both macro arguments are parenthesized so that any expression --
 * for instance &mmiot -- can be passed safely.
 */
#define mmiotseek(f,x)	(((f)->isp = (x)), ((f)->last = 0))
#define mmiottell(f)	((f)->isp)
116 
117 
118 /* move n characters forward ( or -n characters backward) in the input buffer.
119  */
120 static void
shift(MMIOT * f,int i)121 shift(MMIOT *f, int i)
122 {
123     if (f->isp + i >= 0 )
124 	f->isp += i;
125 }
126 
127 
128 /* Qchar()
129  */
130 static void
Qchar(int c,MMIOT * f)131 Qchar(int c, MMIOT *f)
132 {
133     block *cur;
134 
135     if ( S(f->Q) == 0 ) {
136 	cur = &EXPAND(f->Q);
137 	memset(cur, 0, sizeof *cur);
138 	cur->b_type = bTEXT;
139     }
140     else
141 	cur = &T(f->Q)[S(f->Q)-1];
142 
143     EXPAND(cur->b_text) = c;
144 }
145 
146 
147 /* Qstring()
148  */
149 static void
Qstring(char * s,MMIOT * f)150 Qstring(char *s, MMIOT *f)
151 {
152     while (*s)
153 	Qchar(*s++, f);
154 }
155 
156 
157 /* Qwrite()
158  */
159 static void
Qwrite(char * s,int size,MMIOT * f)160 Qwrite(char *s, int size, MMIOT *f)
161 {
162     while (size-- > 0)
163 	Qchar(*s++, f);
164 }
165 
166 
167 /* Qprintf()
168  */
169 static void
Qprintf(MMIOT * f,char * fmt,...)170 Qprintf(MMIOT *f, char *fmt, ...)
171 {
172     char bfr[80];
173     va_list ptr;
174 
175     va_start(ptr,fmt);
176     vsnprintf(bfr, sizeof bfr, fmt, ptr);
177     va_end(ptr);
178     Qstring(bfr, f);
179 }
180 
181 
182 /* Qanchor() prints out a suitable-for-id-tag version of a string
183  */
184 static void
Qanchor(struct line * p,MMIOT * f)185 Qanchor(struct line *p, MMIOT *f)
186 {
187     mkd_string_to_anchor(T(p->text), S(p->text),
188 			 (mkd_sta_function_t)Qchar, f, 1, f);
189 }
190 
191 
192 /* Qem()
193  */
194 static void
Qem(MMIOT * f,char c,int count)195 Qem(MMIOT *f, char c, int count)
196 {
197     block *p = &EXPAND(f->Q);
198 
199     memset(p, 0, sizeof *p);
200     p->b_type = (c == '*') ? bSTAR : bUNDER;
201     p->b_char = c;
202     p->b_count = count;
203 
204     memset(&EXPAND(f->Q), 0, sizeof(block));
205 }
206 
207 
208 /* generate html from a markup fragment
209  */
210 void
___mkd_reparse(char * bfr,int size,mkd_flag_t flags,MMIOT * f,char * esc)211 ___mkd_reparse(char *bfr, int size, mkd_flag_t flags, MMIOT *f, char *esc)
212 {
213     MMIOT sub;
214     struct escaped e;
215 
216     ___mkd_initmmiot(&sub, f->footnotes);
217 
218     sub.flags = f->flags | flags;
219     sub.cb = f->cb;
220     sub.ref_prefix = f->ref_prefix;
221 
222     if ( esc ) {
223 	sub.esc = &e;
224 	e.up = f->esc;
225 	e.text = esc;
226     }
227     else
228 	sub.esc = f->esc;
229 
230     push(bfr, size, &sub);
231     pushc(0, &sub);
232     S(sub.in)--;
233 
234     text(&sub);
235     ___mkd_emblock(&sub);
236 
237     Qwrite(T(sub.out), S(sub.out), f);
238     /* inherit the last character printed from the reparsed
239      * text;  this way superscripts can work when they're
240      * applied to something embedded in a link
241      */
242     f->last = sub.last;
243 
244     ___mkd_freemmiot(&sub, f->footnotes);
245 }
246 
247 
248 /*
249  * check the escape list for special cases
250  */
251 static int
escaped(MMIOT * f,char c)252 escaped(MMIOT *f, char c)
253 {
254     struct escaped *thing = f->esc;
255 
256     while ( thing ) {
257 	if ( strchr(thing->text, c) )
258 	    return 1;
259 	thing = thing->up;
260     }
261     return 0;
262 }
263 
264 
265 /*
266  * write out a url, escaping problematic characters
267  */
268 static void
puturl(char * s,int size,MMIOT * f,int display)269 puturl(char *s, int size, MMIOT *f, int display)
270 {
271     unsigned char c;
272 
273     while ( size-- > 0 ) {
274 	c = *s++;
275 
276 	if ( c == '\\' && size-- > 0 ) {
277 	    c = *s++;
278 
279 	    if ( !( ispunct(c) || isspace(c) ) )
280 		Qchar('\\', f);
281 	}
282 
283 	if ( c == '&' )
284 	    Qstring("&amp;", f);
285 	else if ( c == '<' )
286 	    Qstring("&lt;", f);
287 	else if ( c == '"' )
288 	    Qstring("%22", f);
289 	else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) )
290 	    Qchar(c, f);
291 	else if ( c == MKD_EOLN )	/* untokenize hard return */
292 	    Qstring("  ", f);
293 	else
294 	    Qprintf(f, "%%%02X", c);
295     }
296 }
297 
298 
299 /* advance forward until the next character is not whitespace
300  */
301 static int
eatspace(MMIOT * f)302 eatspace(MMIOT *f)
303 {
304     int c;
305 
306     for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) )
307 	;
308     return c;
309 }
310 
311 
312 /* (match (a (nested (parenthetical (string.)))))
313  */
314 static int
parenthetical(int in,int out,MMIOT * f)315 parenthetical(int in, int out, MMIOT *f)
316 {
317     int size, indent, c;
318 
319     for ( indent=1,size=0; indent; size++ ) {
320 	if ( (c = pull(f)) == EOF )
321 	    return EOF;
322 	else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) {
323 	    ++size;
324 	    pull(f);
325 	}
326 	else if ( c == in )
327 	    ++indent;
328 	else if ( c == out )
329 	    --indent;
330     }
331     return size ? (size-1) : 0;
332 }
333 
334 
335 /* extract a []-delimited label from the input stream.
336  */
337 static int
linkylabel(MMIOT * f,Cstring * res)338 linkylabel(MMIOT *f, Cstring *res)
339 {
340     char *ptr = cursor(f);
341     int size;
342 
343     if ( (size = parenthetical('[',']',f)) != EOF ) {
344 	T(*res) = ptr;
345 	S(*res) = size;
346 	return 1;
347     }
348     return 0;
349 }
350 
351 
352 /* see if the quote-prefixed linky segment is actually a title.
353  */
354 static int
linkytitle(MMIOT * f,char quote,Footnote * ref)355 linkytitle(MMIOT *f, char quote, Footnote *ref)
356 {
357     int whence = mmiottell(f);
358     char *title = cursor(f);
359     char *e;
360     register int c;
361 
362     while ( (c = pull(f)) != EOF ) {
363 	e = cursor(f);
364 	if ( c == quote ) {
365 	    if ( (c = eatspace(f)) == ')' ) {
366 		T(ref->title) = 1+title;
367 		S(ref->title) = (e-title)-2;
368 		return 1;
369 	    }
370 	}
371     }
372     mmiotseek(f, whence);
373     return 0;
374 }
375 
376 
377 /* extract a =HHHxWWW size from the input stream
378  */
379 static int
linkysize(MMIOT * f,Footnote * ref)380 linkysize(MMIOT *f, Footnote *ref)
381 {
382     int height=0, width=0;
383     int whence = mmiottell(f);
384     int c;
385 
386     if ( isspace(peek(f,0)) ) {
387 	pull(f);	/* eat '=' */
388 
389 	for ( c = pull(f); isdigit(c); c = pull(f))
390 	    width = (width * 10) + (c - '0');
391 
392 	if ( c == 'x' ) {
393 	    for ( c = pull(f); isdigit(c); c = pull(f))
394 		height = (height*10) + (c - '0');
395 
396 	    if ( isspace(c) )
397 		c = eatspace(f);
398 
399 	    if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) {
400 		ref->height = height;
401 		ref->width  = width;
402 		return 1;
403 	    }
404 	}
405     }
406     mmiotseek(f, whence);
407     return 0;
408 }
409 
410 
411 /* extract a <...>-encased url from the input stream.
412  * (markdown 1.0.2b8 compatibility; older versions
413  * of markdown treated the < and > as syntactic
414  * sugar that didn't have to be there.  1.0.2b8
415  * requires a closing >, and then falls into the
416  * title or closing )
417  */
418 static int
linkybroket(MMIOT * f,int image,Footnote * p)419 linkybroket(MMIOT *f, int image, Footnote *p)
420 {
421     int c;
422     int good = 0;
423 
424     T(p->link) = cursor(f);
425     for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) {
426 	/* pull in all input until a '>' is found, or die trying.
427 	 */
428 	if ( c == EOF )
429 	    return 0;
430 	else if ( (c == '\\') && ispunct(peek(f,2)) ) {
431 	    ++S(p->link);
432 	    pull(f);
433 	}
434     }
435 
436     c = eatspace(f);
437 
438     /* next nonspace needs to be a title, a size, or )
439      */
440     if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) )
441 	good=1;
442     else if ( image && (c == '=') && linkysize(f,p) )
443 	good=1;
444     else
445 	good=( c == ')' );
446 
447     if ( good ) {
448 	if ( peek(f, 1) == ')' )
449 	    pull(f);
450 
451 	___mkd_tidy(&p->link);
452     }
453 
454     return good;
455 } /* linkybroket */
456 
457 
458 /* extract a (-prefixed url from the input stream.
459  * the label is either of the format `<link>`, where I
460  * extract until I find a >, or it is of the format
461  * `text`, where I extract until I reach a ')', a quote,
462  * or (if image) a '='
463  */
464 static int
linkyurl(MMIOT * f,int image,Footnote * p)465 linkyurl(MMIOT *f, int image, Footnote *p)
466 {
467     int c;
468     int mayneedtotrim=0;
469 
470     if ( (c = eatspace(f)) == EOF )
471 	return 0;
472 
473     if ( c == '<' ) {
474 	pull(f);
475 	if ( !is_flag_set(f->flags, MKD_1_COMPAT) )
476 	    return linkybroket(f,image,p);
477 	mayneedtotrim=1;
478     }
479 
480     T(p->link) = cursor(f);
481     for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) {
482 	if ( c == EOF )
483 	    return 0;
484 	else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) )
485 	    break;
486 	else if ( image && (c == '=') && linkysize(f, p) )
487 	    break;
488 	else if ( (c == '\\') && ispunct(peek(f,2)) ) {
489 	    ++S(p->link);
490 	    pull(f);
491 	}
492 	pull(f);
493     }
494     if ( peek(f, 1) == ')' )
495 	pull(f);
496 
497     ___mkd_tidy(&p->link);
498 
499     if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') )
500 	--S(p->link);
501 
502     return 1;
503 }
504 
505 
506 
/* protocol prefixes that are accepted for <automatic links>;  each
 * entry holds the prefix and its length (terminating NUL not counted)
 */
static struct _protocol {
    char *name;		/* the prefix, trailing ':' included */
    int   nlen;		/* number of characters in name */
} protocol[] = {
#define _aprotocol(x)	{ x, (sizeof x)-1 }
    _aprotocol( "https:" ), _aprotocol( "http:" ),
    _aprotocol( "news:" ),  _aprotocol( "ftp:" ),
#undef _aprotocol
};
#define NRPROTOCOLS	(sizeof protocol / sizeof protocol[0])
521 
522 
523 static int
isautoprefix(char * text,int size)524 isautoprefix(char *text, int size)
525 {
526     int i;
527     struct _protocol *p;
528 
529     for (i=0, p=protocol; i < NRPROTOCOLS; i++, p++)
530 	if ( (size >= p->nlen) && strncasecmp(text, p->name, p->nlen) == 0 )
531 	    return 1;
532     return 0;
533 }
534 
535 
536 /*
537  * all the tag types that linkylinky can produce are
538  * defined by this structure.
539  */
540 typedef struct linkytype {
541     char      *pat;
542     int      szpat;
543     char *link_pfx;	/* tag prefix and link pointer  (eg: "<a href="\"" */
544     char *link_sfx;	/* link suffix			(eg: "\""          */
545     int        WxH;	/* this tag allows width x height arguments */
546     char *text_pfx;	/* text prefix                  (eg: ">"           */
547     char *text_sfx;	/* text suffix			(eg: "</a>"        */
548     int      flags;	/* reparse flags */
549     int      kind;	/* tag is url or something else? */
550 #define IS_URL	0x01
551 } linkytype;
552 
553 static linkytype imaget = { 0, 0, "<img src=\"", "\"",
554 			     1, " alt=\"", "\" />", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL };
555 static linkytype linkt  = { 0, 0, "<a href=\"", "\"",
556                              0, ">", "</a>", MKD_NOLINKS, IS_URL };
557 
558 /*
559  * pseudo-protocols for [][];
560  *
561  * id: generates <a id="link">tag</a>
562  * class: generates <span class="link">tag</span>
563  * raw: just dump the link without any processing
564  */
565 static linkytype specials[] = {
566     { "id:", 3, "<span id=\"", "\"", 0, ">", "</span>", 0, 0 },
567     { "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 },
568     { "lang:", 5, "<span lang=\"", "\"", 0, ">", "</span>", 0, 0 },
569     { "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 },
570     { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 },
571 } ;
572 
573 #define NR(x)	(sizeof x / sizeof x[0])
574 
575 /* see if t contains one of our pseudo-protocols.
576  */
577 static linkytype *
pseudo(Cstring t)578 pseudo(Cstring t)
579 {
580     int i;
581     linkytype *r;
582 
583     for ( i=0, r=specials; i < NR(specials); i++,r++ ) {
584 	if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
585 	    return r;
586     }
587     return 0;
588 }
589 
590 
591 /* print out the start of an `img' or `a' tag, applying callbacks as needed.
592  */
593 static void
printlinkyref(MMIOT * f,linkytype * tag,char * link,int size)594 printlinkyref(MMIOT *f, linkytype *tag, char *link, int size)
595 {
596     char *edit;
597 
598     if ( is_flag_set(f->flags, IS_LABEL) )
599 	return;
600 
601     Qstring(tag->link_pfx, f);
602 
603     if ( tag->kind & IS_URL ) {
604 	if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) {
605 	    puturl(edit, strlen(edit), f, 0);
606 	    if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
607 	}
608 	else
609 	    puturl(link + tag->szpat, size - tag->szpat, f, 0);
610     }
611     else
612 	___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0);
613 
614     Qstring(tag->link_sfx, f);
615 
616     if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) {
617 	Qchar(' ', f);
618 	Qstring(edit, f);
619 	if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
620     }
621 } /* printlinkyref */
622 
623 
624 /* helper function for php markdown extra footnotes; allow the user to
625  * define a prefix tag instead of just `fn`
626  */
627 static char *
p_or_nothing(p)628 p_or_nothing(p)
629 MMIOT *p;
630 {
631     return p->ref_prefix ? p->ref_prefix : "fn";
632 }
633 
634 
635 /* php markdown extra/daring fireball style print footnotes
636  */
637 static int
extra_linky(MMIOT * f,Cstring text,Footnote * ref)638 extra_linky(MMIOT *f, Cstring text, Footnote *ref)
639 {
640     if ( ref->flags & REFERENCED )
641 	return 0;
642 
643     if ( f->flags & IS_LABEL )
644     	___mkd_reparse(T(text), S(text), linkt.flags, f, 0);
645     else {
646 	ref->flags |= REFERENCED;
647 	ref->refnumber = ++ f->footnotes->reference;
648 	Qprintf(f, "<sup id=\"%sref:%d\"><a href=\"#%s:%d\" rel=\"footnote\">%d</a></sup>",
649 		p_or_nothing(f), ref->refnumber,
650 		p_or_nothing(f), ref->refnumber, ref->refnumber);
651     }
652     return 1;
653 } /* extra_linky */
654 
655 
656 
657 /* check a url (or url fragment to see that it begins with a known good
658  * protocol (or no protocol at all)
659  */
660 static int
safelink(Cstring link)661 safelink(Cstring link)
662 {
663     char *p, *colon;
664 
665     if ( T(link) == 0 )	/* no link; safe */
666 	return 1;
667 
668     p = T(link);
669     if ( (colon = memchr(p, ':', S(link))) == 0 )
670 	return 1; /* no protocol specified: safe */
671 
672     if ( !isalpha(*p) )	/* protocol/method is [alpha][alnum or '+.-'] */
673 	return 1;
674     while ( ++p < colon )
675 	if ( !(isalnum(*p) || *p == '.' || *p == '+' || *p == '-') )
676 	    return 1;
677 
678     return isautoprefix(T(link), S(link));
679 }
680 
681 
682 /* print out a linky (or fail if it's Not Allowed)
683  */
684 static int
linkyformat(MMIOT * f,Cstring text,int image,Footnote * ref)685 linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref)
686 {
687     linkytype *tag;
688 
689 
690     if ( image )
691 	tag = &imaget;
692     else if ( tag = pseudo(ref->link) ) {
693 	if ( is_flag_set(f->flags, MKD_NO_EXT) || is_flag_set(f->flags, MKD_SAFELINK) )
694 	    return 0;
695     }
696     else if ( is_flag_set(f->flags, MKD_SAFELINK) && !safelink(ref->link) )
697 	/* if MKD_SAFELINK, only accept links that are local or
698 	 * a well-known protocol
699 	 */
700 	return 0;
701     else
702 	tag = &linkt;
703 
704     if ( f->flags & tag->flags )
705 	return 0;
706 
707     if ( is_flag_set(f->flags, IS_LABEL) )
708 	___mkd_reparse(T(text), S(text), tag->flags, f, 0);
709     else if ( tag->link_pfx ) {
710 	printlinkyref(f, tag, T(ref->link), S(ref->link));
711 
712 	if ( tag->WxH ) {
713 	    if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height);
714 	    if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width);
715 	}
716 
717 	if ( S(ref->title) ) {
718 	    Qstring(" title=\"", f);
719 	    ___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0);
720 	    Qchar('"', f);
721 	}
722 
723 	Qstring(tag->text_pfx, f);
724 	___mkd_reparse(T(text), S(text), tag->flags, f, 0);
725 	Qstring(tag->text_sfx, f);
726     }
727     else
728 	Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f);
729 
730     return 1;
731 } /* linkyformat */
732 
733 
734 /*
735  * process embedded links and images
736  */
737 static int
linkylinky(int image,MMIOT * f)738 linkylinky(int image, MMIOT *f)
739 {
740     int start = mmiottell(f);
741     Cstring name;
742     Footnote key, *ref;
743 
744     int status = 0;
745     int extra_footnote = 0;
746 
747     CREATE(name);
748     memset(&key, 0, sizeof key);
749 
750     if ( linkylabel(f, &name) ) {
751 	if ( peek(f,1) == '(' ) {
752 	    pull(f);
753 	    if ( linkyurl(f, image, &key) )
754 		status = linkyformat(f, name, image, &key);
755 	}
756 	else {
757 	    int goodlink, implicit_mark = mmiottell(f);
758 
759 	    if ( isspace(peek(f,1)) )
760 		pull(f);
761 
762 	    if ( peek(f,1) == '[' ) {
763 		pull(f);	/* consume leading '[' */
764 		goodlink = linkylabel(f, &key.tag);
765 	    }
766 	    else {
767 		/* new markdown implicit name syntax doesn't
768 		 * require a second []
769 		 */
770 		mmiotseek(f, implicit_mark);
771 		goodlink = !is_flag_set(f->flags, MKD_1_COMPAT);
772 
773 		if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' )
774 		    extra_footnote = 1;
775 	    }
776 
777 	    if ( goodlink ) {
778 		if ( !S(key.tag) ) {
779 		    DELETE(key.tag);
780 		    T(key.tag) = T(name);
781 		    S(key.tag) = S(name);
782 		}
783 
784 		if ( ref = bsearch(&key, T(f->footnotes->note),
785 					 S(f->footnotes->note),
786 					 sizeof key, (stfu)__mkd_footsort) ) {
787 		    if ( extra_footnote )
788 			status = extra_linky(f,name,ref);
789 		    else
790 			status = linkyformat(f, name, image, ref);
791 		}
792 	    }
793 	}
794     }
795 
796     DELETE(name);
797     ___mkd_freefootnote(&key);
798 
799     if ( status == 0 )
800 	mmiotseek(f, start);
801 
802     return status;
803 }
804 
805 
806 /* write a character to output, doing text escapes ( & -> &amp;,
807  *                                          > -> &gt; < -> &lt; )
808  */
809 static void
cputc(int c,MMIOT * f)810 cputc(int c, MMIOT *f)
811 {
812     switch (c) {
813     case '&':   Qstring("&amp;", f); break;
814     case '>':   Qstring("&gt;", f); break;
815     case '<':   Qstring("&lt;", f); break;
816     default :   Qchar(c, f); break;
817     }
818 }
819 
820 
821 /*
822  * convert an email address to a string of nonsense
823  */
824 static void
mangle(char * s,int len,MMIOT * f)825 mangle(char *s, int len, MMIOT *f)
826 {
827     while ( len-- > 0 ) {
828 #if DEBIAN_GLITCH
829 	Qprintf(f, "&#%02d;", *((unsigned char*)(s++)) );
830 #else
831 	Qstring("&#", f);
832 	Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) );
833 #endif
834     }
835 }
836 
837 
838 /* nrticks() -- count up a row of tick marks
839  */
840 static int
nrticks(int offset,int tickchar,MMIOT * f)841 nrticks(int offset, int tickchar, MMIOT *f)
842 {
843     int  tick = 0;
844 
845     while ( peek(f, offset+tick) == tickchar ) tick++;
846 
847     return tick;
848 } /* nrticks */
849 
850 
851 /* matchticks() -- match a certain # of ticks, and if that fails
852  *                 match the largest subset of those ticks.
853  *
854  *                 if a subset was matched, return the # of ticks
855  *		   that were matched.
856  */
857 static int
matchticks(MMIOT * f,int tickchar,int ticks,int * endticks)858 matchticks(MMIOT *f, int tickchar, int ticks, int *endticks)
859 {
860     int size, count, c;
861     int subsize=0, subtick=0;
862 
863     *endticks = ticks;
864     for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) {
865 	if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) {
866 	    if ( count == ticks )
867 		return size;
868 	    else if ( count ) {
869 		if ( (count > subtick) && (count < ticks) ) {
870 		    subsize = size;
871 		    subtick = count;
872 		}
873 		size += count;
874 	    }
875 	}
876     }
877     if ( subsize ) {
878 	*endticks = subtick;
879 	return subsize;
880     }
881     return 0;
882 } /* matchticks */
883 
884 
885 /* code() -- write a string out as code. The only characters that have
886  *           special meaning in a code block are * `<' and `&' , which
887  *           are /always/ expanded to &lt; and &amp;
888  */
889 static void
code(MMIOT * f,char * s,int length)890 code(MMIOT *f, char *s, int length)
891 {
892     int i,c;
893 
894     for ( i=0; i < length; i++ )
895 	if ( (c = s[i]) == MKD_EOLN)  /* expand back to 2 spaces */
896 	    Qstring("  ", f);
897 	else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) )
898 	    cputc(s[++i], f);
899 	else
900 	    cputc(c, f);
901 } /* code */
902 
903 /*  delspan() -- write out a chunk of text, blocking with <del>...</del>
904  */
905 static void
delspan(MMIOT * f,int size)906 delspan(MMIOT *f, int size)
907 {
908     Qstring("<del>", f);
909     ___mkd_reparse(cursor(f)-1, size, 0, f, 0);
910     Qstring("</del>", f);
911 }
912 
913 
914 /*  codespan() -- write out a chunk of text as code, trimming one
915  *                space off the front and/or back as appropriate.
916  */
917 static void
codespan(MMIOT * f,int size)918 codespan(MMIOT *f, int size)
919 {
920     int i=0;
921 
922     if ( size > 1 && peek(f, size-1) == ' ' ) --size;
923     if ( peek(f,i) == ' ' ) ++i, --size;
924 
925     Qstring("<code>", f);
926     code(f, cursor(f)+(i-1), size);
927     Qstring("</code>", f);
928 } /* codespan */
929 
930 
931 /* before letting a tag through, validate against
932  * MKD_NOLINKS and MKD_NOIMAGE
933  */
934 static int
forbidden_tag(MMIOT * f)935 forbidden_tag(MMIOT *f)
936 {
937     int c = toupper(peek(f, 1));
938 
939     if ( is_flag_set(f->flags, MKD_NOHTML) )
940 	return 1;
941 
942     if ( c == 'A' && is_flag_set(f->flags, MKD_NOLINKS) && !isthisalnum(f,2) )
943 	return 1;
944     if ( c == 'I' && is_flag_set(f->flags, MKD_NOIMAGE)
945 		  && strncasecmp(cursor(f)+1, "MG", 2) == 0
946 		  && !isthisalnum(f,4) )
947 	return 1;
948     return 0;
949 }
950 
951 
/* Check a string to see if it looks like a mail address
 * "looks like a mail address" means alphanumeric + some
 * specials, then a `@`, then alphanumeric + some specials,
 * but with a `.`
 *
 * <p> need not be NUL-terminated;  exactly <size> bytes are examined.
 * The part after the `@` must not start with a `.` and must contain a
 * `.` that is not its last character.   Returns 1 if the whole string
 * has that shape, 0 otherwise (including for an empty or negative size).
 */
static int
maybe_address(char *p, int size)
{
    int ok = 0;
    unsigned char c;

    /* local part.   Bytes are looked at as unsigned char because the
     * <ctype.h> classifiers are undefined for negative values, and NUL
     * is rejected explicitly because strchr() would otherwise match it
     * against the terminator of the character set.
     */
    for ( ; size > 0; ++p, --size ) {
	c = (unsigned char)*p;
	if ( !(isalnum(c) || (c && strchr("._-+*", c))) )
	    break;
    }

    if ( (size <= 0) || (*p != '@') )
	return 0;

    --size, ++p;

    if ( (size > 0) && (*p == '.') )
	return 0;

    /* domain part */
    for ( ; size > 0; ++p, --size ) {
	c = (unsigned char)*p;
	if ( !(isalnum(c) || (c && strchr("._-+", c))) )
	    break;
	if ( (c == '.') && (size > 1) )
	    ok = 1;
    }

    return (size > 0) ? 0 : ok;
}
977 
978 
979 /* The size-length token at cursor(f) is either a mailto:, an
980  * implicit mailto:, one of the approved url protocols, or just
981  * plain old text.   If it's a mailto: or an approved protocol,
982  * linkify it, otherwise say "no"
983  */
984 static int
process_possible_link(MMIOT * f,int size)985 process_possible_link(MMIOT *f, int size)
986 {
987     int address= 0;
988     int mailto = 0;
989     char *text = cursor(f);
990 
991     if ( is_flag_set(f->flags, MKD_NOLINKS) ) return 0;
992 
993     if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) {
994 	/* if it says it's a mailto, it's a mailto -- who am
995 	 * I to second-guess the user?
996 	 */
997 	address = 1;
998 	mailto = 7; 	/* 7 is the length of "mailto:"; we need this */
999     }
1000     else
1001 	address = maybe_address(text, size);
1002 
1003     if ( address ) {
1004 	Qstring("<a href=\"", f);
1005 	if ( !mailto ) {
1006 	    /* supply a mailto: protocol if one wasn't attached */
1007 	    mangle("mailto:", 7, f);
1008 	}
1009 	mangle(text, size, f);
1010 	Qstring("\">", f);
1011 	mangle(text+mailto, size-mailto, f);
1012 	Qstring("</a>", f);
1013 	return 1;
1014     }
1015     else if ( isautoprefix(text, size) ) {
1016 	printlinkyref(f, &linkt, text, size);
1017 	Qchar('>', f);
1018 	puturl(text,size,f, 1);
1019 	Qstring("</a>", f);
1020 	return 1;
1021     }
1022     return 0;
1023 } /* process_possible_link */
1024 
1025 
1026 /* a < may be just a regular character, the start of an embedded html
1027  * tag, or the start of an <automatic link>.    If it's an automatic
1028  * link, we also need to know if it's an email address because if it
1029  * is we need to mangle it in our futile attempt to cut down on the
1030  * spaminess of the rendered page.
1031  */
1032 static int
maybe_tag_or_link(MMIOT * f)1033 maybe_tag_or_link(MMIOT *f)
1034 {
1035     int c, size;
1036     int maybetag = 1;
1037 
1038     if ( is_flag_set(f->flags, MKD_TAGTEXT) )
1039 	return 0;
1040 
1041     for ( size=0; (c = peek(f, size+1)) != '>'; size++) {
1042 	if ( c == EOF )
1043 	    return 0;
1044 	else if ( c == '\\' ) {
1045 	    maybetag=0;
1046 	    if ( peek(f, size+2) != EOF )
1047 		size++;
1048 	}
1049 	else if ( isspace(c) )
1050 	    break;
1051 	else if ( ! (c == '/'
1052 		     || (is_flag_set(f->flags, MKD_GITHUBTAGS) && (c == '-' || c == '_'))
1053 		     || isalnum(c) ) )
1054 	    maybetag=0;
1055     }
1056 
1057     if ( size ) {
1058 	if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
1059 
1060 	    /* It is not a html tag unless we find the closing '>' in
1061 	     * the same block.
1062 	     */
1063 	    while ( (c = peek(f, size+1)) != '>' )
1064 		if ( c == EOF )
1065 		    return 0;
1066 		else
1067 		    size++;
1068 
1069 	    if ( forbidden_tag(f) )
1070 		return 0;
1071 
1072 	    Qchar('<', f);
1073 	    while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
1074 		Qchar(pull(f), f);
1075 	    return 1;
1076 	}
1077 	else if ( !isspace(c) && process_possible_link(f, size) ) {
1078 	    shift(f, size+1);
1079 	    return 1;
1080 	}
1081     }
1082 
1083     return 0;
1084 }
1085 
1086 
1087 /* autolinking means that all inline html is <a href'ified>.   A
1088  * autolink url is alphanumerics, slashes, periods, underscores,
1089  * the at sign, colon, and the % character.
1090  */
1091 static int
maybe_autolink(MMIOT * f)1092 maybe_autolink(MMIOT *f)
1093 {
1094     register int c;
1095     int size;
1096 
1097     /* greedily scan forward for the end of a legitimate link.
1098      */
1099     for ( size=0; (c=peek(f, size+1)) != EOF; size++ ) {
1100 	if ( c == '\\' ) {
1101 	     if ( peek(f, size+2) != EOF )
1102 		++size;
1103 	}
1104 	else if ( c & 0x80 )	/* HACK: ignore utf-8 extended characters */
1105 	    continue;
1106 	else if ( isspace(c) || strchr("'\"()[]{}<>`", c) || c == MKD_EOLN )
1107 	    break;
1108     }
1109 
1110     if ( (size > 1) && process_possible_link(f, size) ) {
1111 	shift(f, size);
1112 	return 1;
1113     }
1114     return 0;
1115 }
1116 
1117 
1118 /* smartyquote code that's common for single and double quotes
1119  */
1120 static int
smartyquote(int * flags,char typeofquote,MMIOT * f)1121 smartyquote(int *flags, char typeofquote, MMIOT *f)
1122 {
1123     int bit = (typeofquote == 's') ? 0x01 : 0x02;
1124 
1125     if ( bit & (*flags) ) {
1126 	if ( isthisnonword(f,1) ) {
1127 	    Qprintf(f, "&r%cquo;", typeofquote);
1128 	    (*flags) &= ~bit;
1129 	    return 1;
1130 	}
1131     }
1132     else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
1133 	Qprintf(f, "&l%cquo;", typeofquote);
1134 	(*flags) |= bit;
1135 	return 1;
1136     }
1137     return 0;
1138 }
1139 
1140 
1141 static int
islike(MMIOT * f,char * s)1142 islike(MMIOT *f, char *s)
1143 {
1144     int len;
1145     int i;
1146 
1147     if ( s[0] == '|' ) {
1148 	if ( !isthisnonword(f, -1) )
1149 	    return 0;
1150        ++s;
1151     }
1152 
1153     if ( !(len = strlen(s)) )
1154 	return 0;
1155 
1156     if ( s[len-1] == '|' ) {
1157 	if ( !isthisnonword(f,len-1) )
1158 	    return 0;
1159 	len--;
1160     }
1161 
1162     for (i=1; i < len; i++)
1163 	if (tolower(peek(f,i)) != s[i])
1164 	    return 0;
1165     return 1;
1166 }
1167 
1168 
/* smartypants substitution table.
 *
 *   c0     -- the character that triggers the rule
 *   pat    -- the text to match, beginning with c0;  it is compared by
 *             islike(), where a leading/trailing '|' asks for a
 *             nonword neighbour
 *   entity -- name of the html entity to print  (0: print nothing)
 *   shift  -- how many more input characters to skip after a match
 *
 * the rules are tried in order and the first match wins, which is why
 * "---" has to come before "--".
 */
static struct smarties {
    char c0;
    char *pat;
    char *entity;
    int shift;
} smarties[] = {
    /* apostrophes in contractions and possessives */
    { '\'', "'s|",      "rsquo",  0 },
    { '\'', "'t|",      "rsquo",  0 },
    { '\'', "'re|",     "rsquo",  0 },
    { '\'', "'ll|",     "rsquo",  0 },
    { '\'', "'ve|",     "rsquo",  0 },
    { '\'', "'m|",      "rsquo",  0 },
    { '\'', "'d|",      "rsquo",  0 },

    /* dashes and ellipses */
    { '-',  "---",      "mdash",  2 },
    { '-',  "--",       "ndash",  1 },
    { '.',  "...",      "hellip", 2 },
    { '.',  ". . .",    "hellip", 4 },

    /* copyright, registered, trademark */
    { '(',  "(c)",      "copy",   2 },
    { '(',  "(r)",      "reg",    2 },
    { '(',  "(tm)",     "trade",  3 },

    /* fractions */
    { '3',  "|3/4|",    "frac34", 2 },
    { '3',  "|3/4ths|", "frac34", 2 },
    { '1',  "|1/2|",    "frac12", 2 },
    { '1',  "|1/4|",    "frac14", 2 },
    { '1',  "|1/4th|",  "frac14", 2 },

    /* a literal &#0; is swallowed without printing anything */
    { '&',  "&#0;",      0,       3 },
} ;
#define NRSMART ( sizeof smarties / sizeof smarties[0] )
1197 
1198 
1199 /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
1200  */
1201 static int
smartypants(int c,int * flags,MMIOT * f)1202 smartypants(int c, int *flags, MMIOT *f)
1203 {
1204     int i;
1205 
1206     if ( is_flag_set(f->flags, MKD_NOPANTS)
1207       || is_flag_set(f->flags, MKD_TAGTEXT)
1208       || is_flag_set(f->flags, IS_LABEL) )
1209 	return 0;
1210 
1211     for ( i=0; i < NRSMART; i++)
1212 	if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
1213 	    if ( smarties[i].entity )
1214 		Qprintf(f, "&%s;", smarties[i].entity);
1215 	    shift(f, smarties[i].shift);
1216 	    return 1;
1217 	}
1218 
1219     switch (c) {
1220     case '<' :  return 0;
1221     case '\'':  if ( smartyquote(flags, 's', f) ) return 1;
1222 		break;
1223 
1224     case '"':	if ( smartyquote(flags, 'd', f) ) return 1;
1225 		break;
1226 
1227     case '`':   if ( peek(f, 1) == '`' ) {
1228 		    int j = 2;
1229 
1230 		    while ( (c=peek(f,j)) != EOF ) {
1231 			if ( c == '\\' )
1232 			    j += 2;
1233 			else if ( c == '`' )
1234 			    break;
1235 			else if ( c == '\'' && peek(f, j+1) == '\'' ) {
1236 			    Qstring("&ldquo;", f);
1237 			    ___mkd_reparse(cursor(f)+1, j-2, 0, f, 0);
1238 			    Qstring("&rdquo;", f);
1239 			    shift(f,j+1);
1240 			    return 1;
1241 			}
1242 			else ++j;
1243 		    }
1244 
1245 		}
1246 		break;
1247     }
1248     return 0;
1249 } /* smartypants */
1250 
1251 
1252 /* process latex with arbitrary 2-character ( $$ .. $$, \[ .. \], \( .. \)
1253  * delimiters
1254  */
1255 static int
mathhandler(MMIOT * f,int e1,int e2)1256 mathhandler(MMIOT *f, int e1, int e2)
1257 {
1258     int i = 0;
1259 
1260     while(peek(f, ++i) != EOF) {
1261         if (peek(f, i) == e1 && peek(f, i+1) == e2) {
1262             cputc(peek(f,-1), f);
1263             cputc(peek(f, 0), f);
1264             cputc(6, f);
1265             EXPAND(f->latex) = peek(f,-1);
1266             EXPAND(f->latex) = peek(f,0);
1267             EXPAND(f->latex) = 6;
1268             while ( i-- > -1 ) {
1269                 char c = pull(f);
1270                 EXPAND(f->latex) = c;
1271                 cputc(c, f);
1272             }
1273             EXPAND(f->latex) = 31;
1274             return 1;
1275         }
1276     }
1277     return 0;
1278 }
1279 
1280 /*
1281  * process latex with arbitrary custom delimiters
1282  */
1283 static int
mathhandlerExtended(MMIOT * f,char * begin,char * end)1284 mathhandlerExtended(MMIOT *f, char* begin, char* end)
1285 {
1286     int beginLength = strlen(begin);
1287     int endLength = strlen(end);
1288 
1289     for (int i = 0; i < beginLength; i++)
1290         if (peek(f, i) != begin[i])
1291             return 0;
1292 
1293     int i = beginLength;
1294     while(peek(f, ++i) != EOF) {
1295         int matchEnd = 1;
1296         for (int j = 0; j < endLength; j++)
1297             if (peek(f, i + j) != end[j])
1298                 matchEnd = 0;
1299 
1300         if (matchEnd == 1) {
1301             i += endLength;
1302 
1303             cputc(6, f);
1304 			EXPAND(f->latex) = '\\';
1305             EXPAND(f->latex) = 6;
1306 
1307             while ( --i > 0 )
1308             {
1309                 char c = pull(f);
1310                 EXPAND(f->latex) = c;
1311                 cputc(c, f);
1312             }
1313 
1314             EXPAND(f->latex) = 31;
1315             return 1;
1316         }
1317     }
1318     return 0;
1319 }
1320 
1321 
1322 /* process a body of text encased in some sort of tick marks.   If it
1323  * works, generate the output and return 1, otherwise just return 0 and
1324  * let the caller figure it out.
1325  */
1326 static int
tickhandler(MMIOT * f,int tickchar,int minticks,int allow_space,spanhandler spanner)1327 tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner)
1328 {
1329     int endticks, size;
1330     int tick = nrticks(0, tickchar, f);
1331 
1332     if ( !allow_space && isspace(peek(f,tick)) )
1333 	return 0;
1334 
1335     if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) {
1336 	if ( endticks < tick ) {
1337 	    size += (tick - endticks);
1338 	    tick = endticks;
1339 	}
1340 
1341 	shift(f, tick);
1342 	(*spanner)(f,size);
1343 	shift(f, size+tick-1);
1344 	return 1;
1345     }
1346     return 0;
1347 }
1348 
1349 #define tag_text(f)	is_flag_set(f->flags, MKD_TAGTEXT)
1350 
1351 
1352 static void
text(MMIOT * f)1353 text(MMIOT *f)
1354 {
1355     int c, j;
1356     int rep;
1357     int smartyflags = 0;
1358 
1359     while (1) {
1360         if ( is_flag_set(f->flags, MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) )
1361 	    maybe_autolink(f);
1362 
1363         c = pull(f);
1364 
1365         if (c == EOF)
1366           break;
1367 
1368 	if ( smartypants(c, &smartyflags, f) )
1369 	    continue;
1370 	switch (c) {
1371 	case 0:     break;
1372 
1373 	case MKD_EOLN:
1374 		    Qstring(tag_text(f) ? "  " : "<br/>", f);
1375 		    break;
1376 
1377 	case '>':   if ( tag_text(f) )
1378 			Qstring("&gt;", f);
1379 		    else
1380 			Qchar(c, f);
1381 		    break;
1382 
1383 	case '"':   if ( tag_text(f) )
1384 			Qstring("&quot;", f);
1385 		    else
1386 			Qchar(c, f);
1387 		    break;
1388 
1389 	case '!':   if ( peek(f,1) == '[' ) {
1390 			pull(f);
1391 			if ( tag_text(f) || !linkylinky(1, f) )
1392 			    Qstring("![", f);
1393 		    }
1394 		    else
1395 			Qchar(c, f);
1396 		    break;
1397 
1398 	case '[':   if ( tag_text(f) || !linkylinky(0, f) )
1399 			Qchar(c, f);
1400 		    break;
1401 	/* A^B -> A<sup>B</sup> */
1402 	case '^':   if ( is_flag_set(f->flags, MKD_NOSUPERSCRIPT)
1403 			    || is_flag_set(f->flags, MKD_STRICT)
1404 			    || is_flag_set(f->flags, MKD_TAGTEXT)
1405 			    || (f->last == 0)
1406 			    || ((ispunct(f->last) || isspace(f->last))
1407 						    && f->last != ')')
1408 			    || isthisspace(f,1) )
1409 			Qchar(c,f);
1410 		    else {
1411 			char *sup = cursor(f);
1412 			int len = 0;
1413 
1414 			if ( peek(f,1) == '(' ) {
1415 			    int here = mmiottell(f);
1416 			    pull(f);
1417 
1418 			    if ( (len = parenthetical('(',')',f)) <= 0 ) {
1419 				mmiotseek(f,here);
1420 				Qchar(c, f);
1421 				break;
1422 			    }
1423 			    sup++;
1424 			}
1425 			else {
1426 			    while ( isthisalnum(f,1+len) )
1427 				++len;
1428 			    if ( !len ) {
1429 				Qchar(c,f);
1430 				break;
1431 			    }
1432 			    shift(f,len);
1433 			}
1434 			Qstring("<sup>",f);
1435 			___mkd_reparse(sup, len, 0, f, "()");
1436 			Qstring("</sup>", f);
1437 		    }
1438 		    break;
1439 	case '_':
1440 	/* Underscores don't count if they're in the middle of a word */
1441 		    if ( !(is_flag_set(f->flags, MKD_NORELAXED) || is_flag_set(f->flags, MKD_STRICT))
1442 				&& isthisalnum(f,-1) && isthisalnum(f,1) ) {
1443 			Qchar(c, f);
1444 			break;
1445 		    }
1446 	case '*':
1447 	/* Underscores & stars don't count if they're out in the middle
1448 	 * of whitespace */
1449 		    if ( isthisspace(f,-1) && isthisspace(f,1) ) {
1450 			Qchar(c, f);
1451 			break;
1452 		    }
1453 		    /* else fall into the regular old emphasis case */
1454 		    if ( tag_text(f) )
1455 			Qchar(c, f);
1456 		    else {
1457 			for (rep = 1; peek(f,1) == c; pull(f) )
1458 			    ++rep;
1459 			Qem(f,c,rep);
1460 		    }
1461 		    break;
1462 
1463 	case '~':   if ( is_flag_set(f->flags, MKD_NOSTRIKETHROUGH)
1464 			 || is_flag_set(f->flags, MKD_STRICT)
1465 			 || is_flag_set(f->flags, MKD_TAGTEXT)
1466 			 || ! tickhandler(f,c,2,0, delspan) )
1467 			Qchar(c, f);
1468 		    break;
1469 
1470 	case '`':   if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) )
1471 			Qchar(c, f);
1472 		    break;
1473 
1474 	case '\\':  switch ( c = pull(f) ) {
1475 		    case '&':   Qstring("&amp;", f);
1476 				break;
1477 		    case '<':   c = peek(f,1);
1478 				if ( (c == EOF) || isspace(c) )
1479 				    Qstring("&lt;", f);
1480 				else {
1481 				    /* Markdown.pl does not escape <[nonwhite]
1482 				     * sequences */
1483 				    Qchar('\\', f);
1484 				    shift(f, -1);
1485 				}
1486 
1487 				break;
1488 		    case '^':   if ( is_flag_set(f->flags, MKD_STRICT)
1489 					|| is_flag_set(f->flags, MKD_NOSUPERSCRIPT) ) {
1490 				    Qchar('\\', f);
1491 				    shift(f,-1);
1492 				    break;
1493 				}
1494 				Qchar(c, f);
1495 				break;
1496 
1497 		    case ':': case '|':
1498 				if ( is_flag_set(f->flags, MKD_NOTABLES) ) {
1499 				    Qchar('\\', f);
1500 				    shift(f,-1);
1501 				    break;
1502 				}
1503 				Qchar(c, f);
1504 				break;
1505 
1506 		    case EOF:	Qchar('\\', f);
1507 				break;
1508 
1509 		    case '[':
1510 		    case '(':
1511 				Qchar(c, f);
1512 				break;
1513 
1514 		    case '$':	if ( is_flag_set(f->flags, MKD_LATEX) ) {
1515 				    Qchar(c, f);
1516 				    break;
1517 				}
1518 
1519 		    default:
1520                 if ( escaped(f,c) ||
1521 				     strchr(">#.-+{}]![*_\\()`", c) )
1522 				    Qchar(c, f);
1523 				else {
1524 				    Qchar('\\', f);
1525                     shift(f, -1);
1526                     if ( is_flag_set(f->flags, MKD_LATEX) ) {
1527                         mathhandlerExtended(f, "\\begin{equation}", "\\end{equation}")
1528 						|| mathhandlerExtended(f, "\\begin{equation*}", "\\end{equation*}")
1529 						|| mathhandlerExtended(f, "\\begin{align}", "\\end{align}")
1530 						|| mathhandlerExtended(f, "\\begin{align*}", "\\end{align*}")
1531 						|| mathhandlerExtended(f, "\\begin{bmatrix}", "\\end{bmatrix}")
1532 						|| mathhandlerExtended(f, "\\begin{cases}", "\\end{cases}");
1533                     }
1534 				}
1535 				break;
1536 		    }
1537 		    break;
1538 
1539 	case '<':   if ( !maybe_tag_or_link(f) )
1540 			Qstring("&lt;", f);
1541 		    break;
1542 
1543 	case '&':   j = (peek(f,1) == '#' ) ? 2 : 1;
1544 		    while ( isthisalnum(f,j) )
1545 			++j;
1546 
1547 		    if ( peek(f,j) != ';' )
1548 			Qstring("&amp;", f);
1549 		    else
1550 			Qchar(c, f);
1551 		    break;
1552 
1553 	case '$':   if ( is_flag_set(f->flags, MKD_LATEX) ) {
1554 			if (peek(f, 1) == '$' ) {
1555 			    pull(f);
1556 			    if ( mathhandler(f, '$', '$') )
1557 				break;
1558 			    Qchar('$', f);
1559 			}
1560 			else {
1561 			    int c2;
1562 			    int i = 1;
1563 
1564 			    while ( ((c2=peek(f,i)) != '$') && (c2 != EOF) )
1565                     i++;
1566 			    if ( c2 != EOF ) {
1567                     Qchar('$', f);
1568 					cputc(6, f);
1569                     EXPAND(f->latex) = '$';
1570 					EXPAND(f->latex) = 6;
1571                     while (i-- > 0 ) {
1572                         char sym = pull(f);
1573                         EXPAND(f->latex) = sym;
1574                         Qchar(sym, f);
1575                     }
1576                     EXPAND(f->latex) = 31;
1577 				break;
1578 			    }
1579 			}
1580 		    }
1581 		    /* fall through to default */
1582 
1583 	default:    f->last = c;
1584 		    Qchar(c, f);
1585 		    break;
1586 	}
1587     }
1588     /* truncate the input string after we've finished processing it */
1589     S(f->in) = f->isp = 0;
1590 } /* text */
1591 
1592 
1593 /* print a header block
1594  */
1595 static void
printheader(Paragraph * pp,MMIOT * f)1596 printheader(Paragraph *pp, MMIOT *f)
1597 {
1598     if ( is_flag_set(f->flags, MKD_IDANCHOR) ) {
1599 	Qprintf(f, "<h%d", pp->hnumber);
1600 	if ( is_flag_set(f->flags, MKD_TOC) ) {
1601 	    Qstring(" id=\"", f);
1602 	    Qanchor(pp->text, f);
1603 	    Qchar('"', f);
1604 	}
1605 	Qchar('>', f);
1606     } else {
1607 	if ( is_flag_set(f->flags, MKD_TOC) ) {
1608 	    Qstring("<a name=\"", f);
1609 	    Qanchor(pp->text, f);
1610 	    Qstring("\"></a>\n", f);
1611 	}
1612 	Qprintf(f, "<h%d>", pp->hnumber);
1613     }
1614     push(T(pp->text->text), S(pp->text->text), f);
1615     text(f);
1616     Qprintf(f, "</h%d>", pp->hnumber);
1617 }
1618 
1619 
/* table cell alignments, in the order alignments[] is indexed by */
enum e_alignments { a_NONE, a_CENTER, a_LEFT, a_RIGHT };

/* attribute text added to a <th>/<td> tag for each alignment */
static char* alignments[] = {
    [a_NONE]   = "",
    [a_CENTER] = " style=\"text-align:center;\"",
    [a_LEFT]   = " style=\"text-align:left;\"",
    [a_RIGHT]  = " style=\"text-align:right;\"",
};
1625 
1626 typedef STRING(int) Istring;
1627 
1628 static int
splat(Line * p,char * block,Istring align,int force,MMIOT * f)1629 splat(Line *p, char *block, Istring align, int force, MMIOT *f)
1630 {
1631     int first,
1632 	idx = p->dle,
1633 	colno = 0;
1634 
1635 
1636     ___mkd_tidy(&p->text);
1637     if ( T(p->text)[S(p->text)-1] == '|' )
1638 	--S(p->text);
1639 
1640     Qstring("<tr>\n", f);
1641     while ( idx < S(p->text) ) {
1642 	first = idx;
1643 	if ( force && (colno >= S(align)-1) )
1644 	    idx = S(p->text);
1645 	else
1646 	    while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) {
1647 		if ( T(p->text)[idx] == '\\' )
1648 		    ++idx;
1649 		++idx;
1650 	    }
1651 
1652 	Qprintf(f, "<%s%s>",
1653 		   block,
1654 		   alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]);
1655 	___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|");
1656 	Qprintf(f, "</%s>\n", block);
1657 	idx++;
1658 	colno++;
1659     }
1660     if ( force )
1661 	while (colno < S(align) ) {
1662 	    Qprintf(f, "<%s></%s>\n", block, block);
1663 	    ++colno;
1664 	}
1665     Qstring("</tr>\n", f);
1666     return colno;
1667 }
1668 
1669 
1670 static int
printtable(Paragraph * pp,MMIOT * f)1671 printtable(Paragraph *pp, MMIOT *f)
1672 {
1673     /* header, dashes, then lines of content */
1674 
1675     Line *hdr, *dash, *body;
1676     Istring align;
1677     int hcols,start;
1678     char *p;
1679     enum e_alignments it;
1680 
1681     hdr = pp->text;
1682     dash= hdr->next;
1683     body= dash->next;
1684 
1685     if ( T(hdr->text)[hdr->dle] == '|' ) {
1686 	/* trim leading pipe off all lines
1687 	 */
1688 	Line *r;
1689 	for ( r = pp->text; r; r = r->next )
1690 	    r->dle ++;
1691     }
1692 
1693     /* figure out cell alignments */
1694 
1695     CREATE(align);
1696 
1697     for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) {
1698 	char first, last;
1699 	int end;
1700 
1701 	last=first=0;
1702 	for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) {
1703 	    if ( p[end] == '\\' )
1704 		++ end;
1705 	    else if ( !isspace(p[end]) ) {
1706 		if ( !first) first = p[end];
1707 		last = p[end];
1708 	    }
1709 	}
1710 	it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT)
1711 			      : (( last == ':') ? a_RIGHT : a_NONE );
1712 
1713 	EXPAND(align) = it;
1714 	start = 1+end;
1715     }
1716 
1717     Qstring("<table>\n", f);
1718     Qstring("<thead>\n", f);
1719     hcols = splat(hdr, "th", align, 0, f);
1720     Qstring("</thead>\n", f);
1721 
1722     if ( hcols < S(align) )
1723 	S(align) = hcols;
1724     else
1725 	while ( hcols > S(align) )
1726 	    EXPAND(align) = a_NONE;
1727 
1728     Qstring("<tbody>\n", f);
1729     for ( ; body; body = body->next)
1730 	splat(body, "td", align, 1, f);
1731     Qstring("</tbody>\n", f);
1732     Qstring("</table>\n", f);
1733 
1734     DELETE(align);
1735     return 1;
1736 }
1737 
1738 
1739 static int
printblock(Paragraph * pp,MMIOT * f)1740 printblock(Paragraph *pp, MMIOT *f)
1741 {
1742     static char *Begin[] = { "", "<p>", "<p style=\"text-align:center;\">"  };
1743     static char *End[]   = { "", "</p>","</p>" };
1744     Line *t = pp->text;
1745     int align = pp->align;
1746 
1747     while (t) {
1748 	if ( S(t->text) ) {
1749 	    if ( t->next && S(t->text) > 2
1750 			 && T(t->text)[S(t->text)-2] == ' '
1751 			 && T(t->text)[S(t->text)-1] == ' ' ) {
1752 		push(T(t->text), S(t->text)-2, f);
1753 		pushc(MKD_EOLN, f);
1754 		pushc('\n', f);
1755 	    }
1756 	    else {
1757 		___mkd_tidy(&t->text);
1758 		push(T(t->text), S(t->text), f);
1759 		if ( t->next )
1760 		    pushc('\n', f);
1761 	    }
1762 	}
1763 	t = t->next;
1764     }
1765     Qstring(Begin[align], f);
1766     text(f);
1767     Qstring(End[align], f);
1768     return 1;
1769 }
1770 
1771 
1772 static void
printcode(Line * t,char * lang,MMIOT * f)1773 printcode(Line *t, char *lang, MMIOT *f)
1774 {
1775     int blanks;
1776 
1777     if ( f->cb->e_codefmt ) {
1778 	/* external code block formatter;  copy the text into a buffer,
1779 	 * call the formatter to style it, then dump that styled text
1780 	 * directly to the queue
1781 	 */
1782 	char *text;
1783 	char *fmt;
1784 	int size, copy_p;
1785 	Line *p;
1786 
1787 	for (size=0, p = t; p; p = p->next )
1788 	    size += 1+S(p->text);
1789 
1790 	text = malloc(1+size);
1791 
1792 	for ( copy_p = 0; t ; t = t->next ) {
1793 	    memcpy(text+copy_p, T(t->text), S(t->text));
1794 	    copy_p += S(t->text);
1795 	    text[copy_p++] = '\n';
1796 	}
1797 	text[copy_p] = 0;
1798 
1799 	fmt = (*(f->cb->e_codefmt))(text, copy_p, (lang && lang[0]) ? lang : 0);
1800 	free(text);
1801 
1802 	if ( fmt ) {
1803 	    Qwrite(fmt, strlen(fmt), f);
1804 	    if ( f->cb->e_free )
1805 		(*(f->cb->e_free))(fmt, f->cb->e_data);
1806 	    return;
1807 	}
1808 	/* otherwise the external formatter failed and we need to
1809 	 * fall back to the traditional codeblock format
1810 	 */
1811     }
1812 
1813     Qstring("<pre><code", f);
1814     if (lang && lang[0]) {
1815       Qstring(" class=\"", f);
1816       Qstring(lang, f);
1817       Qstring("\"", f);
1818     }
1819     Qstring(">", f);
1820     for ( blanks = 0; t ; t = t->next ) {
1821 	if ( S(t->text) > t->dle ) {
1822 	    while ( blanks ) {
1823 		Qchar('\n', f);
1824 		--blanks;
1825 	    }
1826 	    code(f, T(t->text), S(t->text));
1827 	    Qchar('\n', f);
1828 	}
1829 	else blanks++;
1830     }
1831     Qstring("</code></pre>", f);
1832 }
1833 
1834 
1835 static void
printhtml(Line * t,MMIOT * f)1836 printhtml(Line *t, MMIOT *f)
1837 {
1838     int blanks;
1839 
1840     for ( blanks=0; t ; t = t->next )
1841 	if ( S(t->text) ) {
1842 	    for ( ; blanks; --blanks )
1843 		Qchar('\n', f);
1844 
1845 	    Qwrite(T(t->text), S(t->text), f);
1846 	    Qchar('\n', f);
1847 	}
1848 	else
1849 	    blanks++;
1850 }
1851 
1852 
1853 static void
htmlify_paragraphs(Paragraph * p,MMIOT * f)1854 htmlify_paragraphs(Paragraph *p, MMIOT *f)
1855 {
1856     ___mkd_emblock(f);
1857 
1858     while (( p = display(p, f) )) {
1859 	___mkd_emblock(f);
1860 	Qstring("\n\n", f);
1861     }
1862 }
1863 
1864 
#ifdef GITHUB_CHECKBOX
/* Display one list item, with support for github-style checkboxes.
 *
 * <arguments>, if given, is added to the <li> tag as is.   An item that
 * has GITHUB_CHECK set in <flags> gets class="github_checkbox" and a
 * checkbox in front of its content: a disabled <input> element when
 * CHECKBOX_AS_INPUT is enabled, a ballot box character entity
 * otherwise.   IS_CHECKED selects the checked form.
 */
static void
li_htmlify(Paragraph *p, char *arguments, mkd_flag_t flags, MMIOT *f)
{
    ___mkd_emblock(f);

    /* opening tag */
    Qprintf(f, "<li");
    if ( arguments )
        Qprintf(f, " %s", arguments);
    if ( flags & GITHUB_CHECK )
        Qprintf(f, " class=\"github_checkbox\"");
    Qprintf(f, ">");

    /* the checkbox itself */
#if CHECKBOX_AS_INPUT
    if ( flags & GITHUB_CHECK ) {
        Qprintf(f, "<input disabled=\"\" type=\"checkbox\"");
        if ( flags & IS_CHECKED )
            Qprintf(f, " checked=\"checked\"");
        Qprintf(f, "/>");
    }
#else
    if ( flags & GITHUB_CHECK ) {
        if ( flags & IS_CHECKED )
            Qprintf(f, "&#x2611;");
        else
            Qprintf(f, "&#x2610;");
    }
#endif

    /* item content and closing tag */
    htmlify_paragraphs(p, f);

    Qprintf(f, "</li>");
    ___mkd_emblock(f);
}
#endif
1895 
1896 
1897 static void
htmlify(Paragraph * p,char * block,char * arguments,MMIOT * f)1898 htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
1899 {
1900     ___mkd_emblock(f);
1901     if ( block )
1902 	Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
1903 
1904     htmlify_paragraphs(p, f);
1905 
1906     if ( block )
1907 	 Qprintf(f, "</%s>", block);
1908     ___mkd_emblock(f);
1909 }
1910 
1911 
1912 static void
definitionlist(Paragraph * p,MMIOT * f)1913 definitionlist(Paragraph *p, MMIOT *f)
1914 {
1915     Line *tag;
1916 
1917     if ( p ) {
1918 	Qstring("<dl>\n", f);
1919 
1920 	for ( ; p ; p = p->next) {
1921 	    for ( tag = p->text; tag; tag = tag->next ) {
1922 		Qstring("<dt>", f);
1923 		___mkd_reparse(T(tag->text), S(tag->text), 0, f, 0);
1924 		Qstring("</dt>\n", f);
1925 	    }
1926 
1927 	    htmlify(p->down, "dd", p->ident, f);
1928 	    Qchar('\n', f);
1929 	}
1930 
1931 	Qstring("</dl>", f);
1932     }
1933 }
1934 
1935 
1936 static void
listdisplay(int typ,Paragraph * p,MMIOT * f)1937 listdisplay(int typ, Paragraph *p, MMIOT* f)
1938 {
1939     if ( p ) {
1940 	Qprintf(f, "<%cl", (typ==UL)?'u':'o');
1941 	if ( typ == AL )
1942 	    Qprintf(f, " type=\"a\"");
1943 	Qprintf(f, ">\n");
1944 
1945 	for ( ; p ; p = p->next ) {
1946 #ifdef GITHUB_CHECKBOX
1947 	    li_htmlify(p->down, p->ident, p->flags, f);
1948 #else
1949 	    htmlify(p->down, "li", p->ident, f);
1950 #endif
1951 	    Qchar('\n', f);
1952 	}
1953 
1954 	Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o');
1955     }
1956 }
1957 
1958 
1959 /* dump out a Paragraph in the desired manner
1960  */
1961 static Paragraph*
display(Paragraph * p,MMIOT * f)1962 display(Paragraph *p, MMIOT *f)
1963 {
1964     if ( !p ) return 0;
1965 
1966     switch ( p->typ ) {
1967     case STYLE:
1968     case WHITESPACE:
1969 	break;
1970 
1971     case HTML:
1972 	printhtml(p->text, f);
1973 	break;
1974 
1975     case CODE:
1976 	printcode(p->text, p->lang, f);
1977 	break;
1978 
1979     case QUOTE:
1980 	htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
1981 	break;
1982 
1983     case UL:
1984     case OL:
1985     case AL:
1986 	listdisplay(p->typ, p->down, f);
1987 	break;
1988 
1989     case DL:
1990 	definitionlist(p->down, f);
1991 	break;
1992 
1993     case HR:
1994 	Qstring("<hr />", f);
1995 	break;
1996 
1997     case HDR:
1998 	printheader(p, f);
1999 	break;
2000 
2001     case TABLE:
2002 	printtable(p, f);
2003 	break;
2004 
2005     case SOURCE:
2006 	htmlify(p->down, 0, 0, f);
2007 	break;
2008 
2009     default:
2010 	printblock(p, f);
2011 	break;
2012     }
2013     return p->next;
2014 }
2015 
2016 
2017 /* dump out a list of footnotes
2018  */
2019 static void
mkd_extra_footnotes(MMIOT * m)2020 mkd_extra_footnotes(MMIOT *m)
2021 {
2022     int j, i;
2023     Footnote *t;
2024 
2025     if ( m->footnotes->reference == 0 )
2026 	return;
2027 
2028     Csprintf(&m->out, "\n<div class=\"footnotes\">\n<hr/>\n<ol>\n");
2029 
2030     for ( i=1; i <= m->footnotes->reference; i++ ) {
2031 	for ( j=0; j < S(m->footnotes->note); j++ ) {
2032 	    t = &T(m->footnotes->note)[j];
2033 	    if ( (t->refnumber == i) && (t->flags & REFERENCED) ) {
2034 		Csprintf(&m->out, "<li id=\"%s:%d\">\n",
2035 			    p_or_nothing(m), t->refnumber);
2036 		htmlify(t->text, 0, 0, m);
2037 		Csprintf(&m->out, "<a href=\"#%sref:%d\" rev=\"footnote\">&#8617;</a>",
2038 			    p_or_nothing(m), t->refnumber);
2039 		Csprintf(&m->out, "</li>\n");
2040 	    }
2041 	}
2042     }
2043     Csprintf(&m->out, "</ol>\n</div>\n");
2044 }
2045 
2046 
2047 /* return a pointer to the compiled markdown
2048  * document.
2049  */
2050 int
mkd_document(Document * p,char ** res)2051 mkd_document(Document *p, char **res)
2052 {
2053     int size;
2054 
2055     if ( p && p->compiled ) {
2056 	if ( ! p->html ) {
2057 	    htmlify(p->code, 0, 0, p->ctx);
2058 	    if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) )
2059 		mkd_extra_footnotes(p->ctx);
2060 	    p->html = 1;
2061 	    size = S(p->ctx->out);
2062 
2063 	    if ( (size == 0) || T(p->ctx->out)[size-1] ) {
2064 		/* Add a null byte at the end of the generated html,
2065 		 * but pretend it doesn't exist.
2066 		 */
2067 		EXPAND(p->ctx->out) = 0;
2068 		--S(p->ctx->out);
2069 	    }
2070 	}
2071 
2072 	*res = T(p->ctx->out);
2073 	return S(p->ctx->out);
2074     }
2075     return EOF;
2076 }
2077 
2078 /* Return list of founded latex textes (only textes, without positions) separeted by ASCII unit separator (code - 31)
2079  * Ugly, but works
2080  */
2081 int
mkd_latextext(Document * p,char ** res)2082 mkd_latextext(Document *p, char **res)
2083 {
2084     int size;
2085 
2086     if ( p && p->compiled ) {
2087 	if ( ! p->html ) {
2088 	    htmlify(p->code, 0, 0, p->ctx);
2089 	    if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) )
2090 		mkd_extra_footnotes(p->ctx);
2091 	    p->html = 1;
2092 	    size = S(p->ctx->latex);
2093 
2094 	    if ( (size == 0) || T(p->ctx->latex)[size-1] ) {
2095 		/* Add a null byte at the end of the generated html,
2096 		 * but pretend it doesn't exist.
2097 		 */
2098 		EXPAND(p->ctx->latex) = 0;
2099 		--S(p->ctx->latex);
2100 	    }
2101 	}
2102 
2103 	*res = T(p->ctx->latex);
2104 	return S(p->ctx->latex);
2105     }
2106     return EOF;
2107 }
2108