1 /* markdown: a C implementation of John Gruber's Markdown markup language.
2 *
3 * Copyright (C) 2007 David L Parsons.
4 * The redistribution terms are provided in the COPYRIGHT file that must
5 * be distributed with this source code.
6 */
7 #include <stdio.h>
8 #include <string.h>
9 #include <stdarg.h>
10 #include <stdlib.h>
11 #include <time.h>
12 #include <ctype.h>
13
14 #include "config.h"
15
16 #include "cstring.h"
17 #include "markdown.h"
18 #include "amalloc.h"
19
/* comparison-function type; linkylinky() casts __mkd_footsort to this
 * when it hands it to bsearch()
 */
typedef int (*stfu)(const void*,const void*);
/* span-writer callback type taken by tickhandler(): called with the
 * MMIOT and the size of the span
 */
typedef void (*spanhandler)(MMIOT*,int);

/* forward declarations */
static void text(MMIOT *f);
static Paragraph *display(Paragraph*, MMIOT*);

/* externals from markdown.c */
int __mkd_footsort(Footnote *, Footnote *);
29
30 /*
31 * push text into the generator input buffer
32 */
33 static void
push(char * bfr,int size,MMIOT * f)34 push(char *bfr, int size, MMIOT *f)
35 {
36 while ( size-- > 0 )
37 EXPAND(f->in) = *bfr++;
38 }
39
40
41 /*
42 * push a character into the generator input buffer
43 */
44 static void
pushc(char c,MMIOT * f)45 pushc(char c, MMIOT *f)
46 {
47 EXPAND(f->in) = c;
48 }
49
50
51 /* look <i> characters ahead of the cursor.
52 */
53 static inline int
peek(MMIOT * f,int i)54 peek(MMIOT *f, int i)
55 {
56
57 i += (f->isp-1);
58
59 return (i >= 0) && (i < S(f->in)) ? (unsigned char)T(f->in)[i] : EOF;
60 }
61
62
63 /* pull a byte from the input buffer
64 */
65 static inline unsigned int
pull(MMIOT * f)66 pull(MMIOT *f)
67 {
68 return ( f->isp < S(f->in) ) ? (unsigned char)T(f->in)[f->isp++] : EOF;
69 }
70
71
72 /* return a pointer to the current position in the input buffer.
73 */
74 static inline char*
cursor(MMIOT * f)75 cursor(MMIOT *f)
76 {
77 return T(f->in) + f->isp;
78 }
79
80
81 static inline int
isthisspace(MMIOT * f,int i)82 isthisspace(MMIOT *f, int i)
83 {
84 int c = peek(f, i);
85
86 if ( c == EOF )
87 return 1;
88 if ( c & 0x80 )
89 return 0;
90 return isspace(c) || (c < ' ');
91 }
92
93
94 static inline int
isthisalnum(MMIOT * f,int i)95 isthisalnum(MMIOT *f, int i)
96 {
97 int c = peek(f, i);
98
99 return (c != EOF) && isalnum(c);
100 }
101
102
103 static inline int
isthisnonword(MMIOT * f,int i)104 isthisnonword(MMIOT *f, int i)
105 {
106 return isthisspace(f, i) || ispunct(peek(f,i));
107 }
108
109
/* return/set the current cursor position
 * (when setting the current cursor position we also need to flush the
 * last character written cache)
 *
 * Both macro parameters are parenthesized so that any expression
 * (for example `&sub`) can be passed safely.
 */
#define mmiotseek(f,x)	(((f)->isp = (x)), ((f)->last = 0))
#define mmiottell(f)	((f)->isp)
116
117
118 /* move n characters forward ( or -n characters backward) in the input buffer.
119 */
120 static void
shift(MMIOT * f,int i)121 shift(MMIOT *f, int i)
122 {
123 if (f->isp + i >= 0 )
124 f->isp += i;
125 }
126
127
128 /* Qchar()
129 */
130 static void
Qchar(int c,MMIOT * f)131 Qchar(int c, MMIOT *f)
132 {
133 block *cur;
134
135 if ( S(f->Q) == 0 ) {
136 cur = &EXPAND(f->Q);
137 memset(cur, 0, sizeof *cur);
138 cur->b_type = bTEXT;
139 }
140 else
141 cur = &T(f->Q)[S(f->Q)-1];
142
143 EXPAND(cur->b_text) = c;
144 }
145
146
147 /* Qstring()
148 */
149 static void
Qstring(char * s,MMIOT * f)150 Qstring(char *s, MMIOT *f)
151 {
152 while (*s)
153 Qchar(*s++, f);
154 }
155
156
157 /* Qwrite()
158 */
159 static void
Qwrite(char * s,int size,MMIOT * f)160 Qwrite(char *s, int size, MMIOT *f)
161 {
162 while (size-- > 0)
163 Qchar(*s++, f);
164 }
165
166
167 /* Qprintf()
168 */
169 static void
Qprintf(MMIOT * f,char * fmt,...)170 Qprintf(MMIOT *f, char *fmt, ...)
171 {
172 char bfr[80];
173 va_list ptr;
174
175 va_start(ptr,fmt);
176 vsnprintf(bfr, sizeof bfr, fmt, ptr);
177 va_end(ptr);
178 Qstring(bfr, f);
179 }
180
181
182 /* Qanchor() prints out a suitable-for-id-tag version of a string
183 */
184 static void
Qanchor(struct line * p,MMIOT * f)185 Qanchor(struct line *p, MMIOT *f)
186 {
187 mkd_string_to_anchor(T(p->text), S(p->text),
188 (mkd_sta_function_t)Qchar, f, 1, f);
189 }
190
191
192 /* Qem()
193 */
194 static void
Qem(MMIOT * f,char c,int count)195 Qem(MMIOT *f, char c, int count)
196 {
197 block *p = &EXPAND(f->Q);
198
199 memset(p, 0, sizeof *p);
200 p->b_type = (c == '*') ? bSTAR : bUNDER;
201 p->b_char = c;
202 p->b_count = count;
203
204 memset(&EXPAND(f->Q), 0, sizeof(block));
205 }
206
207
208 /* generate html from a markup fragment
209 */
210 void
___mkd_reparse(char * bfr,int size,mkd_flag_t flags,MMIOT * f,char * esc)211 ___mkd_reparse(char *bfr, int size, mkd_flag_t flags, MMIOT *f, char *esc)
212 {
213 MMIOT sub;
214 struct escaped e;
215
216 ___mkd_initmmiot(&sub, f->footnotes);
217
218 sub.flags = f->flags | flags;
219 sub.cb = f->cb;
220 sub.ref_prefix = f->ref_prefix;
221
222 if ( esc ) {
223 sub.esc = &e;
224 e.up = f->esc;
225 e.text = esc;
226 }
227 else
228 sub.esc = f->esc;
229
230 push(bfr, size, &sub);
231 pushc(0, &sub);
232 S(sub.in)--;
233
234 text(&sub);
235 ___mkd_emblock(&sub);
236
237 Qwrite(T(sub.out), S(sub.out), f);
238 /* inherit the last character printed from the reparsed
239 * text; this way superscripts can work when they're
240 * applied to something embedded in a link
241 */
242 f->last = sub.last;
243
244 ___mkd_freemmiot(&sub, f->footnotes);
245 }
246
247
248 /*
249 * check the escape list for special cases
250 */
251 static int
escaped(MMIOT * f,char c)252 escaped(MMIOT *f, char c)
253 {
254 struct escaped *thing = f->esc;
255
256 while ( thing ) {
257 if ( strchr(thing->text, c) )
258 return 1;
259 thing = thing->up;
260 }
261 return 0;
262 }
263
264
265 /*
266 * write out a url, escaping problematic characters
267 */
268 static void
puturl(char * s,int size,MMIOT * f,int display)269 puturl(char *s, int size, MMIOT *f, int display)
270 {
271 unsigned char c;
272
273 while ( size-- > 0 ) {
274 c = *s++;
275
276 if ( c == '\\' && size-- > 0 ) {
277 c = *s++;
278
279 if ( !( ispunct(c) || isspace(c) ) )
280 Qchar('\\', f);
281 }
282
283 if ( c == '&' )
284 Qstring("&", f);
285 else if ( c == '<' )
286 Qstring("<", f);
287 else if ( c == '"' )
288 Qstring("%22", f);
289 else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) )
290 Qchar(c, f);
291 else if ( c == MKD_EOLN ) /* untokenize hard return */
292 Qstring(" ", f);
293 else
294 Qprintf(f, "%%%02X", c);
295 }
296 }
297
298
299 /* advance forward until the next character is not whitespace
300 */
301 static int
eatspace(MMIOT * f)302 eatspace(MMIOT *f)
303 {
304 int c;
305
306 for ( ; ((c=peek(f, 1)) != EOF) && isspace(c); pull(f) )
307 ;
308 return c;
309 }
310
311
312 /* (match (a (nested (parenthetical (string.)))))
313 */
314 static int
parenthetical(int in,int out,MMIOT * f)315 parenthetical(int in, int out, MMIOT *f)
316 {
317 int size, indent, c;
318
319 for ( indent=1,size=0; indent; size++ ) {
320 if ( (c = pull(f)) == EOF )
321 return EOF;
322 else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) {
323 ++size;
324 pull(f);
325 }
326 else if ( c == in )
327 ++indent;
328 else if ( c == out )
329 --indent;
330 }
331 return size ? (size-1) : 0;
332 }
333
334
335 /* extract a []-delimited label from the input stream.
336 */
337 static int
linkylabel(MMIOT * f,Cstring * res)338 linkylabel(MMIOT *f, Cstring *res)
339 {
340 char *ptr = cursor(f);
341 int size;
342
343 if ( (size = parenthetical('[',']',f)) != EOF ) {
344 T(*res) = ptr;
345 S(*res) = size;
346 return 1;
347 }
348 return 0;
349 }
350
351
352 /* see if the quote-prefixed linky segment is actually a title.
353 */
354 static int
linkytitle(MMIOT * f,char quote,Footnote * ref)355 linkytitle(MMIOT *f, char quote, Footnote *ref)
356 {
357 int whence = mmiottell(f);
358 char *title = cursor(f);
359 char *e;
360 register int c;
361
362 while ( (c = pull(f)) != EOF ) {
363 e = cursor(f);
364 if ( c == quote ) {
365 if ( (c = eatspace(f)) == ')' ) {
366 T(ref->title) = 1+title;
367 S(ref->title) = (e-title)-2;
368 return 1;
369 }
370 }
371 }
372 mmiotseek(f, whence);
373 return 0;
374 }
375
376
377 /* extract a =HHHxWWW size from the input stream
378 */
379 static int
linkysize(MMIOT * f,Footnote * ref)380 linkysize(MMIOT *f, Footnote *ref)
381 {
382 int height=0, width=0;
383 int whence = mmiottell(f);
384 int c;
385
386 if ( isspace(peek(f,0)) ) {
387 pull(f); /* eat '=' */
388
389 for ( c = pull(f); isdigit(c); c = pull(f))
390 width = (width * 10) + (c - '0');
391
392 if ( c == 'x' ) {
393 for ( c = pull(f); isdigit(c); c = pull(f))
394 height = (height*10) + (c - '0');
395
396 if ( isspace(c) )
397 c = eatspace(f);
398
399 if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) {
400 ref->height = height;
401 ref->width = width;
402 return 1;
403 }
404 }
405 }
406 mmiotseek(f, whence);
407 return 0;
408 }
409
410
411 /* extract a <...>-encased url from the input stream.
412 * (markdown 1.0.2b8 compatibility; older versions
413 * of markdown treated the < and > as syntactic
414 * sugar that didn't have to be there. 1.0.2b8
415 * requires a closing >, and then falls into the
416 * title or closing )
417 */
418 static int
linkybroket(MMIOT * f,int image,Footnote * p)419 linkybroket(MMIOT *f, int image, Footnote *p)
420 {
421 int c;
422 int good = 0;
423
424 T(p->link) = cursor(f);
425 for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) {
426 /* pull in all input until a '>' is found, or die trying.
427 */
428 if ( c == EOF )
429 return 0;
430 else if ( (c == '\\') && ispunct(peek(f,2)) ) {
431 ++S(p->link);
432 pull(f);
433 }
434 }
435
436 c = eatspace(f);
437
438 /* next nonspace needs to be a title, a size, or )
439 */
440 if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) )
441 good=1;
442 else if ( image && (c == '=') && linkysize(f,p) )
443 good=1;
444 else
445 good=( c == ')' );
446
447 if ( good ) {
448 if ( peek(f, 1) == ')' )
449 pull(f);
450
451 ___mkd_tidy(&p->link);
452 }
453
454 return good;
455 } /* linkybroket */
456
457
458 /* extract a (-prefixed url from the input stream.
459 * the label is either of the format `<link>`, where I
460 * extract until I find a >, or it is of the format
461 * `text`, where I extract until I reach a ')', a quote,
462 * or (if image) a '='
463 */
464 static int
linkyurl(MMIOT * f,int image,Footnote * p)465 linkyurl(MMIOT *f, int image, Footnote *p)
466 {
467 int c;
468 int mayneedtotrim=0;
469
470 if ( (c = eatspace(f)) == EOF )
471 return 0;
472
473 if ( c == '<' ) {
474 pull(f);
475 if ( !is_flag_set(f->flags, MKD_1_COMPAT) )
476 return linkybroket(f,image,p);
477 mayneedtotrim=1;
478 }
479
480 T(p->link) = cursor(f);
481 for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) {
482 if ( c == EOF )
483 return 0;
484 else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) )
485 break;
486 else if ( image && (c == '=') && linkysize(f, p) )
487 break;
488 else if ( (c == '\\') && ispunct(peek(f,2)) ) {
489 ++S(p->link);
490 pull(f);
491 }
492 pull(f);
493 }
494 if ( peek(f, 1) == ')' )
495 pull(f);
496
497 ___mkd_tidy(&p->link);
498
499 if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') )
500 --S(p->link);
501
502 return 1;
503 }
504
505
506
/* prefixes for <automatic links>
 */
static struct _protocol {
    char *name;		/* protocol prefix, including the ':' */
    int   nlen;		/* length of that prefix */
} protocol[] = {
#define _aprotocol(x)	{ x, (sizeof x)-1 }
    _aprotocol( "https:" ),
    _aprotocol( "http:" ),
    _aprotocol( "news:" ),
    _aprotocol( "ftp:" ),
#undef _aprotocol
};
#define NRPROTOCOLS	(sizeof protocol / sizeof protocol[0])


/* does the <size>-byte string at <text> start with one of the
 * automatic-link protocol prefixes?   The comparison ignores case.
 */
static int
isautoprefix(char *text, int size)
{
    struct _protocol *p;
    struct _protocol *end = protocol + NRPROTOCOLS;

    for ( p = protocol; p < end; p++ ) {
        if ( size < p->nlen )
            continue;
        if ( strncasecmp(text, p->name, p->nlen) == 0 )
            return 1;
    }
    return 0;
}
534
535
536 /*
537 * all the tag types that linkylinky can produce are
538 * defined by this structure.
539 */
540 typedef struct linkytype {
541 char *pat;
542 int szpat;
543 char *link_pfx; /* tag prefix and link pointer (eg: "<a href="\"" */
544 char *link_sfx; /* link suffix (eg: "\"" */
545 int WxH; /* this tag allows width x height arguments */
546 char *text_pfx; /* text prefix (eg: ">" */
547 char *text_sfx; /* text suffix (eg: "</a>" */
548 int flags; /* reparse flags */
549 int kind; /* tag is url or something else? */
550 #define IS_URL 0x01
551 } linkytype;
552
553 static linkytype imaget = { 0, 0, "<img src=\"", "\"",
554 1, " alt=\"", "\" />", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL };
555 static linkytype linkt = { 0, 0, "<a href=\"", "\"",
556 0, ">", "</a>", MKD_NOLINKS, IS_URL };
557
558 /*
559 * pseudo-protocols for [][];
560 *
561 * id: generates <a id="link">tag</a>
562 * class: generates <span class="link">tag</span>
563 * raw: just dump the link without any processing
564 */
565 static linkytype specials[] = {
566 { "id:", 3, "<span id=\"", "\"", 0, ">", "</span>", 0, 0 },
567 { "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 },
568 { "lang:", 5, "<span lang=\"", "\"", 0, ">", "</span>", 0, 0 },
569 { "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 },
570 { "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 },
571 } ;
572
573 #define NR(x) (sizeof x / sizeof x[0])
574
575 /* see if t contains one of our pseudo-protocols.
576 */
577 static linkytype *
pseudo(Cstring t)578 pseudo(Cstring t)
579 {
580 int i;
581 linkytype *r;
582
583 for ( i=0, r=specials; i < NR(specials); i++,r++ ) {
584 if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
585 return r;
586 }
587 return 0;
588 }
589
590
591 /* print out the start of an `img' or `a' tag, applying callbacks as needed.
592 */
593 static void
printlinkyref(MMIOT * f,linkytype * tag,char * link,int size)594 printlinkyref(MMIOT *f, linkytype *tag, char *link, int size)
595 {
596 char *edit;
597
598 if ( is_flag_set(f->flags, IS_LABEL) )
599 return;
600
601 Qstring(tag->link_pfx, f);
602
603 if ( tag->kind & IS_URL ) {
604 if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) {
605 puturl(edit, strlen(edit), f, 0);
606 if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
607 }
608 else
609 puturl(link + tag->szpat, size - tag->szpat, f, 0);
610 }
611 else
612 ___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0);
613
614 Qstring(tag->link_sfx, f);
615
616 if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) {
617 Qchar(' ', f);
618 Qstring(edit, f);
619 if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
620 }
621 } /* printlinkyref */
622
623
624 /* helper function for php markdown extra footnotes; allow the user to
625 * define a prefix tag instead of just `fn`
626 */
627 static char *
p_or_nothing(p)628 p_or_nothing(p)
629 MMIOT *p;
630 {
631 return p->ref_prefix ? p->ref_prefix : "fn";
632 }
633
634
635 /* php markdown extra/daring fireball style print footnotes
636 */
637 static int
extra_linky(MMIOT * f,Cstring text,Footnote * ref)638 extra_linky(MMIOT *f, Cstring text, Footnote *ref)
639 {
640 if ( ref->flags & REFERENCED )
641 return 0;
642
643 if ( f->flags & IS_LABEL )
644 ___mkd_reparse(T(text), S(text), linkt.flags, f, 0);
645 else {
646 ref->flags |= REFERENCED;
647 ref->refnumber = ++ f->footnotes->reference;
648 Qprintf(f, "<sup id=\"%sref:%d\"><a href=\"#%s:%d\" rel=\"footnote\">%d</a></sup>",
649 p_or_nothing(f), ref->refnumber,
650 p_or_nothing(f), ref->refnumber, ref->refnumber);
651 }
652 return 1;
653 } /* extra_linky */
654
655
656
657 /* check a url (or url fragment to see that it begins with a known good
658 * protocol (or no protocol at all)
659 */
660 static int
safelink(Cstring link)661 safelink(Cstring link)
662 {
663 char *p, *colon;
664
665 if ( T(link) == 0 ) /* no link; safe */
666 return 1;
667
668 p = T(link);
669 if ( (colon = memchr(p, ':', S(link))) == 0 )
670 return 1; /* no protocol specified: safe */
671
672 if ( !isalpha(*p) ) /* protocol/method is [alpha][alnum or '+.-'] */
673 return 1;
674 while ( ++p < colon )
675 if ( !(isalnum(*p) || *p == '.' || *p == '+' || *p == '-') )
676 return 1;
677
678 return isautoprefix(T(link), S(link));
679 }
680
681
682 /* print out a linky (or fail if it's Not Allowed)
683 */
684 static int
linkyformat(MMIOT * f,Cstring text,int image,Footnote * ref)685 linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref)
686 {
687 linkytype *tag;
688
689
690 if ( image )
691 tag = &imaget;
692 else if ( tag = pseudo(ref->link) ) {
693 if ( is_flag_set(f->flags, MKD_NO_EXT) || is_flag_set(f->flags, MKD_SAFELINK) )
694 return 0;
695 }
696 else if ( is_flag_set(f->flags, MKD_SAFELINK) && !safelink(ref->link) )
697 /* if MKD_SAFELINK, only accept links that are local or
698 * a well-known protocol
699 */
700 return 0;
701 else
702 tag = &linkt;
703
704 if ( f->flags & tag->flags )
705 return 0;
706
707 if ( is_flag_set(f->flags, IS_LABEL) )
708 ___mkd_reparse(T(text), S(text), tag->flags, f, 0);
709 else if ( tag->link_pfx ) {
710 printlinkyref(f, tag, T(ref->link), S(ref->link));
711
712 if ( tag->WxH ) {
713 if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height);
714 if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width);
715 }
716
717 if ( S(ref->title) ) {
718 Qstring(" title=\"", f);
719 ___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0);
720 Qchar('"', f);
721 }
722
723 Qstring(tag->text_pfx, f);
724 ___mkd_reparse(T(text), S(text), tag->flags, f, 0);
725 Qstring(tag->text_sfx, f);
726 }
727 else
728 Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f);
729
730 return 1;
731 } /* linkyformat */
732
733
734 /*
735 * process embedded links and images
736 */
737 static int
linkylinky(int image,MMIOT * f)738 linkylinky(int image, MMIOT *f)
739 {
740 int start = mmiottell(f);
741 Cstring name;
742 Footnote key, *ref;
743
744 int status = 0;
745 int extra_footnote = 0;
746
747 CREATE(name);
748 memset(&key, 0, sizeof key);
749
750 if ( linkylabel(f, &name) ) {
751 if ( peek(f,1) == '(' ) {
752 pull(f);
753 if ( linkyurl(f, image, &key) )
754 status = linkyformat(f, name, image, &key);
755 }
756 else {
757 int goodlink, implicit_mark = mmiottell(f);
758
759 if ( isspace(peek(f,1)) )
760 pull(f);
761
762 if ( peek(f,1) == '[' ) {
763 pull(f); /* consume leading '[' */
764 goodlink = linkylabel(f, &key.tag);
765 }
766 else {
767 /* new markdown implicit name syntax doesn't
768 * require a second []
769 */
770 mmiotseek(f, implicit_mark);
771 goodlink = !is_flag_set(f->flags, MKD_1_COMPAT);
772
773 if ( is_flag_set(f->flags, MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' )
774 extra_footnote = 1;
775 }
776
777 if ( goodlink ) {
778 if ( !S(key.tag) ) {
779 DELETE(key.tag);
780 T(key.tag) = T(name);
781 S(key.tag) = S(name);
782 }
783
784 if ( ref = bsearch(&key, T(f->footnotes->note),
785 S(f->footnotes->note),
786 sizeof key, (stfu)__mkd_footsort) ) {
787 if ( extra_footnote )
788 status = extra_linky(f,name,ref);
789 else
790 status = linkyformat(f, name, image, ref);
791 }
792 }
793 }
794 }
795
796 DELETE(name);
797 ___mkd_freefootnote(&key);
798
799 if ( status == 0 )
800 mmiotseek(f, start);
801
802 return status;
803 }
804
805
806 /* write a character to output, doing text escapes ( & -> &,
807 * > -> > < -> < )
808 */
809 static void
cputc(int c,MMIOT * f)810 cputc(int c, MMIOT *f)
811 {
812 switch (c) {
813 case '&': Qstring("&", f); break;
814 case '>': Qstring(">", f); break;
815 case '<': Qstring("<", f); break;
816 default : Qchar(c, f); break;
817 }
818 }
819
820
821 /*
822 * convert an email address to a string of nonsense
823 */
824 static void
mangle(char * s,int len,MMIOT * f)825 mangle(char *s, int len, MMIOT *f)
826 {
827 while ( len-- > 0 ) {
828 #if DEBIAN_GLITCH
829 Qprintf(f, "&#%02d;", *((unsigned char*)(s++)) );
830 #else
831 Qstring("&#", f);
832 Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", *((unsigned char*)(s++)) );
833 #endif
834 }
835 }
836
837
838 /* nrticks() -- count up a row of tick marks
839 */
840 static int
nrticks(int offset,int tickchar,MMIOT * f)841 nrticks(int offset, int tickchar, MMIOT *f)
842 {
843 int tick = 0;
844
845 while ( peek(f, offset+tick) == tickchar ) tick++;
846
847 return tick;
848 } /* nrticks */
849
850
851 /* matchticks() -- match a certain # of ticks, and if that fails
852 * match the largest subset of those ticks.
853 *
854 * if a subset was matched, return the # of ticks
855 * that were matched.
856 */
857 static int
matchticks(MMIOT * f,int tickchar,int ticks,int * endticks)858 matchticks(MMIOT *f, int tickchar, int ticks, int *endticks)
859 {
860 int size, count, c;
861 int subsize=0, subtick=0;
862
863 *endticks = ticks;
864 for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) {
865 if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) {
866 if ( count == ticks )
867 return size;
868 else if ( count ) {
869 if ( (count > subtick) && (count < ticks) ) {
870 subsize = size;
871 subtick = count;
872 }
873 size += count;
874 }
875 }
876 }
877 if ( subsize ) {
878 *endticks = subtick;
879 return subsize;
880 }
881 return 0;
882 } /* matchticks */
883
884
885 /* code() -- write a string out as code. The only characters that have
886 * special meaning in a code block are * `<' and `&' , which
887 * are /always/ expanded to < and &
888 */
889 static void
code(MMIOT * f,char * s,int length)890 code(MMIOT *f, char *s, int length)
891 {
892 int i,c;
893
894 for ( i=0; i < length; i++ )
895 if ( (c = s[i]) == MKD_EOLN) /* expand back to 2 spaces */
896 Qstring(" ", f);
897 else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) )
898 cputc(s[++i], f);
899 else
900 cputc(c, f);
901 } /* code */
902
903 /* delspan() -- write out a chunk of text, blocking with <del>...</del>
904 */
905 static void
delspan(MMIOT * f,int size)906 delspan(MMIOT *f, int size)
907 {
908 Qstring("<del>", f);
909 ___mkd_reparse(cursor(f)-1, size, 0, f, 0);
910 Qstring("</del>", f);
911 }
912
913
914 /* codespan() -- write out a chunk of text as code, trimming one
915 * space off the front and/or back as appropriate.
916 */
917 static void
codespan(MMIOT * f,int size)918 codespan(MMIOT *f, int size)
919 {
920 int i=0;
921
922 if ( size > 1 && peek(f, size-1) == ' ' ) --size;
923 if ( peek(f,i) == ' ' ) ++i, --size;
924
925 Qstring("<code>", f);
926 code(f, cursor(f)+(i-1), size);
927 Qstring("</code>", f);
928 } /* codespan */
929
930
931 /* before letting a tag through, validate against
932 * MKD_NOLINKS and MKD_NOIMAGE
933 */
934 static int
forbidden_tag(MMIOT * f)935 forbidden_tag(MMIOT *f)
936 {
937 int c = toupper(peek(f, 1));
938
939 if ( is_flag_set(f->flags, MKD_NOHTML) )
940 return 1;
941
942 if ( c == 'A' && is_flag_set(f->flags, MKD_NOLINKS) && !isthisalnum(f,2) )
943 return 1;
944 if ( c == 'I' && is_flag_set(f->flags, MKD_NOIMAGE)
945 && strncasecmp(cursor(f)+1, "MG", 2) == 0
946 && !isthisalnum(f,4) )
947 return 1;
948 return 0;
949 }
950
951
/* Check a string to see if it looks like a mail address
 * "looks like a mail address" means alphanumeric + some
 * specials, then a `@`, then alphanumeric + some specials,
 * but with a `.`
 *
 *   p, size   the candidate text (not NUL-terminated)
 *
 * Returns 1 if it looks like an address, 0 otherwise.   The part
 * after the `@` may not start with a `.` and must contain a `.` that
 * is not its last character.   Bytes are converted to unsigned char
 * before being classified, and a NUL byte is never an address
 * character (strchr() would otherwise match it against the set's
 * terminator).
 */
static int
maybe_address(char *p, int size)
{
    int ok = 0;
    unsigned char c;

    /* local part */
    for ( ; size > 0; ++p, --size ) {
        c = (unsigned char)*p;
        if ( !(isalnum(c) || (c && strchr("._-+*", c))) )
            break;
    }

    if ( ! (size > 0 && *p == '@') )
        return 0;

    --size, ++p;

    if ( size > 0 && *p == '.' )
        return 0;

    /* domain part */
    for ( ; size > 0; ++p, --size ) {
        c = (unsigned char)*p;
        if ( !(isalnum(c) || (c && strchr("._-+", c))) )
            break;
        if ( c == '.' && size > 1 )
            ok = 1;
    }

    /* anything left over means we hit a character we don't allow */
    return (size > 0) ? 0 : ok;
}
977
978
979 /* The size-length token at cursor(f) is either a mailto:, an
980 * implicit mailto:, one of the approved url protocols, or just
981 * plain old text. If it's a mailto: or an approved protocol,
982 * linkify it, otherwise say "no"
983 */
984 static int
process_possible_link(MMIOT * f,int size)985 process_possible_link(MMIOT *f, int size)
986 {
987 int address= 0;
988 int mailto = 0;
989 char *text = cursor(f);
990
991 if ( is_flag_set(f->flags, MKD_NOLINKS) ) return 0;
992
993 if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) {
994 /* if it says it's a mailto, it's a mailto -- who am
995 * I to second-guess the user?
996 */
997 address = 1;
998 mailto = 7; /* 7 is the length of "mailto:"; we need this */
999 }
1000 else
1001 address = maybe_address(text, size);
1002
1003 if ( address ) {
1004 Qstring("<a href=\"", f);
1005 if ( !mailto ) {
1006 /* supply a mailto: protocol if one wasn't attached */
1007 mangle("mailto:", 7, f);
1008 }
1009 mangle(text, size, f);
1010 Qstring("\">", f);
1011 mangle(text+mailto, size-mailto, f);
1012 Qstring("</a>", f);
1013 return 1;
1014 }
1015 else if ( isautoprefix(text, size) ) {
1016 printlinkyref(f, &linkt, text, size);
1017 Qchar('>', f);
1018 puturl(text,size,f, 1);
1019 Qstring("</a>", f);
1020 return 1;
1021 }
1022 return 0;
1023 } /* process_possible_link */
1024
1025
1026 /* a < may be just a regular character, the start of an embedded html
1027 * tag, or the start of an <automatic link>. If it's an automatic
1028 * link, we also need to know if it's an email address because if it
1029 * is we need to mangle it in our futile attempt to cut down on the
1030 * spaminess of the rendered page.
1031 */
1032 static int
maybe_tag_or_link(MMIOT * f)1033 maybe_tag_or_link(MMIOT *f)
1034 {
1035 int c, size;
1036 int maybetag = 1;
1037
1038 if ( is_flag_set(f->flags, MKD_TAGTEXT) )
1039 return 0;
1040
1041 for ( size=0; (c = peek(f, size+1)) != '>'; size++) {
1042 if ( c == EOF )
1043 return 0;
1044 else if ( c == '\\' ) {
1045 maybetag=0;
1046 if ( peek(f, size+2) != EOF )
1047 size++;
1048 }
1049 else if ( isspace(c) )
1050 break;
1051 else if ( ! (c == '/'
1052 || (is_flag_set(f->flags, MKD_GITHUBTAGS) && (c == '-' || c == '_'))
1053 || isalnum(c) ) )
1054 maybetag=0;
1055 }
1056
1057 if ( size ) {
1058 if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
1059
1060 /* It is not a html tag unless we find the closing '>' in
1061 * the same block.
1062 */
1063 while ( (c = peek(f, size+1)) != '>' )
1064 if ( c == EOF )
1065 return 0;
1066 else
1067 size++;
1068
1069 if ( forbidden_tag(f) )
1070 return 0;
1071
1072 Qchar('<', f);
1073 while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
1074 Qchar(pull(f), f);
1075 return 1;
1076 }
1077 else if ( !isspace(c) && process_possible_link(f, size) ) {
1078 shift(f, size+1);
1079 return 1;
1080 }
1081 }
1082
1083 return 0;
1084 }
1085
1086
1087 /* autolinking means that all inline html is <a href'ified>. A
1088 * autolink url is alphanumerics, slashes, periods, underscores,
1089 * the at sign, colon, and the % character.
1090 */
1091 static int
maybe_autolink(MMIOT * f)1092 maybe_autolink(MMIOT *f)
1093 {
1094 register int c;
1095 int size;
1096
1097 /* greedily scan forward for the end of a legitimate link.
1098 */
1099 for ( size=0; (c=peek(f, size+1)) != EOF; size++ ) {
1100 if ( c == '\\' ) {
1101 if ( peek(f, size+2) != EOF )
1102 ++size;
1103 }
1104 else if ( c & 0x80 ) /* HACK: ignore utf-8 extended characters */
1105 continue;
1106 else if ( isspace(c) || strchr("'\"()[]{}<>`", c) || c == MKD_EOLN )
1107 break;
1108 }
1109
1110 if ( (size > 1) && process_possible_link(f, size) ) {
1111 shift(f, size);
1112 return 1;
1113 }
1114 return 0;
1115 }
1116
1117
1118 /* smartyquote code that's common for single and double quotes
1119 */
1120 static int
smartyquote(int * flags,char typeofquote,MMIOT * f)1121 smartyquote(int *flags, char typeofquote, MMIOT *f)
1122 {
1123 int bit = (typeofquote == 's') ? 0x01 : 0x02;
1124
1125 if ( bit & (*flags) ) {
1126 if ( isthisnonword(f,1) ) {
1127 Qprintf(f, "&r%cquo;", typeofquote);
1128 (*flags) &= ~bit;
1129 return 1;
1130 }
1131 }
1132 else if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
1133 Qprintf(f, "&l%cquo;", typeofquote);
1134 (*flags) |= bit;
1135 return 1;
1136 }
1137 return 0;
1138 }
1139
1140
1141 static int
islike(MMIOT * f,char * s)1142 islike(MMIOT *f, char *s)
1143 {
1144 int len;
1145 int i;
1146
1147 if ( s[0] == '|' ) {
1148 if ( !isthisnonword(f, -1) )
1149 return 0;
1150 ++s;
1151 }
1152
1153 if ( !(len = strlen(s)) )
1154 return 0;
1155
1156 if ( s[len-1] == '|' ) {
1157 if ( !isthisnonword(f,len-1) )
1158 return 0;
1159 len--;
1160 }
1161
1162 for (i=1; i < len; i++)
1163 if (tolower(peek(f,i)) != s[i])
1164 return 0;
1165 return 1;
1166 }
1167
1168
/* substitution table for smartypants().
 *
 *   c0      the character that triggers the check
 *   pat     pattern handed to islike(); `|` at either end asks for a
 *           non-word character on that side
 *   entity  name of the html entity to write, or 0 to write nothing
 *   shift   how many further input characters to skip on a match
 *
 * The last entry matches the literal text "&#0;" and writes nothing.
 */
static struct smarties {
    char c0;
    char *pat;
    char *entity;
    int shift;
} smarties[] = {
    { '\'', "'s|",      "rsquo",  0 },
    { '\'', "'t|",      "rsquo",  0 },
    { '\'', "'re|",     "rsquo",  0 },
    { '\'', "'ll|",     "rsquo",  0 },
    { '\'', "'ve|",     "rsquo",  0 },
    { '\'', "'m|",      "rsquo",  0 },
    { '\'', "'d|",      "rsquo",  0 },
    { '-',  "---",      "mdash",  2 },
    { '-',  "--",       "ndash",  1 },
    { '.',  "...",      "hellip", 2 },
    { '.',  ". . .",    "hellip", 4 },
    { '(',  "(c)",      "copy",   2 },
    { '(',  "(r)",      "reg",    2 },
    { '(',  "(tm)",     "trade",  3 },
    { '3',  "|3/4|",    "frac34", 2 },
    { '3',  "|3/4ths|", "frac34", 2 },
    { '1',  "|1/2|",    "frac12", 2 },
    { '1',  "|1/4|",    "frac14", 2 },
    { '1',  "|1/4th|",  "frac14", 2 },
    { '&',  "&#0;",      0,       3 },
} ;
#define NRSMART ( sizeof smarties / sizeof smarties[0] )
1197
1198
1199 /* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
1200 */
1201 static int
smartypants(int c,int * flags,MMIOT * f)1202 smartypants(int c, int *flags, MMIOT *f)
1203 {
1204 int i;
1205
1206 if ( is_flag_set(f->flags, MKD_NOPANTS)
1207 || is_flag_set(f->flags, MKD_TAGTEXT)
1208 || is_flag_set(f->flags, IS_LABEL) )
1209 return 0;
1210
1211 for ( i=0; i < NRSMART; i++)
1212 if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
1213 if ( smarties[i].entity )
1214 Qprintf(f, "&%s;", smarties[i].entity);
1215 shift(f, smarties[i].shift);
1216 return 1;
1217 }
1218
1219 switch (c) {
1220 case '<' : return 0;
1221 case '\'': if ( smartyquote(flags, 's', f) ) return 1;
1222 break;
1223
1224 case '"': if ( smartyquote(flags, 'd', f) ) return 1;
1225 break;
1226
1227 case '`': if ( peek(f, 1) == '`' ) {
1228 int j = 2;
1229
1230 while ( (c=peek(f,j)) != EOF ) {
1231 if ( c == '\\' )
1232 j += 2;
1233 else if ( c == '`' )
1234 break;
1235 else if ( c == '\'' && peek(f, j+1) == '\'' ) {
1236 Qstring("“", f);
1237 ___mkd_reparse(cursor(f)+1, j-2, 0, f, 0);
1238 Qstring("”", f);
1239 shift(f,j+1);
1240 return 1;
1241 }
1242 else ++j;
1243 }
1244
1245 }
1246 break;
1247 }
1248 return 0;
1249 } /* smartypants */
1250
1251
1252 /* process latex with arbitrary 2-character ( $$ .. $$, \[ .. \], \( .. \)
1253 * delimiters
1254 */
1255 static int
mathhandler(MMIOT * f,int e1,int e2)1256 mathhandler(MMIOT *f, int e1, int e2)
1257 {
1258 int i = 0;
1259
1260 while(peek(f, ++i) != EOF) {
1261 if (peek(f, i) == e1 && peek(f, i+1) == e2) {
1262 cputc(peek(f,-1), f);
1263 cputc(peek(f, 0), f);
1264 cputc(6, f);
1265 EXPAND(f->latex) = peek(f,-1);
1266 EXPAND(f->latex) = peek(f,0);
1267 EXPAND(f->latex) = 6;
1268 while ( i-- > -1 ) {
1269 char c = pull(f);
1270 EXPAND(f->latex) = c;
1271 cputc(c, f);
1272 }
1273 EXPAND(f->latex) = 31;
1274 return 1;
1275 }
1276 }
1277 return 0;
1278 }
1279
1280 /*
1281 * process latex with arbitrary custom delimiters
1282 */
1283 static int
mathhandlerExtended(MMIOT * f,char * begin,char * end)1284 mathhandlerExtended(MMIOT *f, char* begin, char* end)
1285 {
1286 int beginLength = strlen(begin);
1287 int endLength = strlen(end);
1288
1289 for (int i = 0; i < beginLength; i++)
1290 if (peek(f, i) != begin[i])
1291 return 0;
1292
1293 int i = beginLength;
1294 while(peek(f, ++i) != EOF) {
1295 int matchEnd = 1;
1296 for (int j = 0; j < endLength; j++)
1297 if (peek(f, i + j) != end[j])
1298 matchEnd = 0;
1299
1300 if (matchEnd == 1) {
1301 i += endLength;
1302
1303 cputc(6, f);
1304 EXPAND(f->latex) = '\\';
1305 EXPAND(f->latex) = 6;
1306
1307 while ( --i > 0 )
1308 {
1309 char c = pull(f);
1310 EXPAND(f->latex) = c;
1311 cputc(c, f);
1312 }
1313
1314 EXPAND(f->latex) = 31;
1315 return 1;
1316 }
1317 }
1318 return 0;
1319 }
1320
1321
1322 /* process a body of text encased in some sort of tick marks. If it
1323 * works, generate the output and return 1, otherwise just return 0 and
1324 * let the caller figure it out.
1325 */
1326 static int
tickhandler(MMIOT * f,int tickchar,int minticks,int allow_space,spanhandler spanner)1327 tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner)
1328 {
1329 int endticks, size;
1330 int tick = nrticks(0, tickchar, f);
1331
1332 if ( !allow_space && isspace(peek(f,tick)) )
1333 return 0;
1334
1335 if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) {
1336 if ( endticks < tick ) {
1337 size += (tick - endticks);
1338 tick = endticks;
1339 }
1340
1341 shift(f, tick);
1342 (*spanner)(f,size);
1343 shift(f, size+tick-1);
1344 return 1;
1345 }
1346 return 0;
1347 }
1348
/* true when the MKD_TAGTEXT flag is set on this MMIOT */
#define tag_text(f) is_flag_set(f->flags, MKD_TAGTEXT)


/* Inline scanner: pull bytes from the input buffer f->in one at a
 * time and write the generated output through the Q*() / cputc()
 * helpers until the input is exhausted, then reset the input buffer.
 *
 * Every byte is first offered to smartypants(); if that returns
 * nonzero the byte is treated as handled.  Otherwise the switch below
 * dispatches on the byte.  Note that f->last is only updated by the
 * final default case, i.e. for bytes that are copied through as plain
 * characters.
 *
 * NOTE(review): several literals below ( ">", "<", "&" and the quote
 * case ) appear here in a form where the tag-text and plain branches
 * emit the same byte, and the quote literal does not lex as C.  They
 * look like HTML entities that were decoded in this copy of the file;
 * confirm against the repository before relying on them.
 */
static void
text(MMIOT *f)
{
    int c, j;
    int rep;
    int smartyflags = 0;

    while (1) {
        /* autolinking is only attempted in front of a letter and
         * never inside tag text
         */
        if ( is_flag_set(f->flags, MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) )
            maybe_autolink(f);

        c = pull(f);

        if (c == EOF)
            break;

        if ( smartypants(c, &smartyflags, f) )
            continue;
        switch (c) {
        /* NUL bytes produce no output */
        case 0:     break;

        /* MKD_EOLN is pushed by printblock() for lines ending in two spaces */
        case MKD_EOLN:
                    Qstring(tag_text(f) ? " " : "<br/>", f);
                    break;

        case '>':   if ( tag_text(f) )
                        Qstring(">", f);
                    else
                        Qchar(c, f);
                    break;

        /* NOTE(review): literal below looks entity-decoded; see header */
        case '"':   if ( tag_text(f) )
                        Qstring(""", f);
                    else
                        Qchar(c, f);
                    break;

        /* "![" may start an image; if linkylinky() declines (or we are
         * in tag text) the two bytes are emitted literally
         */
        case '!':   if ( peek(f,1) == '[' ) {
                        pull(f);
                        if ( tag_text(f) || !linkylinky(1, f) )
                            Qstring("![", f);
                    }
                    else
                        Qchar(c, f);
                    break;

        case '[':   if ( tag_text(f) || !linkylinky(0, f) )
                        Qchar(c, f);
                    break;
        /* A^B -> A<sup>B</sup> */
        /* the caret stays literal when superscripts are disabled, in
         * strict mode or tag text, when no plain character has been
         * emitted yet, when the last plain character was punctuation
         * or whitespace other than ')', or when whitespace follows
         */
        case '^':   if ( is_flag_set(f->flags, MKD_NOSUPERSCRIPT)
                            || is_flag_set(f->flags, MKD_STRICT)
                            || is_flag_set(f->flags, MKD_TAGTEXT)
                            || (f->last == 0)
                            || ((ispunct(f->last) || isspace(f->last))
                                                    && f->last != ')')
                            || isthisspace(f,1) )
                        Qchar(c,f);
                    else {
                        char *sup = cursor(f);
                        int len = 0;

                        if ( peek(f,1) == '(' ) {
                            /* ^( ... ) : remember the position so we can
                             * back out if the parenthetical does not parse
                             */
                            int here = mmiottell(f);
                            pull(f);

                            if ( (len = parenthetical('(',')',f)) <= 0 ) {
                                mmiotseek(f,here);
                                Qchar(c, f);
                                break;
                            }
                            /* step over the '(' */
                            sup++;
                        }
                        else {
                            /* bare superscript: the run of alphanumerics
                             * following the caret
                             */
                            while ( isthisalnum(f,1+len) )
                                ++len;
                            if ( !len ) {
                                Qchar(c,f);
                                break;
                            }
                            shift(f,len);
                        }
                        Qstring("<sup>",f);
                        ___mkd_reparse(sup, len, 0, f, "()");
                        Qstring("</sup>", f);
                    }
                    break;
        case '_':
        /* Underscores don't count if they're in the middle of a word */
                    if ( !(is_flag_set(f->flags, MKD_NORELAXED) || is_flag_set(f->flags, MKD_STRICT))
                                        && isthisalnum(f,-1) && isthisalnum(f,1) ) {
                        Qchar(c, f);
                        break;
                    }
                    /* otherwise fall into the '*' case */
        case '*':
        /* Underscores & stars don't count if they're out in the middle
         * of whitespace */
                    if ( isthisspace(f,-1) && isthisspace(f,1) ) {
                        Qchar(c, f);
                        break;
                    }
                    /* else fall into the regular old emphasis case */
                    if ( tag_text(f) )
                        Qchar(c, f);
                    else {
                        /* count the run of identical emphasis characters,
                         * consuming all but the one already pulled
                         */
                        for (rep = 1; peek(f,1) == c; pull(f) )
                            ++rep;
                        Qem(f,c,rep);
                    }
                    break;

        /* strikethrough needs at least two tildes and no space after them */
        case '~':   if ( is_flag_set(f->flags, MKD_NOSTRIKETHROUGH)
                         || is_flag_set(f->flags, MKD_STRICT)
                         || is_flag_set(f->flags, MKD_TAGTEXT)
                         || ! tickhandler(f,c,2,0, delspan) )
                        Qchar(c, f);
                    break;

        case '`':   if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) )
                        Qchar(c, f);
                    break;

        /* backslash: dispatch on the byte that follows it.  In the
         * branches that call shift(f,-1) the backslash is emitted and
         * the following byte is (presumably) put back so the main loop
         * sees it again -- confirm against shift()
         */
        case '\\':  switch ( c = pull(f) ) {
                    case '&':   Qstring("&", f);
                                break;
                    case '<':   c = peek(f,1);
                                if ( (c == EOF) || isspace(c) )
                                    Qstring("<", f);
                                else {
                                    /* Markdown.pl does not escape <[nonwhite]
                                     * sequences */
                                    Qchar('\\', f);
                                    shift(f, -1);
                                }

                                break;
                    case '^':   if ( is_flag_set(f->flags, MKD_STRICT)
                                        || is_flag_set(f->flags, MKD_NOSUPERSCRIPT) ) {
                                    Qchar('\\', f);
                                    shift(f,-1);
                                    break;
                                }
                                Qchar(c, f);
                                break;

                    case ':': case '|':
                                if ( is_flag_set(f->flags, MKD_NOTABLES) ) {
                                    Qchar('\\', f);
                                    shift(f,-1);
                                    break;
                                }
                                Qchar(c, f);
                                break;

                    /* backslash was the last byte of the input */
                    case EOF:   Qchar('\\', f);
                                break;

                    case '[':
                    case '(':
                                Qchar(c, f);
                                break;

                    case '$':   if ( is_flag_set(f->flags, MKD_LATEX) ) {
                                    Qchar(c, f);
                                    break;
                                }
                                /* without MKD_LATEX, falls through to default */

                    default:
                                if ( escaped(f,c) ||
                                     strchr(">#.-+{}]![*_\\()`", c) )
                                    Qchar(c, f);
                                else {
                                    Qchar('\\', f);
                                    shift(f, -1);
                                    /* with MKD_LATEX, try each supported
                                     * \begin{..} .. \end{..} environment in
                                     * turn; the || chain stops at the first
                                     * handler that returns nonzero
                                     */
                                    if ( is_flag_set(f->flags, MKD_LATEX) ) {
                                        mathhandlerExtended(f, "\\begin{equation}", "\\end{equation}")
                                            || mathhandlerExtended(f, "\\begin{equation*}", "\\end{equation*}")
                                            || mathhandlerExtended(f, "\\begin{align}", "\\end{align}")
                                            || mathhandlerExtended(f, "\\begin{align*}", "\\end{align*}")
                                            || mathhandlerExtended(f, "\\begin{bmatrix}", "\\end{bmatrix}")
                                            || mathhandlerExtended(f, "\\begin{cases}", "\\end{cases}");
                                    }
                                }
                                break;
                    }
                    break;

        case '<':   if ( !maybe_tag_or_link(f) )
                        Qstring("<", f);
                    break;

        /* an ampersand followed by alphanumerics (optionally after '#')
         * and a ';' is copied through unchanged; anything else goes
         * through the Qstring() branch
         */
        case '&':   j = (peek(f,1) == '#' ) ? 2 : 1;
                    while ( isthisalnum(f,j) )
                        ++j;

                    if ( peek(f,j) != ';' )
                        Qstring("&", f);
                    else
                        Qchar(c, f);
                    break;

        case '$':   if ( is_flag_set(f->flags, MKD_LATEX) ) {
                        if (peek(f, 1) == '$' ) {
                            /* $$ .. $$ : consume the second '$' and look
                             * for the closing pair; if there is none, emit
                             * one '$' here and let the default case emit
                             * the other
                             */
                            pull(f);
                            if ( mathhandler(f, '$', '$') )
                                break;
                            Qchar('$', f);
                        }
                        else {
                            int c2;
                            int i = 1;

                            /* $ .. $ : find the closing '$' */
                            while ( ((c2=peek(f,i)) != '$') && (c2 != EOF) )
                                i++;
                            if ( c2 != EOF ) {
                                /* opening '$' and marker byte 6, then the
                                 * body and closing '$', to the output and
                                 * to f->latex; 31 ends the f->latex entry
                                 */
                                Qchar('$', f);
                                cputc(6, f);
                                EXPAND(f->latex) = '$';
                                EXPAND(f->latex) = 6;
                                while (i-- > 0 ) {
                                    char sym = pull(f);
                                    EXPAND(f->latex) = sym;
                                    Qchar(sym, f);
                                }
                                EXPAND(f->latex) = 31;
                                break;
                            }
                        }
                    }
                    /* fall through to default */

        /* plain character: remember it (the '^' case consults f->last)
         * and copy it through
         */
        default:    f->last = c;
                    Qchar(c, f);
                    break;
        }
    }
    /* truncate the input string after we've finished processing it */
    S(f->in) = f->isp = 0;
} /* text */
1591
1592
1593 /* print a header block
1594 */
1595 static void
printheader(Paragraph * pp,MMIOT * f)1596 printheader(Paragraph *pp, MMIOT *f)
1597 {
1598 if ( is_flag_set(f->flags, MKD_IDANCHOR) ) {
1599 Qprintf(f, "<h%d", pp->hnumber);
1600 if ( is_flag_set(f->flags, MKD_TOC) ) {
1601 Qstring(" id=\"", f);
1602 Qanchor(pp->text, f);
1603 Qchar('"', f);
1604 }
1605 Qchar('>', f);
1606 } else {
1607 if ( is_flag_set(f->flags, MKD_TOC) ) {
1608 Qstring("<a name=\"", f);
1609 Qanchor(pp->text, f);
1610 Qstring("\"></a>\n", f);
1611 }
1612 Qprintf(f, "<h%d>", pp->hnumber);
1613 }
1614 push(T(pp->text->text), S(pp->text->text), f);
1615 text(f);
1616 Qprintf(f, "</h%d>", pp->hnumber);
1617 }
1618
1619
/* table cell alignments; the values index the alignments[] table below */
enum e_alignments { a_NONE, a_CENTER, a_LEFT, a_RIGHT };

/* attribute text appended to a cell's opening tag, indexed by e_alignments
 * (a_NONE adds nothing)
 */
static char* alignments[] = { "", " style=\"text-align:center;\"",
                                  " style=\"text-align:left;\"",
                                  " style=\"text-align:right;\"" };

/* growable array of int; printtable() keeps one e_alignments value per column in it */
typedef STRING(int) Istring;
1627
1628 static int
splat(Line * p,char * block,Istring align,int force,MMIOT * f)1629 splat(Line *p, char *block, Istring align, int force, MMIOT *f)
1630 {
1631 int first,
1632 idx = p->dle,
1633 colno = 0;
1634
1635
1636 ___mkd_tidy(&p->text);
1637 if ( T(p->text)[S(p->text)-1] == '|' )
1638 --S(p->text);
1639
1640 Qstring("<tr>\n", f);
1641 while ( idx < S(p->text) ) {
1642 first = idx;
1643 if ( force && (colno >= S(align)-1) )
1644 idx = S(p->text);
1645 else
1646 while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) {
1647 if ( T(p->text)[idx] == '\\' )
1648 ++idx;
1649 ++idx;
1650 }
1651
1652 Qprintf(f, "<%s%s>",
1653 block,
1654 alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]);
1655 ___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|");
1656 Qprintf(f, "</%s>\n", block);
1657 idx++;
1658 colno++;
1659 }
1660 if ( force )
1661 while (colno < S(align) ) {
1662 Qprintf(f, "<%s></%s>\n", block, block);
1663 ++colno;
1664 }
1665 Qstring("</tr>\n", f);
1666 return colno;
1667 }
1668
1669
1670 static int
printtable(Paragraph * pp,MMIOT * f)1671 printtable(Paragraph *pp, MMIOT *f)
1672 {
1673 /* header, dashes, then lines of content */
1674
1675 Line *hdr, *dash, *body;
1676 Istring align;
1677 int hcols,start;
1678 char *p;
1679 enum e_alignments it;
1680
1681 hdr = pp->text;
1682 dash= hdr->next;
1683 body= dash->next;
1684
1685 if ( T(hdr->text)[hdr->dle] == '|' ) {
1686 /* trim leading pipe off all lines
1687 */
1688 Line *r;
1689 for ( r = pp->text; r; r = r->next )
1690 r->dle ++;
1691 }
1692
1693 /* figure out cell alignments */
1694
1695 CREATE(align);
1696
1697 for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) {
1698 char first, last;
1699 int end;
1700
1701 last=first=0;
1702 for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) {
1703 if ( p[end] == '\\' )
1704 ++ end;
1705 else if ( !isspace(p[end]) ) {
1706 if ( !first) first = p[end];
1707 last = p[end];
1708 }
1709 }
1710 it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT)
1711 : (( last == ':') ? a_RIGHT : a_NONE );
1712
1713 EXPAND(align) = it;
1714 start = 1+end;
1715 }
1716
1717 Qstring("<table>\n", f);
1718 Qstring("<thead>\n", f);
1719 hcols = splat(hdr, "th", align, 0, f);
1720 Qstring("</thead>\n", f);
1721
1722 if ( hcols < S(align) )
1723 S(align) = hcols;
1724 else
1725 while ( hcols > S(align) )
1726 EXPAND(align) = a_NONE;
1727
1728 Qstring("<tbody>\n", f);
1729 for ( ; body; body = body->next)
1730 splat(body, "td", align, 1, f);
1731 Qstring("</tbody>\n", f);
1732 Qstring("</table>\n", f);
1733
1734 DELETE(align);
1735 return 1;
1736 }
1737
1738
1739 static int
printblock(Paragraph * pp,MMIOT * f)1740 printblock(Paragraph *pp, MMIOT *f)
1741 {
1742 static char *Begin[] = { "", "<p>", "<p style=\"text-align:center;\">" };
1743 static char *End[] = { "", "</p>","</p>" };
1744 Line *t = pp->text;
1745 int align = pp->align;
1746
1747 while (t) {
1748 if ( S(t->text) ) {
1749 if ( t->next && S(t->text) > 2
1750 && T(t->text)[S(t->text)-2] == ' '
1751 && T(t->text)[S(t->text)-1] == ' ' ) {
1752 push(T(t->text), S(t->text)-2, f);
1753 pushc(MKD_EOLN, f);
1754 pushc('\n', f);
1755 }
1756 else {
1757 ___mkd_tidy(&t->text);
1758 push(T(t->text), S(t->text), f);
1759 if ( t->next )
1760 pushc('\n', f);
1761 }
1762 }
1763 t = t->next;
1764 }
1765 Qstring(Begin[align], f);
1766 text(f);
1767 Qstring(End[align], f);
1768 return 1;
1769 }
1770
1771
1772 static void
printcode(Line * t,char * lang,MMIOT * f)1773 printcode(Line *t, char *lang, MMIOT *f)
1774 {
1775 int blanks;
1776
1777 if ( f->cb->e_codefmt ) {
1778 /* external code block formatter; copy the text into a buffer,
1779 * call the formatter to style it, then dump that styled text
1780 * directly to the queue
1781 */
1782 char *text;
1783 char *fmt;
1784 int size, copy_p;
1785 Line *p;
1786
1787 for (size=0, p = t; p; p = p->next )
1788 size += 1+S(p->text);
1789
1790 text = malloc(1+size);
1791
1792 for ( copy_p = 0; t ; t = t->next ) {
1793 memcpy(text+copy_p, T(t->text), S(t->text));
1794 copy_p += S(t->text);
1795 text[copy_p++] = '\n';
1796 }
1797 text[copy_p] = 0;
1798
1799 fmt = (*(f->cb->e_codefmt))(text, copy_p, (lang && lang[0]) ? lang : 0);
1800 free(text);
1801
1802 if ( fmt ) {
1803 Qwrite(fmt, strlen(fmt), f);
1804 if ( f->cb->e_free )
1805 (*(f->cb->e_free))(fmt, f->cb->e_data);
1806 return;
1807 }
1808 /* otherwise the external formatter failed and we need to
1809 * fall back to the traditional codeblock format
1810 */
1811 }
1812
1813 Qstring("<pre><code", f);
1814 if (lang && lang[0]) {
1815 Qstring(" class=\"", f);
1816 Qstring(lang, f);
1817 Qstring("\"", f);
1818 }
1819 Qstring(">", f);
1820 for ( blanks = 0; t ; t = t->next ) {
1821 if ( S(t->text) > t->dle ) {
1822 while ( blanks ) {
1823 Qchar('\n', f);
1824 --blanks;
1825 }
1826 code(f, T(t->text), S(t->text));
1827 Qchar('\n', f);
1828 }
1829 else blanks++;
1830 }
1831 Qstring("</code></pre>", f);
1832 }
1833
1834
1835 static void
printhtml(Line * t,MMIOT * f)1836 printhtml(Line *t, MMIOT *f)
1837 {
1838 int blanks;
1839
1840 for ( blanks=0; t ; t = t->next )
1841 if ( S(t->text) ) {
1842 for ( ; blanks; --blanks )
1843 Qchar('\n', f);
1844
1845 Qwrite(T(t->text), S(t->text), f);
1846 Qchar('\n', f);
1847 }
1848 else
1849 blanks++;
1850 }
1851
1852
1853 static void
htmlify_paragraphs(Paragraph * p,MMIOT * f)1854 htmlify_paragraphs(Paragraph *p, MMIOT *f)
1855 {
1856 ___mkd_emblock(f);
1857
1858 while (( p = display(p, f) )) {
1859 ___mkd_emblock(f);
1860 Qstring("\n\n", f);
1861 }
1862 }
1863
1864
#ifdef GITHUB_CHECKBOX
/* Emit one list item: <li>, optionally carrying <arguments> as
 * attributes, wrapped around the paragraphs in <p>.  When <flags> has
 * GITHUB_CHECK set the item gets class="github_checkbox" and a
 * checkbox in front of its content; IS_CHECKED selects the checked
 * form.  The checkbox is either a disabled <input> or a single
 * character, depending on CHECKBOX_AS_INPUT.
 */
static void
li_htmlify(Paragraph *p, char *arguments, mkd_flag_t flags, MMIOT *f)
{
    ___mkd_emblock(f);

    /* opening tag */
    Qprintf(f, "<li");
    if ( arguments )
        Qprintf(f, " %s", arguments);
    if ( flags & GITHUB_CHECK )
        Qprintf(f, " class=\"github_checkbox\"");
    Qprintf(f, ">");

    /* checkbox, if this is a task list item */
#if CHECKBOX_AS_INPUT
    if ( flags & GITHUB_CHECK ) {
        Qprintf(f, "<input disabled=\"\" type=\"checkbox\"");
        if ( flags & IS_CHECKED )
            Qprintf(f, " checked=\"checked\"");
        Qprintf(f, "/>");
    }
#else
    if ( flags & GITHUB_CHECK ) {
        if ( flags & IS_CHECKED )
            Qprintf(f, "☑");
        else
            Qprintf(f, "☐");
    }
#endif

    htmlify_paragraphs(p, f);

    Qprintf(f, "</li>");
    ___mkd_emblock(f);
}
#endif
1895
1896
1897 static void
htmlify(Paragraph * p,char * block,char * arguments,MMIOT * f)1898 htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
1899 {
1900 ___mkd_emblock(f);
1901 if ( block )
1902 Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
1903
1904 htmlify_paragraphs(p, f);
1905
1906 if ( block )
1907 Qprintf(f, "</%s>", block);
1908 ___mkd_emblock(f);
1909 }
1910
1911
1912 static void
definitionlist(Paragraph * p,MMIOT * f)1913 definitionlist(Paragraph *p, MMIOT *f)
1914 {
1915 Line *tag;
1916
1917 if ( p ) {
1918 Qstring("<dl>\n", f);
1919
1920 for ( ; p ; p = p->next) {
1921 for ( tag = p->text; tag; tag = tag->next ) {
1922 Qstring("<dt>", f);
1923 ___mkd_reparse(T(tag->text), S(tag->text), 0, f, 0);
1924 Qstring("</dt>\n", f);
1925 }
1926
1927 htmlify(p->down, "dd", p->ident, f);
1928 Qchar('\n', f);
1929 }
1930
1931 Qstring("</dl>", f);
1932 }
1933 }
1934
1935
1936 static void
listdisplay(int typ,Paragraph * p,MMIOT * f)1937 listdisplay(int typ, Paragraph *p, MMIOT* f)
1938 {
1939 if ( p ) {
1940 Qprintf(f, "<%cl", (typ==UL)?'u':'o');
1941 if ( typ == AL )
1942 Qprintf(f, " type=\"a\"");
1943 Qprintf(f, ">\n");
1944
1945 for ( ; p ; p = p->next ) {
1946 #ifdef GITHUB_CHECKBOX
1947 li_htmlify(p->down, p->ident, p->flags, f);
1948 #else
1949 htmlify(p->down, "li", p->ident, f);
1950 #endif
1951 Qchar('\n', f);
1952 }
1953
1954 Qprintf(f, "</%cl>\n", (typ==UL)?'u':'o');
1955 }
1956 }
1957
1958
1959 /* dump out a Paragraph in the desired manner
1960 */
1961 static Paragraph*
display(Paragraph * p,MMIOT * f)1962 display(Paragraph *p, MMIOT *f)
1963 {
1964 if ( !p ) return 0;
1965
1966 switch ( p->typ ) {
1967 case STYLE:
1968 case WHITESPACE:
1969 break;
1970
1971 case HTML:
1972 printhtml(p->text, f);
1973 break;
1974
1975 case CODE:
1976 printcode(p->text, p->lang, f);
1977 break;
1978
1979 case QUOTE:
1980 htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
1981 break;
1982
1983 case UL:
1984 case OL:
1985 case AL:
1986 listdisplay(p->typ, p->down, f);
1987 break;
1988
1989 case DL:
1990 definitionlist(p->down, f);
1991 break;
1992
1993 case HR:
1994 Qstring("<hr />", f);
1995 break;
1996
1997 case HDR:
1998 printheader(p, f);
1999 break;
2000
2001 case TABLE:
2002 printtable(p, f);
2003 break;
2004
2005 case SOURCE:
2006 htmlify(p->down, 0, 0, f);
2007 break;
2008
2009 default:
2010 printblock(p, f);
2011 break;
2012 }
2013 return p->next;
2014 }
2015
2016
2017 /* dump out a list of footnotes
2018 */
2019 static void
mkd_extra_footnotes(MMIOT * m)2020 mkd_extra_footnotes(MMIOT *m)
2021 {
2022 int j, i;
2023 Footnote *t;
2024
2025 if ( m->footnotes->reference == 0 )
2026 return;
2027
2028 Csprintf(&m->out, "\n<div class=\"footnotes\">\n<hr/>\n<ol>\n");
2029
2030 for ( i=1; i <= m->footnotes->reference; i++ ) {
2031 for ( j=0; j < S(m->footnotes->note); j++ ) {
2032 t = &T(m->footnotes->note)[j];
2033 if ( (t->refnumber == i) && (t->flags & REFERENCED) ) {
2034 Csprintf(&m->out, "<li id=\"%s:%d\">\n",
2035 p_or_nothing(m), t->refnumber);
2036 htmlify(t->text, 0, 0, m);
2037 Csprintf(&m->out, "<a href=\"#%sref:%d\" rev=\"footnote\">↩</a>",
2038 p_or_nothing(m), t->refnumber);
2039 Csprintf(&m->out, "</li>\n");
2040 }
2041 }
2042 }
2043 Csprintf(&m->out, "</ol>\n</div>\n");
2044 }
2045
2046
2047 /* return a pointer to the compiled markdown
2048 * document.
2049 */
2050 int
mkd_document(Document * p,char ** res)2051 mkd_document(Document *p, char **res)
2052 {
2053 int size;
2054
2055 if ( p && p->compiled ) {
2056 if ( ! p->html ) {
2057 htmlify(p->code, 0, 0, p->ctx);
2058 if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) )
2059 mkd_extra_footnotes(p->ctx);
2060 p->html = 1;
2061 size = S(p->ctx->out);
2062
2063 if ( (size == 0) || T(p->ctx->out)[size-1] ) {
2064 /* Add a null byte at the end of the generated html,
2065 * but pretend it doesn't exist.
2066 */
2067 EXPAND(p->ctx->out) = 0;
2068 --S(p->ctx->out);
2069 }
2070 }
2071
2072 *res = T(p->ctx->out);
2073 return S(p->ctx->out);
2074 }
2075 return EOF;
2076 }
2077
2078 /* Return list of founded latex textes (only textes, without positions) separeted by ASCII unit separator (code - 31)
2079 * Ugly, but works
2080 */
2081 int
mkd_latextext(Document * p,char ** res)2082 mkd_latextext(Document *p, char **res)
2083 {
2084 int size;
2085
2086 if ( p && p->compiled ) {
2087 if ( ! p->html ) {
2088 htmlify(p->code, 0, 0, p->ctx);
2089 if ( is_flag_set(p->ctx->flags, MKD_EXTRA_FOOTNOTE) )
2090 mkd_extra_footnotes(p->ctx);
2091 p->html = 1;
2092 size = S(p->ctx->latex);
2093
2094 if ( (size == 0) || T(p->ctx->latex)[size-1] ) {
2095 /* Add a null byte at the end of the generated html,
2096 * but pretend it doesn't exist.
2097 */
2098 EXPAND(p->ctx->latex) = 0;
2099 --S(p->ctx->latex);
2100 }
2101 }
2102
2103 *res = T(p->ctx->latex);
2104 return S(p->ctx->latex);
2105 }
2106 return EOF;
2107 }
2108