1 /*
2 ** Copyright (c) 2012 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 **   drh@hwaci.com
14 **   http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains callbacks for the markdown parser that generate
19 ** XHTML output.
20 */
21 
22 #include "config.h"
23 #include "markdown_html.h"
24 
25 #if INTERFACE
26 
27 void markdown_to_html(
28   struct Blob *input_markdown,
29   struct Blob *output_title,
30   struct Blob *output_body);
31 
32 #endif /* INTERFACE */
33 
/*
** Rendering context.  markdown_to_html() stores a pointer to an instance
** of this structure in the renderer's "opaque" slot, and the callbacks in
** this file that need it cast their opaque argument back to
** MarkdownToHtml*.
*/
typedef struct MarkdownToHtml MarkdownToHtml;
struct MarkdownToHtml {
  Blob *output_title;     /* Store the title here.  NULL if not wanted */
};
42 
43 
/* INTER_BLOCK -- put a newline ahead of a block level element, unless
** nothing has been written to the output blob yet */
#define INTER_BLOCK(ob) \
  do{ \
    if( blob_size(ob)>0 ){ \
      blob_append_char(ob, '\n'); \
    } \
  }while(0)
47 
/* BLOB_APPEND_LITERAL -- append a string literal to a blob
**
** The literal is written next to an empty string so that using the macro
** with anything other than a string literal is a syntax error (though
** unfortunately not a very explicit one).  The length handed to
** blob_append() is the size of the literal without its NUL terminator.
*/
#define BLOB_APPEND_LITERAL(blob, literal) \
  blob_append((blob), "" literal, sizeof(literal)-1)
56 
/* BLOB_APPEND_BLOB -- append the whole content of blob src to blob dest.
** Note that the src argument is expanded twice. */
#define BLOB_APPEND_BLOB(dest, src) \
  blob_append(                      \
    (dest),                         \
    blob_buffer(src),               \
    blob_size(src)                  \
  )
60 
61 
62 /* HTML escapes
63 **
64 ** html_escape() converts < to &lt;, > to &gt;, and & to &amp;.
** html_quote() goes further and converts " into &quot; and ' into &#39;.
66 */
/*
** Append the size bytes at data[] to ob, replacing each of the five
** characters <, >, &, " and ' by &lt;, &gt;, &amp;, &quot; and &#39;
** respectively.  Every other byte is copied unchanged.
*/
static void html_quote(struct Blob *ob, const char *data, size_t size){
  size_t iPending = 0;   /* Index of the first byte not yet copied to ob */
  size_t k;              /* Index of the byte being examined */
  for(k=0; k<size; k++){
    const char c = data[k];
    if( c!='<' && c!='>' && c!='&' && c!='"' && c!='\'' ) continue;
    /* Flush the run of ordinary bytes that precedes this special one. */
    if( k>iPending ) blob_append(ob, data+iPending, k-iPending);
    iPending = k+1;
    switch( c ){
      case '<':  BLOB_APPEND_LITERAL(ob, "&lt;");    break;
      case '>':  BLOB_APPEND_LITERAL(ob, "&gt;");    break;
      case '&':  BLOB_APPEND_LITERAL(ob, "&amp;");   break;
      case '"':  BLOB_APPEND_LITERAL(ob, "&quot;");  break;
      default:   BLOB_APPEND_LITERAL(ob, "&#39;");   break;  /* c=='\'' */
    }
  }
  /* Whatever follows the last special byte is ordinary text. */
  if( size>iPending ) blob_append(ob, data+iPending, size-iPending);
}
/*
** Append the size bytes at data[] to ob, replacing each of the three
** characters <, > and & by &lt;, &gt; and &amp; respectively.  Every
** other byte, quote characters included, is copied unchanged.
*/
static void html_escape(struct Blob *ob, const char *data, size_t size){
  size_t iPending = 0;   /* Index of the first byte not yet copied to ob */
  size_t k;              /* Index of the byte being examined */
  for(k=0; k<size; k++){
    const char c = data[k];
    if( c!='<' && c!='>' && c!='&' ) continue;
    /* Flush the run of ordinary bytes that precedes this special one. */
    if( k>iPending ) blob_append(ob, data+iPending, k-iPending);
    iPending = k+1;
    switch( c ){
      case '<':  BLOB_APPEND_LITERAL(ob, "&lt;");   break;
      case '>':  BLOB_APPEND_LITERAL(ob, "&gt;");   break;
      default:   BLOB_APPEND_LITERAL(ob, "&amp;");  break;  /* c=='&' */
    }
  }
  /* Whatever follows the last special byte is ordinary text. */
  if( size>iPending ) blob_append(ob, data+iPending, size-iPending);
}
125 
126 
127 /* HTML block tags */
128 
/* PROLOG_TEXT is the text that html_prolog() writes at the very start of
** the output.  PROLOG_SIZE is its length in bytes, computed from the
** literal itself so that the two cannot drift apart.  Other callbacks in
** this file compare blob_size(ob) against PROLOG_SIZE to find out whether
** anything besides the prolog has been written yet.
*/
#define PROLOG_TEXT "<div class=\"markdown\">\n"
#define PROLOG_SIZE ((int)sizeof(PROLOG_TEXT)-1)

/*
** Renderer callback: write the opening <div class="markdown"> wrapper.
** The assert() documents the expectation that ob is empty on entry, so
** that afterwards it holds exactly the prolog.
*/
static void html_prolog(struct Blob *ob, void *opaque){
  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, PROLOG_TEXT);
  assert( blob_size(ob)==PROLOG_SIZE );
}
137 
/*
** Renderer callback: write the closing </div> of the wrapper, preceded
** by a newline if ob is not empty.
*/
static void html_epilog(struct Blob *ob, void *opaque){
  if( blob_size(ob)>0 ){
    blob_append_char(ob, '\n');
  }
  BLOB_APPEND_LITERAL(ob, "</div>\n");
}
142 
html_blockhtml(struct Blob * ob,struct Blob * text,void * opaque)143 static void html_blockhtml(struct Blob *ob, struct Blob *text, void *opaque){
144   char *data = blob_buffer(text);
145   size_t size = blob_size(text);
146   Blob *title = ((MarkdownToHtml*)opaque)->output_title;
147   while( size>0 && fossil_isspace(data[0]) ){ data++; size--; }
148   while( size>0 && fossil_isspace(data[size-1]) ){ size--; }
149   /* If the first raw block is an <h1> element, then use it as the title. */
150   if( blob_size(ob)<=PROLOG_SIZE
151    && size>9
152    && title!=0
153    && sqlite3_strnicmp("<h1",data,3)==0
154    && sqlite3_strnicmp("</h1>", &data[size-5],5)==0
155   ){
156     int nTag = html_tag_length(data);
157     blob_append(title, data+nTag, size - nTag - 5);
158     return;
159   }
160   INTER_BLOCK(ob);
161   blob_append(ob, data, size);
162   BLOB_APPEND_LITERAL(ob, "\n");
163 }
164 
/*
** Renderer callback for a code block: write the HTML-escaped text
** wrapped in <pre><code> ... </code></pre>.
*/
static void html_blockcode(struct Blob *ob, struct Blob *text, void *opaque){
  const char *zCode = blob_buffer(text);
  size_t nCode = blob_size(text);
  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, "<pre><code>");
  html_escape(ob, zCode, nCode);
  BLOB_APPEND_LITERAL(ob, "</code></pre>\n");
}
171 
/*
** Renderer callback for a block quote: copy text, unmodified, between
** <blockquote> and </blockquote> lines.
*/
static void html_blockquote(struct Blob *ob, struct Blob *text, void *opaque){
  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, "<blockquote>\n");
  blob_append(ob, blob_buffer(text), blob_size(text));
  BLOB_APPEND_LITERAL(ob, "</blockquote>\n");
}
178 
html_header(struct Blob * ob,struct Blob * text,int level,void * opaque)179 static void html_header(
180   struct Blob *ob,
181   struct Blob *text,
182   int level,
183   void *opaque
184 ){
185   struct Blob *title = ((MarkdownToHtml*)opaque)->output_title;
186   /* The first header at the beginning of a text is considered as
187    * a title and not output. */
188   if( blob_size(ob)<=PROLOG_SIZE && title!=0 && blob_size(title)==0 ){
189     BLOB_APPEND_BLOB(title, text);
190     return;
191   }
192   INTER_BLOCK(ob);
193   blob_appendf(ob, "<h%d>", level);
194   BLOB_APPEND_BLOB(ob, text);
195   blob_appendf(ob, "</h%d>", level);
196 }
197 
/*
** Renderer callback for a horizontal rule: write "<hr />", preceded by
** a newline if ob is not empty.
*/
static void html_hrule(struct Blob *ob, void *opaque){
  if( blob_size(ob)>0 ){
    blob_append_char(ob, '\n');
  }
  BLOB_APPEND_LITERAL(ob, "<hr />\n");
}
202 
203 
html_list(struct Blob * ob,struct Blob * text,int flags,void * opaque)204 static void html_list(
205   struct Blob *ob,
206   struct Blob *text,
207   int flags,
208   void *opaque
209 ){
210   char ol[] = "ol";
211   char ul[] = "ul";
212   char *tag = (flags & MKD_LIST_ORDERED) ? ol : ul;
213   INTER_BLOCK(ob);
214   blob_appendf(ob, "<%s>\n", tag);
215   BLOB_APPEND_BLOB(ob, text);
216   blob_appendf(ob, "</%s>\n", tag);
217 }
218 
/*
** Renderer callback for one list item: write the item text, with all of
** its trailing newlines removed, between <li> and </li>.
*/
static void html_list_item(
  struct Blob *ob,
  struct Blob *text,
  int flags,
  void *opaque
){
  const char *zItem = blob_buffer(text);
  size_t nKeep;        /* Number of bytes of zItem[] to output */
  for(nKeep=blob_size(text); nKeep>0 && zItem[nKeep-1]=='\n'; nKeep--){}
  BLOB_APPEND_LITERAL(ob, "<li>");
  blob_append(ob, zItem, nKeep);
  BLOB_APPEND_LITERAL(ob, "</li>\n");
}
232 
/*
** Renderer callback for a paragraph: copy text, unmodified, between
** <p> and </p>.
*/
static void html_paragraph(struct Blob *ob, struct Blob *text, void *opaque){
  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, "<p>");
  blob_append(ob, blob_buffer(text), blob_size(text));
  BLOB_APPEND_LITERAL(ob, "</p>\n");
}
239 
240 
/*
** Renderer callback for a table.  head_row and rows hold the already
** rendered header row and body rows; either may be NULL.  When there is
** a non-empty header row it goes inside <thead> and the body rows inside
** <tbody>.  Without one the body rows go directly inside <table>.
*/
static void html_table(
  struct Blob *ob,
  struct Blob *head_row,
  struct Blob *rows,
  void *opaque
){
  const int hasHead = head_row!=0 && blob_size(head_row)>0;

  INTER_BLOCK(ob);
  BLOB_APPEND_LITERAL(ob, "<table>\n");
  if( hasHead ){
    BLOB_APPEND_LITERAL(ob, "<thead>\n");
    BLOB_APPEND_BLOB(ob, head_row);
    BLOB_APPEND_LITERAL(ob, "</thead>\n<tbody>\n");
  }
  if( rows!=0 ){
    BLOB_APPEND_BLOB(ob, rows);
  }
  if( hasHead ){
    BLOB_APPEND_LITERAL(ob, "</tbody>\n");
  }
  BLOB_APPEND_LITERAL(ob, "</table>\n");
}
262 
html_table_cell(struct Blob * ob,struct Blob * text,int flags,void * opaque)263 static void html_table_cell(
264   struct Blob *ob,
265   struct Blob *text,
266   int flags,
267   void *opaque
268 ){
269   if( flags & MKD_CELL_HEAD ){
270     BLOB_APPEND_LITERAL(ob, "    <th");
271   }else{
272     BLOB_APPEND_LITERAL(ob, "    <td");
273   }
274   switch( flags & MKD_CELL_ALIGN_MASK ){
275     case MKD_CELL_ALIGN_LEFT: {
276       BLOB_APPEND_LITERAL(ob, " align=\"left\"");
277       break;
278     }
279     case MKD_CELL_ALIGN_RIGHT: {
280       BLOB_APPEND_LITERAL(ob, " align=\"right\"");
281       break;
282     }
283     case MKD_CELL_ALIGN_CENTER: {
284       BLOB_APPEND_LITERAL(ob, " align=\"center\"");
285       break;
286     }
287   }
288   BLOB_APPEND_LITERAL(ob, ">");
289   BLOB_APPEND_BLOB(ob, text);
290   if( flags & MKD_CELL_HEAD ){
291     BLOB_APPEND_LITERAL(ob, "</th>\n");
292   }else{
293     BLOB_APPEND_LITERAL(ob, "</td>\n");
294   }
295 }
296 
/*
** Renderer callback for one table row: copy the already rendered cells,
** unmodified, between indented <tr> and </tr> lines.
*/
static void html_table_row(
  struct Blob *ob,
  struct Blob *cells,
  int flags,
  void *opaque
){
  BLOB_APPEND_LITERAL(ob, "  <tr>\n");
  blob_append(ob, blob_buffer(cells), blob_size(cells));
  BLOB_APPEND_LITERAL(ob, "  </tr>\n");
}
307 
308 
309 
310 /* HTML span tags */
311 
/*
** Renderer callback for an inline raw HTML tag: copy it to ob without
** any escaping.  Always returns 1.
*/
static int html_raw_html_tag(struct Blob *ob, struct Blob *text, void *opaque){
  BLOB_APPEND_BLOB(ob, text);
  return 1;
}
316 
html_autolink(struct Blob * ob,struct Blob * link,enum mkd_autolink type,void * opaque)317 static int html_autolink(
318   struct Blob *ob,
319   struct Blob *link,
320   enum mkd_autolink type,
321   void *opaque
322 ){
323   if( !link || blob_size(link)<=0 ) return 0;
324   BLOB_APPEND_LITERAL(ob, "<a href=\"");
325   if( type==MKDA_IMPLICIT_EMAIL ) BLOB_APPEND_LITERAL(ob, "mailto:");
326   html_quote(ob, blob_buffer(link), blob_size(link));
327   BLOB_APPEND_LITERAL(ob, "\">");
328   if( type==MKDA_EXPLICIT_EMAIL && blob_size(link)>7 ){
329     /* remove "mailto:" from displayed text */
330     html_escape(ob, blob_buffer(link)+7, blob_size(link)-7);
331   }else{
332     html_escape(ob, blob_buffer(link), blob_size(link));
333   }
334   BLOB_APPEND_LITERAL(ob, "</a>");
335   return 1;
336 }
337 
338 /*
339 ** The nSrc bytes at zSrc[] are Pikchr input text (allegedly).  Process that
340 ** text and insert the result in place of the original.
341 */
pikchr_to_html(Blob * ob,const char * zSrc,int nSrc,const char * zArg,int nArg)342 void pikchr_to_html(
343   Blob *ob,                     /* Write the generated SVG here */
344   const char *zSrc, int nSrc,   /* The Pikchr source text */
345   const char *zArg, int nArg    /* Addition arguments */
346 ){
347   int pikFlags = PIKCHR_PROCESS_NONCE
348     | PIKCHR_PROCESS_DIV
349     | PIKCHR_PROCESS_SRC
350     | PIKCHR_PROCESS_ERR_PRE;
351   Blob bSrc = empty_blob;
352   const char *zPikVar;
353   double rPikVar;
354 
355   while( nArg>0 ){
356     int i;
357     for(i=0; i<nArg && !fossil_isspace(zArg[i]); i++){}
358     if( i==6 && strncmp(zArg, "center", 6)==0 ){
359       pikFlags |= PIKCHR_PROCESS_DIV_CENTER;
360     }else if( i==6 && strncmp(zArg, "indent", 6)==0 ){
361       pikFlags |= PIKCHR_PROCESS_DIV_INDENT;
362     }else if( i==10 && strncmp(zArg, "float-left", 10)==0 ){
363       pikFlags |= PIKCHR_PROCESS_DIV_FLOAT_LEFT;
364     }else if( i==11 && strncmp(zArg, "float-right", 11)==0 ){
365       pikFlags |= PIKCHR_PROCESS_DIV_FLOAT_RIGHT;
366     }else if( i==6 && strncmp(zArg, "toggle", 6)==0 ){
367       pikFlags |= PIKCHR_PROCESS_DIV_TOGGLE;
368     }else if( i==6 && strncmp(zArg, "source", 6)==0 ){
369       pikFlags |= PIKCHR_PROCESS_DIV_SOURCE;
370     }else if( i==13 && strncmp(zArg, "source-inline", 13)==0 ){
371       pikFlags |= PIKCHR_PROCESS_DIV_SOURCE_INLINE;
372     }
373     while( i<nArg && fossil_isspace(zArg[i]) ){ i++; }
374     zArg += i;
375     nArg -= i;
376   }
377   if( skin_detail_boolean("white-foreground") ){
378     pikFlags |= 0x02;  /* PIKCHR_DARK_MODE */
379   }
380   zPikVar = skin_detail("pikchr-foreground");
381   if( zPikVar && zPikVar[0] ){
382     blob_appendf(&bSrc, "fgcolor = %s\n", zPikVar);
383   }
384   zPikVar = skin_detail("pikchr-background");
385   if( zPikVar && zPikVar[0] ){
386     blob_appendf(&bSrc, "bgcolor = %s\n", zPikVar);
387   }
388   zPikVar = skin_detail("pikchr-scale");
389   if( zPikVar
390    && (rPikVar = atof(zPikVar))>=0.1
391    && rPikVar<10.0
392   ){
393     blob_appendf(&bSrc, "scale = %.13g\n", rPikVar);
394   }
395   zPikVar = skin_detail("pikchr-fontscale");
396   if( zPikVar
397    && (rPikVar = atof(zPikVar))>=0.1
398    && rPikVar<10.0
399   ){
400     blob_appendf(&bSrc, "fontscale = %.13g\n", rPikVar);
401   }
402   blob_append(&bSrc, zSrc, nSrc)
403     /*have to dup input to ensure a NUL-terminated source string */;
404   pikchr_process(blob_str(&bSrc), pikFlags, 0, ob);
405   blob_reset(&bSrc);
406 }
407 
/* Invoked for `...` blocks where there are nSep grave accents in a
** row that serve as the delimiter.  According to CommonMark:
**
**   *  https://spec.commonmark.org/0.29/#fenced-code-blocks
**   *  https://spec.commonmark.org/0.29/#code-spans
**
** If nSep is 1 or 2, then this is a code-span which is inline.
** If nSep is 3 or more, then this is a fenced code block.
**
** For a fenced code block the first line of text is the info string and
** the remaining lines are the body:
**
**   *  No newline in text:  all of text is output as code.
**   *  Blank first line:    the body is output as code.
**   *  First word "pikchr": the body is handed to pikchr_to_html(), with
**                           the rest of the first line as its arguments.
**   *  Anything else:       the body is output as code carrying a
**                           class='language-WORD' attribute, where WORD
**                           is the first word of the first line.
**
** Nothing is written when text is NULL.  Always returns 1.
*/
static int html_codespan(
  struct Blob *ob,    /* Write the output here */
  struct Blob *text,  /* The stuff in between the code span marks */
  int nSep,           /* Number of grave accents marks as delimiters */
  void *opaque
){
  int n;              /* Number of bytes in z[] */
  const char *z;      /* Content of text */
  int iBody;          /* Offset of the first byte after the first line */
  int iWord;          /* Offset of the first word on the first line */
  int iEnd;           /* Offset just past that first word */

  if( text==0 ) return 1;

  if( nSep<=2 ){
    /* One or two graves: an in-line code span */
    BLOB_APPEND_LITERAL(ob, "<code>");
    html_escape(ob, blob_buffer(text), blob_size(text));
    BLOB_APPEND_LITERAL(ob, "</code>");
    return 1;
  }

  /* Three or more graves: a fenced code block.
  ** NOTE(review): %#h presumably consumes an (int length, char*) pair and
  ** HTML-escapes that many bytes -- confirm against blob_appendf(). */
  n = blob_size(text);
  z = blob_buffer(text);
  for(iBody=0; iBody<n && z[iBody]!='\n'; iBody++){}
  if( iBody>=n ){
    /* There is no newline at all */
    blob_appendf(ob, "<pre><code>%#h</code></pre>", n, z);
    return 1;
  }
  iBody++;

  for(iWord=0; iWord<iBody && fossil_isspace(z[iWord]); iWord++){}
  if( iWord==iBody ){
    /* The first line holds nothing but whitespace */
    blob_appendf(ob, "<pre><code>%#h</code></pre>", n-iBody, z+iBody);
    return 1;
  }

  for(iEnd=iWord+1; iEnd<iBody && !fossil_isspace(z[iEnd]); iEnd++){}
  if( iEnd-iWord==6 && strncmp(z+iWord,"pikchr",6)==0 ){
    /* Whatever follows "pikchr" on the first line is the argument list */
    while( iEnd<iBody && fossil_isspace(z[iEnd]) ){ iEnd++; }
    pikchr_to_html(ob, z+iBody, n-iBody, z+iEnd, iBody-iEnd);
  }else{
    blob_appendf(ob, "<pre><code class='language-%#h'>%#h</code></pre>",
                      iEnd-iWord, z+iWord, n-iBody, z+iBody);
  }
  return 1;
}
458 
/*
** Renderer callback for double emphasis: copy text, unmodified, between
** <strong> and </strong>.  Always returns 1.
*/
static int html_double_emphasis(
  struct Blob *ob,
  struct Blob *text,
  char c,
  void *opaque
){
  BLOB_APPEND_LITERAL(ob, "<strong>");
  blob_append(ob, blob_buffer(text), blob_size(text));
  BLOB_APPEND_LITERAL(ob, "</strong>");
  return 1;
}
470 
/*
** Renderer callback for single emphasis: copy text, unmodified, between
** <em> and </em>.  Always returns 1.
*/
static int html_emphasis(
  struct Blob *ob,
  struct Blob *text,
  char c,
  void *opaque
){
  BLOB_APPEND_LITERAL(ob, "<em>");
  blob_append(ob, blob_buffer(text), blob_size(text));
  BLOB_APPEND_LITERAL(ob, "</em>");
  return 1;
}
482 
/*
** Renderer callback for an image: write an <img /> element whose src and
** alt attributes come from link and alt.  A title attribute is added only
** when title is not NULL and not empty.  All attribute values are passed
** through html_quote().  Always returns 1.
*/
static int html_image(
  struct Blob *ob,
  struct Blob *link,
  struct Blob *title,
  struct Blob *alt,
  void *opaque
){
  const int hasTitle = title!=0 && blob_size(title)>0;

  BLOB_APPEND_LITERAL(ob, "<img src=\"");
  html_quote(ob, blob_buffer(link), blob_size(link));

  /* Each of the following literals first closes the previous attribute. */
  BLOB_APPEND_LITERAL(ob, "\" alt=\"");
  html_quote(ob, blob_buffer(alt), blob_size(alt));
  if( hasTitle ){
    BLOB_APPEND_LITERAL(ob, "\" title=\"");
    html_quote(ob, blob_buffer(title), blob_size(title));
  }
  BLOB_APPEND_LITERAL(ob, "\" />");
  return 1;
}
501 
/*
** Renderer callback for a hard line break: write "<br />" and a newline.
** Always returns 1.
*/
static int html_linebreak(struct Blob *ob, void *opaque){
  static const char zBreak[] = "<br />\n";
  blob_append(ob, zBreak, sizeof(zBreak)-1);
  return 1;
}
506 
html_link(struct Blob * ob,struct Blob * link,struct Blob * title,struct Blob * content,void * opaque)507 static int html_link(
508   struct Blob *ob,
509   struct Blob *link,
510   struct Blob *title,
511   struct Blob *content,
512   void *opaque
513 ){
514   char *zLink = blob_buffer(link);
515   char *zTitle = title!=0 && blob_size(title)>0 ? blob_str(title) : 0;
516   char zClose[20];
517 
518   if( zLink==0 || zLink[0]==0 ){
519     zClose[0] = 0;
520   }else{
521     static const int flags =
522        WIKI_NOBADLINKS |
523        WIKI_MARKDOWNLINKS
524     ;
525     wiki_resolve_hyperlink(ob, flags, zLink, zClose, sizeof(zClose), 0, zTitle);
526   }
527   if( blob_size(content)==0 ){
528     if( link ) BLOB_APPEND_BLOB(ob, link);
529   }else{
530     BLOB_APPEND_BLOB(ob, content);
531   }
532   blob_append(ob, zClose, -1);
533   return 1;
534 }
535 
/*
** Renderer callback for triple emphasis: copy text, unmodified, between
** <strong><em> and </em></strong>.  Always returns 1.
*/
static int html_triple_emphasis(
  struct Blob *ob,
  struct Blob *text,
  char c,
  void *opaque
){
  BLOB_APPEND_LITERAL(ob, "<strong><em>");
  blob_append(ob, blob_buffer(text), blob_size(text));
  BLOB_APPEND_LITERAL(ob, "</em></strong>");
  return 1;
}
547 
548 
/*
** Renderer callback for ordinary text: copy it to ob with <, > and &
** replaced by entities.
*/
static void html_normal_text(struct Blob *ob, struct Blob *text, void *opaque){
  const char *zText = blob_buffer(text);
  size_t nText = blob_size(text);
  html_escape(ob, zText, nText);
}
552 
553 /*
554 ** Convert markdown into HTML.
555 **
556 ** The document title is placed in output_title if not NULL.  Or if
557 ** output_title is NULL, the document title appears in the body.
558 */
markdown_to_html(struct Blob * input_markdown,struct Blob * output_title,struct Blob * output_body)559 void markdown_to_html(
560   struct Blob *input_markdown,   /* Markdown content to be rendered */
561   struct Blob *output_title,     /* Put title here.  May be NULL */
562   struct Blob *output_body       /* Put document body here. */
563 ){
564   struct mkd_renderer html_renderer = {
565     /* prolog and epilog */
566     html_prolog,
567     html_epilog,
568 
569     /* block level elements */
570     html_blockcode,
571     html_blockquote,
572     html_blockhtml,
573     html_header,
574     html_hrule,
575     html_list,
576     html_list_item,
577     html_paragraph,
578     html_table,
579     html_table_cell,
580     html_table_row,
581 
582     /* span level elements */
583     html_autolink,
584     html_codespan,
585     html_double_emphasis,
586     html_emphasis,
587     html_image,
588     html_linebreak,
589     html_link,
590     html_raw_html_tag,
591     html_triple_emphasis,
592 
593     /* low level elements */
594     0,    /* entity */
595     html_normal_text,
596 
597     /* misc. parameters */
598     "*_", /* emph_chars */
599     0     /* opaque */
600   };
601   MarkdownToHtml context;
602   memset(&context, 0, sizeof(context));
603   context.output_title = output_title;
604   html_renderer.opaque = &context;
605   if( output_title ) blob_reset(output_title);
606   blob_reset(output_body);
607   markdown(output_body, input_markdown, &html_renderer);
608 }
609