1 /*
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 **   drh@hwaci.com
14 **   http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code to do formatting of wiki text.
19 */
20 #include "config.h"
21 #include <assert.h>
22 #include "wikiformat.h"
23 
24 #if INTERFACE
25 /*
26 ** Allowed wiki transformation operations
27 */
28 #define WIKI_HTMLONLY       0x001  /* HTML markup only.  No wiki */
29 #define WIKI_INLINE         0x002  /* Do not surround with <p>..</p> */
30 #define WIKI_NOBLOCK        0x004  /* No block markup of any kind */
31 #define WIKI_BUTTONS        0x008  /* Allow sub-menu buttons */
32 #define WIKI_NOBADLINKS     0x010  /* Ignore broken hyperlinks */
33 #define WIKI_LINKSONLY      0x020  /* No markup.  Only decorate links */
34 #define WIKI_NEWLINE        0x040  /* Honor \n - break lines at each \n */
35 #define WIKI_MARKDOWNLINKS  0x080  /* Resolve hyperlinks as in markdown */
36 #define WIKI_SAFE           0x100  /* Make the result safe for embedding */
37 #define WIKI_TARGET_BLANK   0x200  /* Hyperlinks go to a new window */
38 #define WIKI_NOBRACKET      0x400  /* Omit extra [..] around hyperlinks */
39 #endif
40 
41 
/*
** Index codes for the markup attributes that are permitted.
**
** Code N selects entry N of the aAttribute[] table, so this list and
** that table have to be kept in the same order.  Zero is deliberately
** left unassigned: findAttr() returns 0 for "no such attribute".
*/
enum allowed_attr_t {
  ATTR_ALIGN        =  1,
  ATTR_ALT          =  2,
  ATTR_BGCOLOR      =  3,
  ATTR_BORDER       =  4,
  ATTR_CELLPADDING  =  5,
  ATTR_CELLSPACING  =  6,
  ATTR_CLASS        =  7,
  ATTR_CLEAR        =  8,
  ATTR_COLOR        =  9,
  ATTR_COLSPAN      = 10,
  ATTR_COMPACT      = 11,
  ATTR_FACE         = 12,
  ATTR_HEIGHT       = 13,
  ATTR_HREF         = 14,
  ATTR_HSPACE       = 15,
  ATTR_ID           = 16,
  ATTR_LINKS        = 17,
  ATTR_NAME         = 18,
  ATTR_ROWSPAN      = 19,
  ATTR_SIZE         = 20,
  ATTR_SRC          = 21,
  ATTR_START        = 22,
  ATTR_STYLE        = 23,
  ATTR_TARGET       = 24,
  ATTR_TITLE        = 25,
  ATTR_TYPE         = 26,
  ATTR_VALIGN       = 27,
  ATTR_VALUE        = 28,
  ATTR_VSPACE       = 29,
  ATTR_WIDTH        = 30
};
77 
/*
** One bit per permitted attribute.  These masks are OR-ed together to
** describe a set of attributes (for example the allowedAttr field of
** aMarkup[]).  The bit for each attribute is (1 << (ATTR_xxx - 1)).
*/
enum amsk_t {
  AMSK_ALIGN        = 1<<0,
  AMSK_ALT          = 1<<1,
  AMSK_BGCOLOR      = 1<<2,
  AMSK_BORDER       = 1<<3,
  AMSK_CELLPADDING  = 1<<4,
  AMSK_CELLSPACING  = 1<<5,
  AMSK_CLASS        = 1<<6,
  AMSK_CLEAR        = 1<<7,
  AMSK_COLOR        = 1<<8,
  AMSK_COLSPAN      = 1<<9,
  AMSK_COMPACT      = 1<<10,
  AMSK_FACE         = 1<<11,
  AMSK_HEIGHT       = 1<<12,
  AMSK_HREF         = 1<<13,
  AMSK_HSPACE       = 1<<14,
  AMSK_ID           = 1<<15,
  AMSK_LINKS        = 1<<16,
  AMSK_NAME         = 1<<17,
  AMSK_ROWSPAN      = 1<<18,
  AMSK_SIZE         = 1<<19,
  AMSK_SRC          = 1<<20,
  AMSK_START        = 1<<21,
  AMSK_STYLE        = 1<<22,
  AMSK_TARGET       = 1<<23,
  AMSK_TITLE        = 1<<24,
  AMSK_TYPE         = 1<<25,
  AMSK_VALIGN       = 1<<26,
  AMSK_VALUE        = 1<<27,
  AMSK_VSPACE       = 1<<28,
  AMSK_WIDTH        = 1<<29
};
110 
111 static const struct AllowedAttribute {
112   const char *zName;
113   unsigned int iMask;
114 } aAttribute[] = {
115   /* These indexes MUST line up with their
116      corresponding allowed_attr_t enum values.
117   */
118   { 0, 0 },
119   { "align",         AMSK_ALIGN          },
120   { "alt",           AMSK_ALT            },
121   { "bgcolor",       AMSK_BGCOLOR        },
122   { "border",        AMSK_BORDER         },
123   { "cellpadding",   AMSK_CELLPADDING    },
124   { "cellspacing",   AMSK_CELLSPACING    },
125   { "class",         AMSK_CLASS          },
126   { "clear",         AMSK_CLEAR          },
127   { "color",         AMSK_COLOR          },
128   { "colspan",       AMSK_COLSPAN        },
129   { "compact",       AMSK_COMPACT        },
130   { "face",          AMSK_FACE           },
131   { "height",        AMSK_HEIGHT         },
132   { "href",          AMSK_HREF           },
133   { "hspace",        AMSK_HSPACE         },
134   { "id",            AMSK_ID             },
135   { "links",         AMSK_LINKS          },
136   { "name",          AMSK_NAME           },
137   { "rowspan",       AMSK_ROWSPAN        },
138   { "size",          AMSK_SIZE           },
139   { "src",           AMSK_SRC            },
140   { "start",         AMSK_START          },
141   { "style",         AMSK_STYLE          },
142   { "target",        AMSK_TARGET         },
143   { "title",         AMSK_TITLE          },
144   { "type",          AMSK_TYPE           },
145   { "valign",        AMSK_VALIGN         },
146   { "value",         AMSK_VALUE          },
147   { "vspace",        AMSK_VSPACE         },
148   { "width",         AMSK_WIDTH          },
149 };
150 
151 /*
152 ** Use binary search to locate a tag in the aAttribute[] table.
153 */
findAttr(const char * z)154 static int findAttr(const char *z){
155   int i, c, first, last;
156   first = 1;
157   last = count(aAttribute) - 1;
158   while( first<=last ){
159     i = (first+last)/2;
160     c = fossil_strcmp(aAttribute[i].zName, z);
161     if( c==0 ){
162       return i;
163     }else if( c<0 ){
164       first = i+1;
165     }else{
166       last = i-1;
167     }
168   }
169   return 0;
170 }
171 
172 
173 
174 /*
175 ** Allowed markup.
176 **
177 ** Except for MARKUP_INVALID, this must all be in alphabetical order
178 ** and in numerical sequence.  The first markup type must be zero.
179 ** The value for MARKUP_XYZ must correspond to the <xyz> entry
180 ** in aAllowedMarkup[].
181 */
182 #define MARKUP_INVALID            0
183 #define MARKUP_A                  1
184 #define MARKUP_ADDRESS            2
185 #define MARKUP_HTML5_ARTICLE      3
186 #define MARKUP_HTML5_ASIDE        4
187 #define MARKUP_B                  5
188 #define MARKUP_BIG                6
189 #define MARKUP_BLOCKQUOTE         7
190 #define MARKUP_BR                 8
191 #define MARKUP_CENTER             9
192 #define MARKUP_CITE               10
193 #define MARKUP_CODE               11
194 #define MARKUP_COL                12
195 #define MARKUP_COLGROUP           13
196 #define MARKUP_DD                 14
197 #define MARKUP_DEL                15
198 #define MARKUP_DFN                16
199 #define MARKUP_DIV                17
200 #define MARKUP_DL                 18
201 #define MARKUP_DT                 19
202 #define MARKUP_EM                 20
203 #define MARKUP_FONT               21
204 #define MARKUP_HTML5_FOOTER       22
205 #define MARKUP_H1                 23
206 #define MARKUP_H2                 24
207 #define MARKUP_H3                 25
208 #define MARKUP_H4                 26
209 #define MARKUP_H5                 27
210 #define MARKUP_H6                 28
211 #define MARKUP_HTML5_HEADER       29
212 #define MARKUP_HR                 30
213 #define MARKUP_I                  31
214 #define MARKUP_IMG                32
215 #define MARKUP_INS                33
216 #define MARKUP_KBD                34
217 #define MARKUP_LI                 35
218 #define MARKUP_HTML5_NAV          36
219 #define MARKUP_NOBR               37
220 #define MARKUP_NOWIKI             38
221 #define MARKUP_OL                 39
222 #define MARKUP_P                  40
223 #define MARKUP_PRE                41
224 #define MARKUP_S                  42
225 #define MARKUP_SAMP               43
226 #define MARKUP_HTML5_SECTION      44
227 #define MARKUP_SMALL              45
228 #define MARKUP_SPAN               46
229 #define MARKUP_STRIKE             47
230 #define MARKUP_STRONG             48
231 #define MARKUP_SUB                49
232 #define MARKUP_SUP                50
233 #define MARKUP_TABLE              51
234 #define MARKUP_TBODY              52
235 #define MARKUP_TD                 53
236 #define MARKUP_TFOOT              54
237 #define MARKUP_TH                 55
238 #define MARKUP_THEAD              56
239 #define MARKUP_TITLE              57
240 #define MARKUP_TR                 58
241 #define MARKUP_TT                 59
242 #define MARKUP_U                  60
243 #define MARKUP_UL                 61
244 #define MARKUP_VAR                62
245 #define MARKUP_VERBATIM           63
246 
247 /*
248 ** The various markup is divided into the following types:
249 */
250 #define MUTYPE_SINGLE      0x0001   /* <img>, <br>, or <hr> */
251 #define MUTYPE_BLOCK       0x0002   /* Forms a new paragraph. ex: <p>, <h2> */
252 #define MUTYPE_FONT        0x0004   /* Font changes. ex: <b>, <font>, <sub> */
253 #define MUTYPE_LIST        0x0010   /* Lists.  <ol>, <ul>, or <dl> */
254 #define MUTYPE_LI          0x0020   /* List items.  <li>, <dd>, <dt> */
255 #define MUTYPE_TABLE       0x0040   /* <table> */
256 #define MUTYPE_TR          0x0080   /* <tr> */
257 #define MUTYPE_TD          0x0100   /* <td> or <th> */
258 #define MUTYPE_SPECIAL     0x0200   /* <nowiki> or <verbatim> */
259 #define MUTYPE_HYPERLINK   0x0400   /* <a> */
260 
261 /* MUTYPE values for elements that require strictly nested end-tags */
262 #define MUTYPE_Nested      0x0656
263 
264 /*
265 ** These markup types must have an end tag.
266 */
267 #define MUTYPE_STACK  (MUTYPE_BLOCK | MUTYPE_FONT | MUTYPE_LIST | MUTYPE_TABLE)
268 
269 /*
** These markup types are allowed for "inline" text.
271 */
272 #define MUTYPE_INLINE (MUTYPE_FONT | MUTYPE_HYPERLINK)
273 
274 static const struct AllowedMarkup {
275   const char *zName;       /* Name of the markup */
276   char iCode;              /* The MARKUP_* code */
277   short int iType;         /* The MUTYPE_* code */
278   int allowedAttr;         /* Allowed attributes on this markup */
279 } aMarkup[] = {
280  { 0,               MARKUP_INVALID,      0,                    0  },
281  { "a",             MARKUP_A,            MUTYPE_HYPERLINK,
282                     AMSK_HREF|AMSK_NAME|AMSK_CLASS|AMSK_TARGET|AMSK_STYLE|
283                     AMSK_TITLE},
284  { "address",       MARKUP_ADDRESS,      MUTYPE_BLOCK,         AMSK_STYLE },
285  { "article",       MARKUP_HTML5_ARTICLE, MUTYPE_BLOCK,
286                                             AMSK_ID|AMSK_CLASS|AMSK_STYLE },
287  { "aside",         MARKUP_HTML5_ASIDE,  MUTYPE_BLOCK,
288                                             AMSK_ID|AMSK_CLASS|AMSK_STYLE },
289 
290  { "b",             MARKUP_B,            MUTYPE_FONT,          AMSK_STYLE },
291  { "big",           MARKUP_BIG,          MUTYPE_FONT,          AMSK_STYLE },
292  { "blockquote",    MARKUP_BLOCKQUOTE,   MUTYPE_BLOCK,         AMSK_STYLE },
293  { "br",            MARKUP_BR,           MUTYPE_SINGLE,        AMSK_CLEAR },
294  { "center",        MARKUP_CENTER,       MUTYPE_BLOCK,         AMSK_STYLE },
295  { "cite",          MARKUP_CITE,         MUTYPE_FONT,          AMSK_STYLE },
296  { "code",          MARKUP_CODE,         MUTYPE_FONT,          AMSK_STYLE },
297  { "col",           MARKUP_COL,          MUTYPE_SINGLE,
298                     AMSK_ALIGN|AMSK_CLASS|AMSK_COLSPAN|AMSK_WIDTH|AMSK_STYLE },
299  { "colgroup",      MARKUP_COLGROUP,     MUTYPE_BLOCK,
300                     AMSK_ALIGN|AMSK_CLASS|AMSK_COLSPAN|AMSK_WIDTH|AMSK_STYLE},
301  { "dd",            MARKUP_DD,           MUTYPE_LI,            AMSK_STYLE },
302  { "del",           MARKUP_DEL,          MUTYPE_FONT,          AMSK_STYLE },
303  { "dfn",           MARKUP_DFN,          MUTYPE_FONT,          AMSK_STYLE },
304  { "div",           MARKUP_DIV,          MUTYPE_BLOCK,
305                     AMSK_ID|AMSK_CLASS|AMSK_STYLE },
306  { "dl",            MARKUP_DL,           MUTYPE_LIST,
307                     AMSK_COMPACT|AMSK_STYLE },
308  { "dt",            MARKUP_DT,           MUTYPE_LI,            AMSK_STYLE },
309  { "em",            MARKUP_EM,           MUTYPE_FONT,          AMSK_STYLE },
310  { "font",          MARKUP_FONT,         MUTYPE_FONT,
311                     AMSK_COLOR|AMSK_FACE|AMSK_SIZE|AMSK_STYLE },
312  { "footer",        MARKUP_HTML5_FOOTER, MUTYPE_BLOCK,
313                                             AMSK_ID|AMSK_CLASS|AMSK_STYLE },
314 
315  { "h1",            MARKUP_H1,           MUTYPE_BLOCK,
316                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
317  { "h2",            MARKUP_H2,           MUTYPE_BLOCK,
318                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
319  { "h3",            MARKUP_H3,           MUTYPE_BLOCK,
320                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
321  { "h4",            MARKUP_H4,           MUTYPE_BLOCK,
322                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
323  { "h5",            MARKUP_H5,           MUTYPE_BLOCK,
324                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
325  { "h6",            MARKUP_H6,           MUTYPE_BLOCK,
326                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
327 
328  { "header",        MARKUP_HTML5_HEADER, MUTYPE_BLOCK,
329                                             AMSK_ID|AMSK_CLASS|AMSK_STYLE },
330 
331  { "hr",            MARKUP_HR,           MUTYPE_SINGLE,
332                     AMSK_ALIGN|AMSK_COLOR|AMSK_SIZE|AMSK_WIDTH|
333                     AMSK_STYLE|AMSK_CLASS  },
334  { "i",             MARKUP_I,            MUTYPE_FONT,          AMSK_STYLE },
335  { "img",           MARKUP_IMG,          MUTYPE_SINGLE,
336                     AMSK_ALIGN|AMSK_ALT|AMSK_BORDER|AMSK_HEIGHT|
337                     AMSK_HSPACE|AMSK_SRC|AMSK_VSPACE|AMSK_WIDTH|AMSK_STYLE  },
338  { "ins",           MARKUP_INS,          MUTYPE_FONT,          AMSK_STYLE },
339  { "kbd",           MARKUP_KBD,          MUTYPE_FONT,          AMSK_STYLE },
340  { "li",            MARKUP_LI,           MUTYPE_LI,
341                     AMSK_TYPE|AMSK_VALUE|AMSK_STYLE  },
342  { "nav",           MARKUP_HTML5_NAV,    MUTYPE_BLOCK,
343                                             AMSK_ID|AMSK_CLASS|AMSK_STYLE },
344  { "nobr",          MARKUP_NOBR,         MUTYPE_FONT,          0  },
345  { "nowiki",        MARKUP_NOWIKI,       MUTYPE_SPECIAL,       0  },
346  { "ol",            MARKUP_OL,           MUTYPE_LIST,
347                     AMSK_START|AMSK_TYPE|AMSK_COMPACT|AMSK_STYLE  },
348  { "p",             MARKUP_P,            MUTYPE_BLOCK,
349                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
350  { "pre",           MARKUP_PRE,          MUTYPE_BLOCK,         AMSK_STYLE },
351  { "s",             MARKUP_S,            MUTYPE_FONT,          AMSK_STYLE },
352  { "samp",          MARKUP_SAMP,         MUTYPE_FONT,          AMSK_STYLE },
353  { "section",       MARKUP_HTML5_SECTION, MUTYPE_BLOCK,
354                                             AMSK_ID|AMSK_CLASS|AMSK_STYLE },
355  { "small",         MARKUP_SMALL,        MUTYPE_FONT,          AMSK_STYLE },
356  { "span",          MARKUP_SPAN,         MUTYPE_BLOCK,
357                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
358  { "strike",        MARKUP_STRIKE,       MUTYPE_FONT,          AMSK_STYLE },
359  { "strong",        MARKUP_STRONG,       MUTYPE_FONT,          AMSK_STYLE },
360  { "sub",           MARKUP_SUB,          MUTYPE_FONT,          AMSK_STYLE },
361  { "sup",           MARKUP_SUP,          MUTYPE_FONT,          AMSK_STYLE },
362  { "table",         MARKUP_TABLE,        MUTYPE_TABLE,
363                     AMSK_ALIGN|AMSK_BGCOLOR|AMSK_BORDER|AMSK_CELLPADDING|
364                     AMSK_CELLSPACING|AMSK_HSPACE|AMSK_VSPACE|AMSK_CLASS|
365                     AMSK_STYLE  },
366  { "tbody",         MARKUP_TBODY,        MUTYPE_BLOCK,
367                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
368  { "td",            MARKUP_TD,           MUTYPE_TD,
369                     AMSK_ALIGN|AMSK_BGCOLOR|AMSK_COLSPAN|
370                     AMSK_ROWSPAN|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE  },
371  { "tfoot",         MARKUP_TFOOT,        MUTYPE_BLOCK,
372                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
373  { "th",            MARKUP_TH,           MUTYPE_TD,
374                     AMSK_ALIGN|AMSK_BGCOLOR|AMSK_COLSPAN|
375                     AMSK_ROWSPAN|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE  },
376  { "thead",         MARKUP_THEAD,        MUTYPE_BLOCK,
377                     AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE  },
378  { "title",         MARKUP_TITLE,        MUTYPE_BLOCK, 0 },
379  { "tr",            MARKUP_TR,           MUTYPE_TR,
380                     AMSK_ALIGN|AMSK_BGCOLOR|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
381  { "tt",            MARKUP_TT,           MUTYPE_FONT,          AMSK_STYLE },
382  { "u",             MARKUP_U,            MUTYPE_FONT,          AMSK_STYLE },
383  { "ul",            MARKUP_UL,           MUTYPE_LIST,
384                     AMSK_TYPE|AMSK_COMPACT|AMSK_STYLE  },
385  { "var",           MARKUP_VAR,          MUTYPE_FONT,          AMSK_STYLE },
386  { "verbatim",      MARKUP_VERBATIM,     MUTYPE_SPECIAL,
387                     AMSK_ID|AMSK_TYPE },
388 };
389 
show_allowed_wiki_markup(void)390 void show_allowed_wiki_markup( void ){
391   int i; /* loop over allowedAttr */
392   for( i=1 ; i<=count(aMarkup) - 1 ; i++ ){
393     @ &lt;%s(aMarkup[i].zName)&gt;
394   }
395 }
396 
397 /*
398 ** Use binary search to locate a tag in the aMarkup[] table.
399 */
findTag(const char * z)400 static int findTag(const char *z){
401   int i, c, first, last;
402   first = 1;
403   last = count(aMarkup) - 1;
404   while( first<=last ){
405     i = (first+last)/2;
406     c = fossil_strcmp(aMarkup[i].zName, z);
407     if( c==0 ){
408       assert( aMarkup[i].iCode==i );
409       return i;
410     }else if( c<0 ){
411       first = i+1;
412     }else{
413       last = i-1;
414     }
415   }
416   return MARKUP_INVALID;
417 }
418 
419 /*
420 ** Token types
421 */
422 #define TOKEN_MARKUP        1  /* <...> */
423 #define TOKEN_CHARACTER     2  /* "&" or "<" not part of markup */
424 #define TOKEN_LINK          3  /* [...] */
425 #define TOKEN_PARAGRAPH     4  /* blank lines */
426 #define TOKEN_NEWLINE       5  /* A single "\n" */
427 #define TOKEN_BUL_LI        6  /*  "  *  " */
428 #define TOKEN_NUM_LI        7  /*  "  #  " */
429 #define TOKEN_ENUM          8  /*  "  \(?\d+[.)]?  " */
430 #define TOKEN_INDENT        9  /*  "   " */
431 #define TOKEN_RAW           10 /* Output exactly (used when wiki-use-html==1) */
432 #define TOKEN_TEXT          11 /* None of the above */
433 
434 /*
435 ** State flags.  Save the lower 16 bits for the WIKI_* flags.
436 */
437 #define AT_NEWLINE          0x0010000  /* At start of a line */
438 #define AT_PARAGRAPH        0x0020000  /* At start of a paragraph */
439 #define ALLOW_WIKI          0x0040000  /* Allow wiki markup */
440 #define ALLOW_LINKS         0x0080000  /* Allow [...] hyperlinks */
441 #define FONT_MARKUP_ONLY    0x0100000  /* Only allow MUTYPE_FONT markup */
442 #define INLINE_MARKUP_ONLY  0x0200000  /* Allow only "inline" markup */
443 #define IN_LIST             0x0400000  /* Within wiki <ul> or <ol> */
444 
445 /*
446 ** Current state of the rendering engine
447 */
448 typedef struct Renderer Renderer;
449 struct Renderer {
450   Blob *pOut;                 /* Output appended to this blob */
451   int state;                  /* Flag that govern rendering */
452   unsigned renderFlags;       /* Flags from the client */
453   int wikiList;               /* Current wiki list type */
454   int inVerbatim;             /* True in <verbatim> mode */
455   int preVerbState;           /* Value of state prior to verbatim */
456   int wantAutoParagraph;      /* True if a <p> is desired */
457   int inAutoParagraph;        /* True if within an automatic paragraph */
458   const char *zVerbatimId;    /* The id= attribute of <verbatim> */
459   int nStack;                 /* Number of elements on the stack */
460   int nAlloc;                 /* Space allocated for aStack */
461   struct sStack {
462     short iCode;                 /* Markup code */
463     short allowWiki;             /* ALLOW_WIKI if wiki allowed before tag */
464     const char *zId;             /* ID attribute or NULL */
465   } *aStack;
466 };
467 
/*
** Return the boolean value of the "wiki-use-html" setting (default 0),
** i.e. TRUE if HTML is to be the sole markup language for wiki.
**
** The setting is read with db_get_boolean() on the first call and the
** answer is kept in a static variable for later calls, on the
** assumption that the setting does not change while the process runs.
*/
static int wikiUsesHtml(void){
  static int cached = -1;     /* Negative until the setting has been read */
  if( cached>=0 ) return cached;
  cached = db_get_boolean("wiki-use-html", 0);
  return cached;
}
480 
/*
** z points to a "<" character.  Decide whether it begins something
** shaped like markup and, if so, return the number of characters in
** it, counting both the "<" and the closing ">".  Return 0 otherwise.
**
** The accepted shape is: "<", an optional "/", a letter, then any run
** of alphanumerics and "-".  After that name there must be "/>" (which
** ends the markup at once), ">", or whitespace.  In the latter cases
** the markup runs to the first ">" that is not inside a single- or
** double-quoted string.  If the text ends first, the result is 0.
*/
int html_tag_length(const char *z){
  int n = 1;          /* Offset of the character being examined */
  int quote = 0;      /* The open quote character, or 0 if none is open */
  int c;              /* z[n] */

  if( z[1]=='/' ) n = 2;
  if( !fossil_isalpha(z[n]) ) return 0;
  while( fossil_isalnum(z[n]) || z[n]=='-' ) n++;
  c = z[n];
  if( c=='/' && z[n+1]=='>' ) return n+2;
  if( c!='>' && !fossil_isspace(c) ) return 0;
  for(; (c = z[n])!=0; n++){
    if( quote ){
      if( c==quote ) quote = 0;
    }else if( c=='>' ){
      break;
    }else if( c=='"' || c=='\'' ){
      quote = c;
    }
  }
  return c=='>' ? n+1 : 0;
}
508 
/*
** z points to a "\n" character.  Scan the whitespace that follows it.
** If that whitespace holds at least one more "\n" (so the first newline
** is followed by one or more blank lines), return the number of
** characters up to and including the last such "\n".  Otherwise
** return 0.
*/
static int paragraphBreakLength(const char *z){
  int iLast = 0;      /* Offset of the most recent additional "\n" */
  int nExtra = 0;     /* Number of "\n" seen after z[0] */
  int k;
  for(k=1; fossil_isspace(z[k]); k++){
    if( z[k]!='\n' ) continue;
    nExtra++;
    iLast = k;
  }
  return nExtra>0 ? iLast+1 : 0;
}
529 
530 /*
531 ** Return the number of characters until the next "interesting"
532 ** characters.
533 **
534 ** Interesting characters are:
535 **
536 **      <
537 **      &
538 **      \n
539 **      [
540 **
541 ** The "[" is only considered if flags contain ALLOW_LINKS or ALLOW_WIKI.
542 ** The "\n" is only considered interesting if the flags constains ALLOW_WIKI.
543 */
textLength(const char * z,int flags)544 static int textLength(const char *z, int flags){
545   const char *zReject;
546   if( flags & ALLOW_WIKI ){
547     zReject = "<&[\n";
548   }else if( flags & ALLOW_LINKS ){
549     zReject = "<&[";
550   }else{
551     zReject = "<&";
552   }
553   return strcspn(z, zReject);
554 }
555 
/*
** Return true if z[], which must start with "&", begins with an HTML
** character element.  Two forms are recognized:
**
**      &#DIGITS;      one or more decimal digits
**      &LETTERS;      one or more alphabetic characters
*/
static int isElement(const char *z){
  int k;
  assert( z[0]=='&' );
  if( z[1]!='#' ){
    k = 1;
    while( fossil_isalpha(z[k]) ) k++;
    return k>1 && z[k]==';';
  }
  k = 2;
  while( fossil_isdigit(z[k]) ) k++;
  return k>2 && z[k]==';';
}
570 
/*
** Check whether z[] is the beginning of a wiki list item whose bullet
** character is listChar.  If so, return the length of the bullet text;
** otherwise return 0.
**
** A bullet is: blanks, listChar, blanks, then something that is not
** whitespace.  Each run of blanks is made of spaces and tabs and must
** have a width of at least 2, where a tab counts as 2.  The returned
** length covers everything through the second run of blanks.
*/
static int listItemLength(const char *z, const char listChar){
  int n = 0;       /* Number of characters accepted so far */
  int width;       /* Width of the current run of blanks */

  for(width=0; z[n]==' ' || z[n]=='\t'; n++){
    width += (z[n]=='\t') ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( z[n]!=listChar ) return 0;
  n++;
  for(width=0; z[n]==' ' || z[n]=='\t'; n++){
    width += (z[n]=='\t') ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( fossil_isspace(z[n]) ) return 0;
  return n;
}
595 
/*
** Check whether z[] is the beginning of an enumeration value.  If so,
** return the length of the bullet text; otherwise return 0.
**
** Syntax:
**    *  a tab or two or more spaces
**    *  one or more digits
**    *  optional "."
**    *  another tab or two or more spaces
**    *  something that is not whitespace (not part of the length)
**
** Runs of blanks may mix spaces and tabs; a tab counts as two spaces.
*/
static int enumLength(const char *z){
  int n = 0;       /* Number of characters accepted so far */
  int width;       /* Width of the current run of blanks */
  int nDigit;      /* Number of digits in the enumeration value */

  for(width=0; z[n]==' ' || z[n]=='\t'; n++){
    width += (z[n]=='\t') ? 2 : 1;
  }
  if( width<2 ) return 0;
  nDigit = 0;
  while( fossil_isdigit(z[n]) ){
    nDigit++;
    n++;
  }
  if( nDigit==0 ) return 0;
  if( z[n]=='.' ) n++;
  for(width=0; z[n]==' ' || z[n]=='\t'; n++){
    width += (z[n]=='\t') ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( fossil_isspace(z[n]) ) return 0;
  return n;
}
631 
/*
** Check whether z[] is the beginning of an indented paragraph.  If so,
** return the length of the indent; otherwise return 0.
**
** The indent is a run of spaces and tabs with a width of at least 2
** (a tab counts as 2) that is followed by something other than
** whitespace.
*/
static int indentLength(const char *z){
  int n = 0;         /* Number of blank characters */
  int width = 0;     /* Width of those blanks */
  for(; z[n]==' ' || z[n]=='\t'; n++){
    width += (z[n]=='\t') ? 2 : 1;
  }
  if( width<2 ) return 0;
  if( fossil_isspace(z[n]) ) return 0;
  return n;
}
649 
/*
** z[] begins with "[".  If a "]" occurs later in the string, z[] is a
** wiki hyperlink: return its length, counting both brackets.  If the
** string ends with no "]", return 0.
*/
static int linkLength(const char *z){
  const char *zEnd = z;     /* Walks forward to the "]" or to the end */
  assert( z[0]=='[' );
  while( *zEnd && *zEnd!=']' ) zEnd++;
  return *zEnd ? (int)(zEnd - z) + 1 : 0;
}
664 
665 /*
666 ** Get the next wiki token.
667 **
668 ** z points to the start of a token.  Return the number of
669 ** characters in that token.  Write the token type into *pTokenType.
670 */
nextWikiToken(const char * z,Renderer * p,int * pTokenType)671 static int nextWikiToken(const char *z, Renderer *p, int *pTokenType){
672   int n;
673   if( z[0]=='<' ){
674     n = html_tag_length(z);
675     if( n>0 ){
676       *pTokenType = TOKEN_MARKUP;
677       return n;
678     }else{
679       *pTokenType = TOKEN_CHARACTER;
680       return 1;
681     }
682   }
683   if( z[0]=='&' && (p->inVerbatim || !isElement(z)) ){
684     *pTokenType = TOKEN_CHARACTER;
685     return 1;
686   }
687   if( (p->state & ALLOW_WIKI)!=0 ){
688     if( z[0]=='\n' ){
689       n = paragraphBreakLength(z);
690       if( n>0 ){
691         *pTokenType = TOKEN_PARAGRAPH;
692         return n;
693       }else{
694         *pTokenType = TOKEN_NEWLINE;
695         return 1;
696       }
697     }
698     if( (p->state & AT_NEWLINE)!=0 && fossil_isspace(z[0]) ){
699       n = listItemLength(z, '*');
700       if( n>0 ){
701         *pTokenType = TOKEN_BUL_LI;
702         return n;
703       }
704       n = listItemLength(z, '#');
705       if( n>0 ){
706         *pTokenType = TOKEN_NUM_LI;
707         return n;
708       }
709       n = enumLength(z);
710       if( n>0 ){
711         *pTokenType = TOKEN_ENUM;
712         return n;
713       }
714     }
715     if( (p->state & AT_PARAGRAPH)!=0 && fossil_isspace(z[0]) ){
716       n = indentLength(z);
717       if( n>0 ){
718         *pTokenType = TOKEN_INDENT;
719         return n;
720       }
721     }
722     if( z[0]=='[' && (n = linkLength(z))>0 ){
723       *pTokenType = TOKEN_LINK;
724       return n;
725     }
726   }else if( (p->state & ALLOW_LINKS)!=0 && z[0]=='[' && (n = linkLength(z))>0 ){
727     *pTokenType = TOKEN_LINK;
728     return n;
729   }
730   *pTokenType = TOKEN_TEXT;
731   return 1 + textLength(z+1, p->state);
732 }
733 
734 /*
735 ** Parse only Wiki links, return everything else as TOKEN_RAW.
736 **
737 ** z points to the start of a token.  Return the number of
738 ** characters in that token. Write the token type into *pTokenType.
739 */
nextRawToken(const char * z,Renderer * p,int * pTokenType)740 static int nextRawToken(const char *z, Renderer *p, int *pTokenType){
741   int n;
742   if( z[0]=='[' && (n = linkLength(z))>0 ){
743     *pTokenType = TOKEN_LINK;
744     return n;
745   }
746   *pTokenType = TOKEN_RAW;
747   return 1 + textLength(z+1, p->state);
748 }
749 
/*
** The result of parsing one piece of markup (one <...> or </...>).
**
** Attribute values are pointers into the text that was parsed, not
** copies.  The parser ends a value by overwriting the character after
** it with a NUL; cTerm remembers the overwritten character so that it
** can be put back later.
*/
typedef struct ParsedMarkup ParsedMarkup;
struct ParsedMarkup {
  unsigned char endTag;   /* Non-zero for </...>, zero for <...> */
  unsigned char iCode;    /* MARKUP_* code of the tag */
  unsigned char nAttr;    /* Number of entries of aAttr[] in use */
  unsigned short iType;   /* MUTYPE_* classification of the tag */
  struct {
    unsigned char iACode;    /* ATTR_* code of the attribute */
    char *zValue;            /* Value of the attribute.  Might be NULL */
    char cTerm;              /* Character that originally ended zValue */
  } aAttr[10];
};
766 
767 /*
768 ** z[] is an HTML markup element - something that begins with '<'.
769 ** Parse this element into the p structure.
770 **
771 ** The content of z[] might be modified by converting characters
772 ** to lowercase and by inserting some "\000" characters.
773 */
parseMarkup(ParsedMarkup * p,char * z)774 static int parseMarkup(ParsedMarkup *p, char *z){
775   int i, j, c;
776   int iACode;
777   char *zValue;
778   int seen = 0;
779   char zTag[100];
780 
781   if( z[1]=='/' ){
782     p->endTag = 1;
783     i = 2;
784   }else{
785     p->endTag = 0;
786     i = 1;
787   }
788   j = 0;
789   while( fossil_isalnum(z[i]) ){
790     if( j<sizeof(zTag)-1 ) zTag[j++] = fossil_tolower(z[i]);
791     i++;
792   }
793   zTag[j] = 0;
794   p->iCode = findTag(zTag);
795   p->iType = aMarkup[p->iCode].iType;
796   p->nAttr = 0;
797   c = 0;
798   if( z[i]=='-' ){
799     p->aAttr[0].iACode = iACode = ATTR_ID;
800     i++;
801     p->aAttr[0].zValue = &z[i];
802     while( fossil_isalnum(z[i]) ){ i++; }
803     p->aAttr[0].cTerm = c = z[i];
804     z[i++] = 0;
805     p->nAttr = 1;
806     if( c=='>' ) return 0;
807   }
808   while( fossil_isspace(z[i]) ){ i++; }
809   while( c!='>' && p->nAttr<8 && fossil_isalpha(z[i]) ){
810     int attrOk;    /* True to preserve attribute.  False to ignore it */
811     j = 0;
812     while( fossil_isalnum(z[i]) ){
813       if( j<sizeof(zTag)-1 ) zTag[j++] = fossil_tolower(z[i]);
814       i++;
815     }
816     zTag[j] = 0;
817     p->aAttr[p->nAttr].iACode = iACode = findAttr(zTag);
818     attrOk = iACode!=0 && (seen & aAttribute[iACode].iMask)==0;
819     while( fossil_isspace(z[i]) ){ z++; }
820     if( z[i]!='=' ){
821       p->aAttr[p->nAttr].zValue = 0;
822       p->aAttr[p->nAttr].cTerm = 0;
823       c = 0;
824     }else{
825       i++;
826       while( fossil_isspace(z[i]) ){ z++; }
827       if( z[i]=='"' ){
828         i++;
829         zValue = &z[i];
830         while( z[i] && z[i]!='"' ){ i++; }
831       }else if( z[i]=='\'' ){
832         i++;
833         zValue = &z[i];
834         while( z[i] && z[i]!='\'' ){ i++; }
835       }else{
836         zValue = &z[i];
837         while( !fossil_isspace(z[i]) && z[i]!='>' ){
838           if( z[i]=='\'' || z[i]=='"' ) attrOk = 0;
839           i++;
840         }
841       }
842       if( attrOk ){
843         p->aAttr[p->nAttr].zValue = zValue;
844         p->aAttr[p->nAttr].cTerm = c = z[i];
845         if( z[i]==0 ){
846           i--;
847         }else{
848           z[i] = 0;
849         }
850       }
851       i++;
852     }
853     if( attrOk ){
854       seen |= aAttribute[iACode].iMask;
855       p->nAttr++;
856     }
857     while( fossil_isspace(z[i]) ){ i++; }
858     if( z[i]==0 || z[i]=='>' || (z[i]=='/' && z[i+1]=='>') ) break;
859   }
860   return seen;
861 }
862 
863 /*
864 ** Render markup on the given blob.
865 */
renderMarkup(Blob * pOut,ParsedMarkup * p)866 static void renderMarkup(Blob *pOut, ParsedMarkup *p){
867   int i;
868   if( p->endTag ){
869     blob_appendf(pOut, "</%s>", aMarkup[p->iCode].zName);
870   }else{
871     blob_appendf(pOut, "<%s", aMarkup[p->iCode].zName);
872     for(i=0; i<p->nAttr; i++){
873       blob_appendf(pOut, " %s", aAttribute[p->aAttr[i].iACode].zName);
874       if( p->aAttr[i].zValue ){
875         const char *zVal = p->aAttr[i].zValue;
876         if( p->aAttr[i].iACode==ATTR_SRC && zVal[0]=='/' ){
877           blob_appendf(pOut, "=\"%R%s\"", zVal);
878         }else{
879           blob_appendf(pOut, "=\"%s\"", zVal);
880         }
881       }
882     }
883     if (p->iType & MUTYPE_SINGLE){
884       blob_append_string(pOut, " /");
885     }
886     blob_append_char(pOut, '>');
887   }
888 }
889 
890 /*
891 ** When the markup was parsed, some "\000" may have been inserted.
892 ** This routine restores to those "\000" values back to their
893 ** original content.
894 */
unparseMarkup(ParsedMarkup * p)895 static void unparseMarkup(ParsedMarkup *p){
896   int i, n;
897   for(i=0; i<p->nAttr; i++){
898     char *z = p->aAttr[i].zValue;
899     if( z==0 ) continue;
900     if( p->aAttr[i].cTerm ){
901       n = strlen(z);
902       z[n] = p->aAttr[i].cTerm;
903     }
904   }
905 }
906 
907 /*
908 ** Return the value of attribute attrId.  Return NULL if there is no
909 ** ID attribute.
910 */
attributeValue(ParsedMarkup * p,int attrId)911 static const char *attributeValue(ParsedMarkup *p, int attrId){
912   int i;
913   for(i=0; i<p->nAttr; i++){
914     if( p->aAttr[i].iACode==attrId ){
915       return p->aAttr[i].zValue;
916     }
917   }
918   return 0;
919 }
920 
921 /*
922 ** Return the ID attribute for markup.  Return NULL if there is no
923 ** ID attribute.
924 */
markupId(ParsedMarkup * p)925 static const char *markupId(ParsedMarkup *p){
926   return attributeValue(p, ATTR_ID);
927 }
928 
929 /*
930 ** Check markup pMarkup to see if it is a hyperlink with class "button"
931 ** that is follows by simple text and an </a> only.  Example:
932 **
933 **     <a class="button" href="../index.wiki">Index</a>
934 **
935 ** If the markup matches this pattern, and if the WIKI_BUTTONS flag was
936 ** passed to wiki_convert(), then transform this link into a submenu
937 ** button, skip the text, and set *pN equal to the total length of the
938 ** text through the end of </a> and return true.  If the markup does
939 ** not match or if WIKI_BUTTONS is not set, then make no changes to *pN
940 ** and return false.
941 */
isButtonHyperlink(Renderer * p,ParsedMarkup * pMarkup,const char * z,int * pN)942 static int isButtonHyperlink(
943   Renderer *p,              /* Renderer state */
944   ParsedMarkup *pMarkup,    /* Potential button markup */
945   const char *z,            /* Complete text of Wiki */
946   int *pN                   /* Characters of z[] consumed */
947 ){
948   const char *zClass;
949   const char *zHref;
950   char *zTag;
951   int i, j;
952   if( (p->state & WIKI_BUTTONS)==0 ) return 0;
953   zClass = attributeValue(pMarkup, ATTR_CLASS);
954   if( zClass==0 ) return 0;
955   if( fossil_strcmp(zClass, "button")!=0 ) return 0;
956   zHref = attributeValue(pMarkup, ATTR_HREF);
957   if( zHref==0 ) return 0;
958   i = *pN;
959   while( z[i] && z[i]!='<' ){ i++; }
960   if( fossil_strnicmp(&z[i], "</a>",4)!=0 ) return 0;
961   for(j=*pN; fossil_isspace(z[j]); j++){}
962   zTag = mprintf("%.*s", i-j, &z[j]);
963   j = (int)strlen(zTag);
964   while( j>0 && fossil_isspace(zTag[j-1]) ){ j--; }
965   if( j==0 ) return 0;
966   style_submenu_element(zTag, "%s", zHref);
967   *pN = i+4;
968   return 1;
969 }
970 
971 /*
972 ** Pop a single element off of the stack.  As the element is popped,
973 ** output its end tag if it is not a </div> tag.
974 */
popStack(Renderer * p)975 static void popStack(Renderer *p){
976   if( p->nStack ){
977     int iCode;
978     p->nStack--;
979     iCode = p->aStack[p->nStack].iCode;
980     if( (iCode!=MARKUP_DIV || p->aStack[p->nStack].zId==0) && p->pOut ){
981       blob_appendf(p->pOut, "</%s>", aMarkup[iCode].zName);
982     }
983   }
984 }
985 
986 /*
987 ** Push a new markup value onto the stack.  Enlarge the stack
988 ** if necessary.
989 */
pushStackWithId(Renderer * p,int elem,const char * zId,int w)990 static void pushStackWithId(Renderer *p, int elem, const char *zId, int w){
991   if( p->nStack>=p->nAlloc ){
992     p->nAlloc = p->nAlloc*2 + 100;
993     p->aStack = fossil_realloc(p->aStack, p->nAlloc*sizeof(p->aStack[0]));
994   }
995   p->aStack[p->nStack].iCode = elem;
996   p->aStack[p->nStack].zId = zId;
997   p->aStack[p->nStack].allowWiki = w;
998   p->nStack++;
999 }
/*
** Push markup code elem onto the stack with no ID and allowWiki==0.
*/
static void pushStack(Renderer *p, int elem){
  const char *zNoId = 0;
  pushStackWithId(p, elem, zNoId, 0);
}
1003 
1004 /*
1005 ** Pop the stack until the top-most iTag element is removed.
1006 ** If there is no iTag element on the stack, this routine
1007 ** is a no-op.
1008 */
popStackToTag(Renderer * p,int iTag)1009 static void popStackToTag(Renderer *p, int iTag){
1010   int i;
1011   for(i=p->nStack-1; i>=0; i--){
1012     if( p->aStack[i].iCode!=iTag ) continue;
1013     if( p->aStack[i].zId ) continue;
1014     break;
1015   }
1016   if( i<0 ) return;
1017   while( p->nStack>i ){
1018     popStack(p);
1019   }
1020 }
1021 
1022 /*
1023 ** Attempt to find a find a tag of type iTag with id zId.  Return -1
1024 ** if not found.  If found, return its stack level.
1025 */
findTagWithId(Renderer * p,int iTag,const char * zId)1026 static int findTagWithId(Renderer *p, int iTag, const char *zId){
1027   int i;
1028   assert( zId!=0 );
1029   for(i=p->nStack-1; i>=0; i--){
1030     if( p->aStack[i].iCode!=iTag ) continue;
1031     if( p->aStack[i].zId==0 ) continue;
1032     if( fossil_strcmp(zId, p->aStack[i].zId)!=0 ) continue;
1033     break;
1034   }
1035   return i;
1036 }
1037 
1038 /*
1039 ** Pop the stack until the top-most element of the stack
1040 ** is an element that matches the type in iMask.  Return
1041 ** code of the markup element that is on left on top of the stack.
1042 ** If the stack does not have an element
1043 ** that matches iMask, then leave the stack unchanged and
1044 ** return false (MARKUP_INVALID).
1045 */
backupToType(Renderer * p,int iMask)1046 static int backupToType(Renderer *p, int iMask){
1047   int i;
1048   for(i=p->nStack-1; i>=0; i--){
1049     if( aMarkup[p->aStack[i].iCode].iType & iMask ) break;
1050   }
1051   if( i<0 ) return 0;
1052   i++;
1053   while( p->nStack>i ){
1054     popStack(p);
1055   }
1056   return p->aStack[i-1].iCode;
1057 }
1058 
1059 /*
1060 ** Begin a new paragraph if that something that is needed.
1061 */
startAutoParagraph(Renderer * p)1062 static void startAutoParagraph(Renderer *p){
1063   if( p->wantAutoParagraph==0 ) return;
1064   if( p->state & WIKI_LINKSONLY ) return;
1065   if( p->wikiList==MARKUP_OL || p->wikiList==MARKUP_UL ) return;
1066   blob_append_string(p->pOut, "<p>");
1067   p->wantAutoParagraph = 0;
1068   p->inAutoParagraph = 1;
1069 }
1070 
1071 /*
1072 ** End a paragraph if we are in one.
1073 */
endAutoParagraph(Renderer * p)1074 static void endAutoParagraph(Renderer *p){
1075   if( p->inAutoParagraph ){
1076     p->inAutoParagraph = 0;
1077   }
1078 }
1079 
1080 /*
1081 ** If the input string corresponds to an existing baseline,
1082 ** return true.
1083 */
is_valid_hname(const char * z)1084 static int is_valid_hname(const char *z){
1085   int n = strlen(z);
1086   if( n<4 || n>HNAME_MAX ) return 0;
1087   if( !validate16(z, n) ) return 0;
1088   return 1;
1089 }
1090 
1091 /*
1092 ** Return TRUE if a hash name corresponds to an artifact in this
1093 ** repository.
1094 */
in_this_repo(const char * zUuid)1095 static int in_this_repo(const char *zUuid){
1096   static Stmt q;
1097   int rc;
1098   int n;
1099   char zU2[HNAME_MAX+1];
1100   db_static_prepare(&q,
1101      "SELECT 1 FROM blob WHERE uuid>=:u AND uuid<:u2"
1102   );
1103   db_bind_text(&q, ":u", zUuid);
1104   n = (int)strlen(zUuid);
1105   if( n>=sizeof(zU2) ) n = sizeof(zU2)-1;
1106   memcpy(zU2, zUuid, n);
1107   zU2[n-1]++;
1108   zU2[n] = 0;
1109   db_bind_text(&q, ":u2", zU2);
1110   rc = db_step(&q);
1111   db_reset(&q);
1112   return rc==SQLITE_ROW;
1113 }
1114 
1115 /*
1116 ** zTarget is guaranteed to be a UUID.  It might be the UUID of a ticket.
1117 ** If it is, store in *pClosed a true or false depending on whether or not
1118 ** the ticket is closed and return true. If zTarget
1119 ** is not the UUID of a ticket, return false.
1120 */
is_ticket(const char * zTarget,int * pClosed)1121 int is_ticket(
1122   const char *zTarget,    /* Ticket UUID */
1123   int *pClosed            /* True if the ticket is closed */
1124 ){
1125   static Stmt q;
1126   int n;
1127   int rc;
1128   char zLower[HNAME_MAX+1];
1129   char zUpper[HNAME_MAX+1];
1130   n = strlen(zTarget);
1131   memcpy(zLower, zTarget, n+1);
1132   canonical16(zLower, n+1);
1133   memcpy(zUpper, zLower, n+1);
1134   zUpper[n-1]++;
1135   if( !db_static_stmt_is_init(&q) ){
1136     char *zClosedExpr = db_get("ticket-closed-expr", "status='Closed'");
1137     db_static_prepare(&q,
1138       "SELECT %z FROM ticket "
1139       " WHERE tkt_uuid>=:lwr AND tkt_uuid<:upr",
1140       zClosedExpr /*safe-for-%s*/
1141     );
1142   }
1143   db_bind_text(&q, ":lwr", zLower);
1144   db_bind_text(&q, ":upr", zUpper);
1145   if( db_step(&q)==SQLITE_ROW ){
1146     rc = 1;
1147     *pClosed = db_column_int(&q, 0);
1148   }else{
1149     rc = 0;
1150   }
1151   db_reset(&q);
1152   return rc;
1153 }
1154 
1155 /*
1156 ** Return a pointer to the name part of zTarget (skipping the "wiki:" prefix
1157 ** if there is one) if zTarget is a valid wiki page name.  Return NULL if
1158 ** zTarget names a page that does not exist.
1159 */
validWikiPageName(int mFlags,const char * zTarget)1160 static const char *validWikiPageName(int mFlags, const char *zTarget){
1161   if( strncmp(zTarget, "wiki:", 5)==0
1162       && wiki_name_is_wellformed((const unsigned char*)zTarget) ){
1163     return zTarget+5;
1164   }
1165   if( strcmp(zTarget, "Sandbox")==0 ) return zTarget;
1166   if( wiki_name_is_wellformed((const unsigned char *)zTarget)
1167    && ((mFlags & WIKI_NOBADLINKS)==0 ||
1168         db_exists("SELECT 1 FROM tag WHERE tagname GLOB 'wiki-%q'"
1169                   " AND (SELECT value FROM tagxref WHERE tagid=tag.tagid"
1170                   " ORDER BY mtime DESC LIMIT 1) > 0", zTarget))
1171   ){
1172     return zTarget;
1173   }
1174   return 0;
1175 }
1176 
/* Hash that wiki-page hyperlinks are redirected to, or NULL for none */
static const char *wikiOverrideHash = 0;

/*
** Set the hash value to which Fossil-wiki hyperlinks to wiki pages
** should be redirected.  Passing NULL cancels the override so that
** links resolve normally again.
**
** Only the pointer is saved.  The string is not copied.
*/
void wiki_hyperlink_override(const char *zUuid){
  wikiOverrideHash = zUuid;
}
1187 
1188 
1189 /*
1190 ** If links to wiki page zTarget should be redirected to some historical
1191 ** version of that page, then return the hash of the historical version.
1192 ** If no override is required, return NULL.
1193 */
wiki_is_overridden(const char * zTarget)1194 static const char *wiki_is_overridden(const char *zTarget){
1195   if( wikiOverrideHash==0 ) return 0;
1196   /* The override should only happen if the override version is not the
1197   ** latest version of the wiki page. */
1198   if( !db_exists(
1199     "SELECT 1 FROM tag, blob, tagxref AS xA, tagxref AS xB "
1200     " WHERE tag.tagname GLOB 'wiki-%q*'"
1201     "   AND blob.uuid GLOB '%q'"
1202     "   AND xA.tagid=tag.tagid AND xA.rid=blob.rid"
1203     "   AND xB.tagid=tag.tagid AND xB.mtime>xA.mtime",
1204     zTarget, wikiOverrideHash
1205   ) ){
1206     return 0;
1207   }
1208   return wikiOverrideHash;
1209 }
1210 
/*
** Resolve a hyperlink.  The zTarget argument is the content of the [...]
** in the wiki.  Append to the output string whatever text is appropriate
** for opening the hyperlink.  Write into zClose[0...nClose-1] text that will
** close the markup.  nClose must be at least 20.
**
** If this routine determines that no hyperlink should be generated, then
** set zClose[0] to 0.
**
** Actually, this routine might or might not append the hyperlink, depending
** on current rendering rules: specifically, links to artifact hashes and
** tickets are only generated if g.perm.Hyperlink is set.
**
** If zTitle is not NULL it is added to the link as a title='...'
** attribute.  Otherwise, if WIKI_TARGET_BLANK is set in mFlags, a
** target='_blank' attribute is added instead.
**
** The forms of zTarget are tried in the following order.  The first
** form that matches determines the output:
**
**    [http://fossil-scm.org/]
**    [https://fossil-scm.org/]
**    [ftp://fossil-scm.org/]
**    [mailto:fossil-users@lists.fossil-scm.org]
**
**    [/path]        ->  Refers to the root of the Fossil hierarchy, not
**                       the root of the URI domain
**
**    [./relpath]    ->  Not recognized when WIKI_LINKSONLY is set
**    [../relpath]
**
**    [#fragment]
**
**    [0123456789abcdef]  ->  Ticket or other artifact hash
**
**    [InterMap:Link]  ->  Interwiki link
**
**    [WikiPageName]
**    [wiki:WikiPageName]
**
**    [2010-02-27 07:13]  ->  Timeline for that date
**
** If nothing matches, the target becomes a literal href when
** WIKI_MARKDOWNLINKS is set.  Otherwise it is either ignored or
** rendered as a "brokenlink" span.
*/
void wiki_resolve_hyperlink(
  Blob *pOut,             /* Write the HTML output here */
  int mFlags,             /* Rendering option flags */
  const char *zTarget,    /* Hyperlink target; text within [...] */
  char *zClose,           /* Write hyperlink closing text here */
  int nClose,             /* Bytes available in zClose[] */
  const char *zOrig,      /* Complete document text */
  const char *zTitle      /* Title of the link */
){
  const char *zTerm = "</a>";   /* Closing text.  Copied to zClose at end */
  const char *z;                /* Wiki page name with "wiki:" removed */
  char *zExtra = 0;             /* Extra attribute text with leading space */
  const char *zExtraNS = 0;     /* zExtra without the leading space */
  char *zRemote = 0;            /* URL for an interwiki link */

  if( zTitle ){
    zExtra = mprintf(" title='%h'", zTitle);
    zExtraNS = zExtra+1;
  }else if( mFlags & WIKI_TARGET_BLANK ){
    zExtra = mprintf(" target='_blank'");
    zExtraNS = zExtra+1;
  }
  assert( nClose>=20 );
  if( strncmp(zTarget, "http:", 5)==0
   || strncmp(zTarget, "https:", 6)==0
   || strncmp(zTarget, "ftp:", 4)==0
   || strncmp(zTarget, "mailto:", 7)==0
  ){
    /* External URL.  NOTE(review): zTarget is written with %s here while
    ** the other branches use %h - confirm this is intentional. */
    blob_appendf(pOut, "<a href=\"%s\"%s>", zTarget, zExtra);
  }else if( zTarget[0]=='/' ){
    blob_appendf(pOut, "<a href=\"%R%h\"%s>", zTarget, zExtra);
  }else if( zTarget[0]=='.'
         && (zTarget[1]=='/' || (zTarget[1]=='.' && zTarget[2]=='/'))
         && (mFlags & WIKI_LINKSONLY)==0 ){
    blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
  }else if( zTarget[0]=='#' ){
    blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
  }else if( is_valid_hname(zTarget) ){
    int isClosed = 0;
    /* zLB is the "[" written ahead of the link text, or "" if
    ** WIKI_NOBRACKET is set.  The matching "]" is the first character
    ** of zTerm and is removed at the end of this branch if not wanted. */
    const char *zLB = (mFlags & WIKI_NOBRACKET)==0 ? "[" : "";
    if( strlen(zTarget)<=HNAME_MAX && is_ticket(zTarget, &isClosed) ){
      /* Special display processing for tickets.  Display the hyperlink
      ** as crossed out if the ticket is closed.
      */
      if( isClosed ){
        if( g.perm.Hyperlink ){
          blob_appendf(pOut,
             "%z<span class=\"wikiTagCancelled\">%s",
             xhref(zExtraNS,"%R/info/%s",zTarget), zLB
          );
          zTerm = "]</span></a>";
        }else{
          blob_appendf(pOut,"<span class=\"wikiTagCancelled\">%s", zLB);
          zTerm = "]</span>";
        }
      }else{
        if( g.perm.Hyperlink ){
          blob_appendf(pOut,"%z%s", xhref(zExtraNS,"%R/info/%s", zTarget),zLB);
          zTerm = "]</a>";
        }else{
          blob_appendf(pOut, "%s", zLB);
          zTerm = "]";
        }
      }
    }else if( !in_this_repo(zTarget) ){
      /* Looks like a hash but matches nothing in the repository */
      if( (mFlags & (WIKI_LINKSONLY|WIKI_NOBADLINKS))!=0 ){
        zTerm = "";
      }else{
        blob_appendf(pOut, "<span class=\"brokenlink\">%s", zLB);
        zTerm = "]</span>";
      }
    }else if( g.perm.Hyperlink ){
      blob_appendf(pOut, "%z%s",xhref(zExtraNS, "%R/info/%s", zTarget), zLB);
      zTerm = "]</a>";
    }else{
      zTerm = "";
    }
    if( zTerm[0]==']' && (mFlags & WIKI_NOBRACKET)!=0 ) zTerm++;
  }else if( (zRemote = interwiki_url(zTarget))!=0 ){
    /* The %z conversion presumably also frees zRemote - confirm */
    blob_appendf(pOut, "<a href=\"%z\"%s>", zRemote, zExtra);
    zTerm = "</a>";
  }else if( (z = validWikiPageName(mFlags, zTarget))!=0 ){
    /* The link is to a valid wiki page name */
    const char *zOverride = wiki_is_overridden(zTarget);
    if( zOverride ){
      blob_appendf(pOut, "<a href=\"%R/info/%S\"%s>", zOverride, zExtra);
    }else{
      blob_appendf(pOut, "<a href=\"%R/wiki?name=%T\"%s>", z, zExtra);
    }
  }else if( strlen(zTarget)>=10 && fossil_isdigit(zTarget[0]) && zTarget[4]=='-'
            && db_int(0, "SELECT datetime(%Q) NOT NULL", zTarget) ){
    /* Dates or date-and-times in ISO8601 resolve to a link to the
    ** timeline for that date */
    blob_appendf(pOut, "<a href=\"%R/timeline?c=%T\"%s>", zTarget, zExtra);
  }else if( mFlags & WIKI_MARKDOWNLINKS ){
    /* If none of the above, and if rendering links for markdown, then
    ** create a link to the literal text of the target */
    blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
  }else if( zOrig && zTarget>=&zOrig[2]
        && zTarget[-1]=='[' && !fossil_isspace(zTarget[-2]) ){
    /* If the hyperlink markup is not preceded by whitespace, then it
    ** is probably a C-language subscript or similar, not really a
    ** hyperlink.  Just ignore it. */
    zTerm = "";
  }else if( (mFlags & (WIKI_NOBADLINKS|WIKI_LINKSONLY))!=0 ){
    /* Also ignore the link if various flags are set */
    zTerm = "";
  }else{
    blob_appendf(pOut, "<span class=\"brokenlink\">[%h]", zTarget);
    zTerm = "</span>";
  }
  if( zExtra ) fossil_free(zExtra);
  /* Every zTerm assigned above must fit in zClose[] */
  assert( strlen(zTerm)<nClose );
  sqlite3_snprintf(nClose, zClose, "%s", zTerm);
}
1361 
1362 /*
1363 ** Check to see if the given parsed markup is the correct
1364 ** </verbatim> tag.
1365 */
endVerbatim(Renderer * p,ParsedMarkup * pMarkup)1366 static int endVerbatim(Renderer *p, ParsedMarkup *pMarkup){
1367   char *z;
1368   assert( p->inVerbatim );
1369   if( pMarkup->iCode!=MARKUP_VERBATIM ) return 0;
1370   if( !pMarkup->endTag ) return 0;
1371   if( p->zVerbatimId==0 ) return 1;
1372   if( pMarkup->nAttr!=1 ) return 0;
1373   z = pMarkup->aAttr[0].zValue;
1374   return fossil_strcmp(z, p->zVerbatimId)==0;
1375 }
1376 
1377 /*
1378 ** z[] points to the text that immediately follows markup of the form:
1379 **
1380 **      <verbatim type='pikchr ...'>
1381 **
1382 ** zClass is the argument to "type".  This routine will process the
1383 ** Pikchr text through the next matching </verbatim> (or until end-of-file)
1384 ** and append the resulting SVG output onto p.  It then returns the
1385 ** number of bytes of text processed, including the closing </verbatim>.
1386 */
wiki_process_pikchr(Renderer * p,char * z,const char * zClass)1387 static int wiki_process_pikchr(Renderer *p, char *z, const char *zClass){
1388   ParsedMarkup m;         /* Parsed closing tag */
1389   int i = 0;              /* For looping over z[] in search of </verbatim> */
1390   int iRet = 0;           /* Value  to return */
1391   int atEnd = 0;          /* True if se have found the </verbatim> */
1392   int nMarkup = 0;        /* Length of a markup we are checking */
1393 
1394   /* Search for the closing </verbatim> tag */
1395   while( z[i]!=0 ){
1396     char *zEnd = strchr(z+i, '<');
1397     if( zEnd==0 ){
1398       i += (int)strlen(z+i);
1399       iRet = i;
1400       break;
1401     }
1402     nMarkup = html_tag_length(zEnd);
1403     if( nMarkup<11 || fossil_strnicmp(zEnd, "</verbatim", 10)!=0 ){
1404       i = (int)(zEnd - z) + 1;
1405       continue;
1406     }
1407     (void)parseMarkup(&m, z+i);
1408     atEnd = endVerbatim(p, &m);
1409     unparseMarkup(&m);
1410     if( atEnd ){
1411       iRet = i + nMarkup;
1412       break;
1413     }
1414     i++;
1415   }
1416 
1417   /* The Pikchr source text should be i character in length and iRet is
1418   ** i plus the number of bytes in the </verbatim>.  Generate the reply.
1419   */
1420   assert( strncmp(zClass,"pikchr",6)==0 );
1421   zClass += 6;
1422   while( fossil_isspace(zClass[0]) ) zClass++;
1423   blob_append(p->pOut, "<p>", 3);
1424   pikchr_to_html(p->pOut, z, i, zClass, (int)strlen(zClass));
1425   blob_append(p->pOut, "</p>\n", 5);
1426   return iRet;
1427 }
1428 
1429 /*
1430 ** Return the MUTYPE for the top of the stack.
1431 */
stackTopType(Renderer * p)1432 static int stackTopType(Renderer *p){
1433   if( p->nStack<=0 ) return 0;
1434   return aMarkup[p->aStack[p->nStack-1].iCode].iType;
1435 }
1436 
1437 /*
1438 ** Convert the wiki in z[] into html in the renderer p.  The
1439 ** renderer has already been initialized.
1440 **
1441 ** This routine will probably modify the content of z[].
1442 */
wiki_render(Renderer * p,char * z)1443 static void wiki_render(Renderer *p, char *z){
1444   int tokenType;
1445   ParsedMarkup markup;
1446   int n;
1447   int inlineOnly = (p->state & INLINE_MARKUP_ONLY)!=0;
1448   int wikiHtmlOnly = (p->state & (WIKI_HTMLONLY | WIKI_LINKSONLY))!=0;
1449   int linksOnly = (p->state & WIKI_LINKSONLY)!=0;
1450   char *zOrig = z;
1451 
1452   /* Make sure the attribute constants and names still align
1453   ** following changes in the attribute list. */
1454   assert( fossil_strcmp(aAttribute[ATTR_WIDTH].zName, "width")==0 );
1455 
1456   while( z[0] ){
1457     if( wikiHtmlOnly ){
1458       n = nextRawToken(z, p, &tokenType);
1459     }else{
1460       n = nextWikiToken(z, p, &tokenType);
1461     }
1462     p->state &= ~(AT_NEWLINE|AT_PARAGRAPH);
1463     switch( tokenType ){
1464       case TOKEN_PARAGRAPH: {
1465         if( inlineOnly ){
1466           /* blob_append_string(p->pOut, " &para; "); */
1467           blob_append_string(p->pOut, " &nbsp;&nbsp; ");
1468         }else{
1469           if( p->wikiList ){
1470             popStackToTag(p, p->wikiList);
1471             p->wikiList = 0;
1472           }
1473           endAutoParagraph(p);
1474           blob_append_string(p->pOut, "\n\n");
1475           p->wantAutoParagraph = 1;
1476         }
1477         p->state |= AT_PARAGRAPH|AT_NEWLINE;
1478         break;
1479       }
1480       case TOKEN_NEWLINE: {
1481         if( p->renderFlags & WIKI_NEWLINE ){
1482           blob_append_string(p->pOut, "<br>\n");
1483         }else{
1484           blob_append_string(p->pOut, "\n");
1485         }
1486         p->state |= AT_NEWLINE;
1487         break;
1488       }
1489       case TOKEN_BUL_LI: {
1490         if( inlineOnly ){
1491           blob_append_string(p->pOut, " &bull; ");
1492         }else{
1493           if( p->wikiList!=MARKUP_UL ){
1494             if( p->wikiList ){
1495               popStackToTag(p, p->wikiList);
1496             }
1497             endAutoParagraph(p);
1498             pushStack(p, MARKUP_UL);
1499             blob_append_string(p->pOut, "<ul>");
1500             p->wikiList = MARKUP_UL;
1501           }
1502           popStackToTag(p, MARKUP_LI);
1503           startAutoParagraph(p);
1504           pushStack(p, MARKUP_LI);
1505           blob_append_string(p->pOut, "<li>");
1506         }
1507         break;
1508       }
1509       case TOKEN_NUM_LI: {
1510         if( inlineOnly ){
1511           blob_append_string(p->pOut, " # ");
1512         }else{
1513           if( p->wikiList!=MARKUP_OL ){
1514             if( p->wikiList ){
1515               popStackToTag(p, p->wikiList);
1516             }
1517             endAutoParagraph(p);
1518             pushStack(p, MARKUP_OL);
1519             blob_append_string(p->pOut, "<ol>");
1520             p->wikiList = MARKUP_OL;
1521           }
1522           popStackToTag(p, MARKUP_LI);
1523           startAutoParagraph(p);
1524           pushStack(p, MARKUP_LI);
1525           blob_append_string(p->pOut, "<li>");
1526         }
1527         break;
1528       }
1529       case TOKEN_ENUM: {
1530         if( inlineOnly ){
1531           blob_appendf(p->pOut, " (%d) ", atoi(z));
1532         }else{
1533           if( p->wikiList!=MARKUP_OL ){
1534             if( p->wikiList ){
1535               popStackToTag(p, p->wikiList);
1536             }
1537             endAutoParagraph(p);
1538             pushStack(p, MARKUP_OL);
1539             blob_append_string(p->pOut, "<ol>");
1540             p->wikiList = MARKUP_OL;
1541           }
1542           popStackToTag(p, MARKUP_LI);
1543           startAutoParagraph(p);
1544           pushStack(p, MARKUP_LI);
1545           blob_appendf(p->pOut, "<li value=\"%d\">", atoi(z));
1546         }
1547         break;
1548       }
1549       case TOKEN_INDENT: {
1550         if( !inlineOnly ){
1551           assert( p->wikiList==0 );
1552           pushStack(p, MARKUP_BLOCKQUOTE);
1553           blob_append_string(p->pOut, "<blockquote>");
1554           p->wantAutoParagraph = 0;
1555           p->wikiList = MARKUP_BLOCKQUOTE;
1556         }
1557         break;
1558       }
1559       case TOKEN_CHARACTER: {
1560         startAutoParagraph(p);
1561         if( z[0]=='<' ){
1562           blob_append_string(p->pOut, "&lt;");
1563         }else if( z[0]=='&' ){
1564           blob_append_string(p->pOut, "&amp;");
1565         }
1566         break;
1567       }
1568       case TOKEN_LINK: {
1569         char *zTarget;
1570         char *zDisplay = 0;
1571         int i, j;
1572         int savedState;
1573         char zClose[20];
1574         char cS1 = 0;
1575         int iS1 = 0;
1576 
1577         startAutoParagraph(p);
1578         zTarget = &z[1];
1579         for(i=1; z[i] && z[i]!=']'; i++){
1580           if( z[i]=='|' && zDisplay==0 ){
1581             zDisplay = &z[i+1];
1582             for(j=i; j>0 && fossil_isspace(z[j-1]); j--){}
1583             iS1 = j;
1584             cS1 = z[j];
1585             z[j] = 0;
1586           }
1587         }
1588         z[i] = 0;
1589         if( zDisplay==0 ){
1590           zDisplay = zTarget + interwiki_removable_prefix(zTarget);
1591         }else{
1592           while( fossil_isspace(*zDisplay) ) zDisplay++;
1593         }
1594         wiki_resolve_hyperlink(p->pOut, p->state,
1595                                zTarget, zClose, sizeof(zClose), zOrig, 0);
1596         if( linksOnly || zClose[0]==0 || p->inVerbatim ){
1597           if( cS1 ) z[iS1] = cS1;
1598           if( zClose[0]!=']' ){
1599             blob_appendf(p->pOut, "[%h]%s", zTarget, zClose);
1600           }else{
1601             blob_appendf(p->pOut, "%h%s", zTarget, zClose);
1602           }
1603         }else{
1604           savedState = p->state;
1605           p->state &= ~ALLOW_WIKI;
1606           p->state |= FONT_MARKUP_ONLY;
1607           wiki_render(p, zDisplay);
1608           p->state = savedState;
1609           blob_append(p->pOut, zClose, -1);
1610         }
1611         break;
1612       }
1613       case TOKEN_TEXT: {
1614         int i;
1615         for(i=0; i<n && fossil_isspace(z[i]); i++){}
1616         if( i<n ) startAutoParagraph(p);
1617         blob_append(p->pOut, z, n);
1618         break;
1619       }
1620       case TOKEN_RAW: {
1621         if( linksOnly ){
1622           htmlize_to_blob(p->pOut, z, n);
1623         }else{
1624           blob_append(p->pOut, z, n);
1625         }
1626         break;
1627       }
1628       case TOKEN_MARKUP: {
1629         const char *zId;
1630         int iDiv;
1631         int mAttr = parseMarkup(&markup, z);
1632 
1633         /* Convert <title> to <h1 align='center'> */
1634         if( markup.iCode==MARKUP_TITLE && !p->inVerbatim ){
1635           markup.iCode = MARKUP_H1;
1636           markup.nAttr = 1;
1637           markup.aAttr[0].iACode = AMSK_ALIGN;
1638           markup.aAttr[0].zValue = "center";
1639           markup.aAttr[0].cTerm = 0;
1640         }
1641 
1642         /* Markup of the form </div id=ID> where there is a matching
1643         ** ID somewhere on the stack.  Exit any contained verbatim.
1644         ** Pop the stack up to the matching <div>.  Discard the </div>
1645         */
1646         if( markup.iCode==MARKUP_DIV && markup.endTag &&
1647              (zId = markupId(&markup))!=0 &&
1648              (iDiv = findTagWithId(p, MARKUP_DIV, zId))>=0
1649         ){
1650           if( p->inVerbatim ){
1651             p->inVerbatim = 0;
1652             p->state = p->preVerbState;
1653             blob_append_string(p->pOut, "</pre>");
1654           }
1655           while( p->nStack>iDiv+1 ) popStack(p);
1656           if( p->aStack[iDiv].allowWiki ){
1657             p->state |= ALLOW_WIKI;
1658           }else{
1659             p->state &= ~ALLOW_WIKI;
1660           }
1661           assert( p->nStack==iDiv+1 );
1662           p->nStack--;
1663         }else
1664 
1665         /* If within <verbatim id=ID> ignore everything other than
1666         ** </verbatim id=ID> and the </dev id=ID2> above.
1667         */
1668         if( p->inVerbatim ){
1669           if( endVerbatim(p, &markup) ){
1670             p->inVerbatim = 0;
1671             p->state = p->preVerbState;
1672             blob_append_string(p->pOut, "</pre>");
1673           }else{
1674             unparseMarkup(&markup);
1675             blob_append_string(p->pOut, "&lt;");
1676             n = 1;
1677           }
1678         }else
1679 
1680         /* Render invalid markup literally.  The markup appears in the
1681         ** final output as plain text.
1682         */
1683         if( markup.iCode==MARKUP_INVALID ){
1684           unparseMarkup(&markup);
1685           startAutoParagraph(p);
1686           blob_append_string(p->pOut, "&lt;");
1687           n = 1;
1688         }else
1689 
1690         /* If the markup is not font-change markup ignore it if the
1691         ** font-change-only flag is set.
1692         */
1693         if( (markup.iType&MUTYPE_FONT)==0 && (p->state & FONT_MARKUP_ONLY)!=0 ){
1694           /* Do nothing */
1695         }else
1696 
1697         if( markup.iCode==MARKUP_NOWIKI ){
1698           if( markup.endTag ){
1699             p->state |= ALLOW_WIKI;
1700           }else{
1701             p->state &= ~ALLOW_WIKI;
1702           }
1703         }else
1704 
1705         /* Ignore block markup for in-line rendering.
1706         */
1707         if( inlineOnly && (markup.iType&MUTYPE_INLINE)==0 ){
1708           /* Do nothing */
1709         }else
1710 
1711         /* Generate end-tags */
1712         if( markup.endTag ){
1713           popStackToTag(p, markup.iCode);
1714         }else
1715 
1716         /* Push <div> markup onto the stack together with the id=ID attribute.
1717         */
1718         if( markup.iCode==MARKUP_DIV && (mAttr & ATTR_ID)!=0 ){
1719           pushStackWithId(p, markup.iCode, markupId(&markup),
1720                           (p->state & ALLOW_WIKI)!=0);
1721         }else
1722 
1723         /* Enter <verbatim> processing.  With verbatim enabled, all other
1724         ** markup other than the corresponding end-tag with the same ID is
1725         ** ignored.
1726         */
1727         if( markup.iCode==MARKUP_VERBATIM ){
1728           int ii; //, vAttrDidAppend=0;
1729           const char *zClass = 0;
1730           p->zVerbatimId = 0;
1731           p->inVerbatim = 1;
1732           p->preVerbState = p->state;
1733           p->state &= ~ALLOW_WIKI;
1734           for(ii=0; ii<markup.nAttr; ii++){
1735             if( markup.aAttr[ii].iACode == ATTR_ID ){
1736               p->zVerbatimId = markup.aAttr[ii].zValue;
1737             }else if( markup.aAttr[ii].iACode==ATTR_TYPE ){
1738               zClass = markup.aAttr[ii].zValue;
1739             }else if( markup.aAttr[ii].iACode==ATTR_LINKS
1740                    && !is_false(markup.aAttr[ii].zValue) ){
1741               p->state |= ALLOW_LINKS;
1742             }
1743           }
1744           endAutoParagraph(p);
1745           if( zClass==0 ){
1746             blob_append_string(p->pOut, "<pre class='verbatim'>");
1747           }else if( strncmp(zClass,"pikchr",6)==0 &&
1748                     (fossil_isspace(zClass[6]) || zClass[6]==0) ){
1749             n += wiki_process_pikchr(p, z+n, zClass);
1750             p->inVerbatim = 0;
1751             p->state = p->preVerbState;
1752           }else{
1753             blob_appendf(p->pOut, "<pre name='code' class='%h'>",
1754                zClass);
1755           }
1756           p->wantAutoParagraph = 0;
1757         }else
1758         if( markup.iType==MUTYPE_LI ){
1759           if( backupToType(p, MUTYPE_LIST)==0 ){
1760             endAutoParagraph(p);
1761             pushStack(p, MARKUP_UL);
1762             blob_append_string(p->pOut, "<ul>");
1763           }
1764           pushStack(p, MARKUP_LI);
1765           renderMarkup(p->pOut, &markup);
1766         }else
1767         if( markup.iType==MUTYPE_TR ){
1768           if( backupToType(p, MUTYPE_TABLE) ){
1769             pushStack(p, MARKUP_TR);
1770             renderMarkup(p->pOut, &markup);
1771           }
1772         }else
1773         if( markup.iType==MUTYPE_TD ){
1774           if( backupToType(p, MUTYPE_TABLE|MUTYPE_TR) ){
1775             if( stackTopType(p)==MUTYPE_TABLE ){
1776               pushStack(p, MARKUP_TR);
1777               blob_append_string(p->pOut, "<tr>");
1778             }
1779             pushStack(p, markup.iCode);
1780             renderMarkup(p->pOut, &markup);
1781           }
1782         }else
1783         if( markup.iType==MUTYPE_HYPERLINK ){
1784           if( !isButtonHyperlink(p, &markup, z, &n) ){
1785             popStackToTag(p, markup.iCode);
1786             startAutoParagraph(p);
1787             renderMarkup(p->pOut, &markup);
1788             pushStack(p, markup.iCode);
1789           }
1790         }else
1791         {
1792           if( markup.iType==MUTYPE_FONT ){
1793             startAutoParagraph(p);
1794           }else if( markup.iType==MUTYPE_BLOCK || markup.iType==MUTYPE_LIST ){
1795             p->wantAutoParagraph = 0;
1796           }
1797           if(   markup.iCode==MARKUP_HR
1798              || markup.iCode==MARKUP_H1
1799              || markup.iCode==MARKUP_H2
1800              || markup.iCode==MARKUP_H3
1801              || markup.iCode==MARKUP_H4
1802              || markup.iCode==MARKUP_H5
1803              || markup.iCode==MARKUP_P
1804           ){
1805             endAutoParagraph(p);
1806           }
1807           if( (markup.iType & MUTYPE_STACK )!=0 ){
1808             pushStack(p, markup.iCode);
1809           }
1810           renderMarkup(p->pOut, &markup);
1811         }
1812         break;
1813       }
1814     }
1815     z += n;
1816   }
1817 }
1818 
1819 /*
1820 ** Transform the text in the pIn blob.  Write the results
1821 ** into the pOut blob.  The pOut blob should already be
1822 ** initialized.  The output is merely appended to pOut.
1823 ** If pOut is NULL, then the output is appended to the CGI
1824 ** reply.
1825 */
wiki_convert(Blob * pIn,Blob * pOut,int flags)1826 void wiki_convert(Blob *pIn, Blob *pOut, int flags){
1827   Renderer renderer;
1828 
1829   memset(&renderer, 0, sizeof(renderer));
1830   renderer.renderFlags = flags;
1831   renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH|flags;
1832   if( flags & WIKI_INLINE ){
1833     renderer.wantAutoParagraph = 0;
1834   }else{
1835     renderer.wantAutoParagraph = 1;
1836   }
1837   if( wikiUsesHtml() ){
1838     renderer.state |= WIKI_HTMLONLY;
1839   }
1840   if( pOut ){
1841     renderer.pOut = pOut;
1842   }else{
1843     renderer.pOut = cgi_output_blob();
1844   }
1845 
1846   blob_to_utf8_no_bom(pIn, 0);
1847   wiki_render(&renderer, blob_str(pIn));
1848   endAutoParagraph(&renderer);
1849   while( renderer.nStack ){
1850     popStack(&renderer);
1851   }
1852   blob_append_char(renderer.pOut, '\n');
1853   free(renderer.aStack);
1854 }
1855 
1856 /*
1857 ** COMMAND: test-wiki-render
1858 **
1859 ** Usage: %fossil test-wiki-render FILE [OPTIONS]
1860 **
1861 ** Translate the input FILE from Fossil-wiki into HTML and write
1862 ** the resulting HTML on standard output.
1863 **
1864 ** Options:
1865 **    --buttons        Set the WIKI_BUTTONS flag
1866 **    --htmlonly       Set the WIKI_HTMLONLY flag
1867 **    --linksonly      Set the WIKI_LINKSONLY flag
1868 **    --nobadlinks     Set the WIKI_NOBADLINKS flag
1869 **    --inline         Set the WIKI_INLINE flag
1870 **    --noblock        Set the WIKI_NOBLOCK flag
1871 */
test_wiki_render(void)1872 void test_wiki_render(void){
1873   Blob in, out;
1874   int flags = 0;
1875   if( find_option("buttons",0,0)!=0 ) flags |= WIKI_BUTTONS;
1876   if( find_option("htmlonly",0,0)!=0 ) flags |= WIKI_HTMLONLY;
1877   if( find_option("linksonly",0,0)!=0 ) flags |= WIKI_LINKSONLY;
1878   if( find_option("nobadlinks",0,0)!=0 ) flags |= WIKI_NOBADLINKS;
1879   if( find_option("inline",0,0)!=0 ) flags |= WIKI_INLINE;
1880   if( find_option("noblock",0,0)!=0 ) flags |= WIKI_NOBLOCK;
1881   db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0);
1882   verify_all_options();
1883   if( g.argc!=3 ) usage("FILE");
1884   blob_zero(&out);
1885   blob_read_from_file(&in, g.argv[2], ExtFILE);
1886   wiki_convert(&in, &out, flags);
1887   blob_write_to_file(&out, "-");
1888 }
1889 
1890 /*
1891 ** COMMAND: test-markdown-render
1892 **
1893 ** Usage: %fossil test-markdown-render FILE ...
1894 **
1895 ** Render markdown in FILE as HTML on stdout.
1896 ** Options:
1897 **
1898 **    --safe           Restrict the output to use only "safe" HTML
1899 */
test_markdown_render(void)1900 void test_markdown_render(void){
1901   Blob in, out;
1902   int i;
1903   int bSafe = 0;
1904   db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0);
1905   bSafe = find_option("safe",0,0)!=0;
1906   verify_all_options();
1907   for(i=2; i<g.argc; i++){
1908     blob_zero(&out);
1909     blob_read_from_file(&in, g.argv[i], ExtFILE);
1910     if( g.argc>3 ){
1911       fossil_print("<!------ %h ------->\n", g.argv[i]);
1912     }
1913     markdown_to_html(&in, 0, &out);
1914     safe_html_context( bSafe ? DOCSRC_UNTRUSTED : DOCSRC_TRUSTED );
1915     safe_html(&out);
1916     blob_write_to_file(&out, "-");
1917     blob_reset(&in);
1918     blob_reset(&out);
1919   }
1920 }
1921 
1922 /*
1923 ** Search for a <title>...</title> at the beginning of a wiki page.
1924 ** Return true (nonzero) if a title is found.  Return zero if there is
1925 ** not title.
1926 **
1927 ** If a title is found, initialize the pTitle blob to be the content
1928 ** of the title and initialize pTail to be the text that follows the
1929 ** title.
1930 */
wiki_find_title(Blob * pIn,Blob * pTitle,Blob * pTail)1931 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1932   char *z;
1933   int i;
1934   int iStart;
1935   blob_to_utf8_no_bom(pIn, 0);
1936   z = blob_str(pIn);
1937   for(i=0; fossil_isspace(z[i]); i++){}
1938   if( z[i]!='<' ) return 0;
1939   i++;
1940   if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1941   for(iStart=i+6; fossil_isspace(z[iStart]); iStart++){}
1942   for(i=iStart; z[i] && (z[i]!='<' || strncmp(&z[i],"</title>",8)!=0); i++){}
1943   if( strncmp(&z[i],"</title>",8)!=0 ){
1944     blob_init(pTitle, 0, 0);
1945     blob_init(pTail, &z[iStart], -1);
1946     return 1;
1947   }
1948   if( i-iStart>0 ){
1949     blob_init(pTitle, &z[iStart], i-iStart);
1950   }else{
1951     blob_init(pTitle, 0, 0);
1952   }
1953   blob_init(pTail, &z[i+8], -1);
1954   return 1;
1955 }
1956 
1957 /*
1958 ** Parse text looking for wiki hyperlinks in one of the formats:
1959 **
1960 **       [target]
1961 **       [target|...]
1962 **
1963 ** Where "target" can be either an artifact ID prefix or a wiki page
1964 ** name.  For each such hyperlink found, add an entry to the
1965 ** backlink table.
1966 */
wiki_extract_links(char * z,Backlink * pBklnk,int flags)1967 void wiki_extract_links(
1968   char *z,           /* The wiki text from which to extract links */
1969   Backlink *pBklnk,  /* Backlink extraction context */
1970   int flags          /* wiki parsing flags */
1971 ){
1972   Renderer renderer;
1973   int tokenType;
1974   ParsedMarkup markup;
1975   int n;
1976   int inlineOnly;
1977   int wikiHtmlOnly = 0;
1978 
1979   memset(&renderer, 0, sizeof(renderer));
1980   renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH;
1981   if( flags & WIKI_NOBLOCK ){
1982     renderer.state |= INLINE_MARKUP_ONLY;
1983   }
1984   if( wikiUsesHtml() ){
1985     renderer.state |= WIKI_HTMLONLY;
1986     wikiHtmlOnly = 1;
1987   }
1988   inlineOnly = (renderer.state & INLINE_MARKUP_ONLY)!=0;
1989 
1990   while( z[0] ){
1991     if( wikiHtmlOnly ){
1992       n = nextRawToken(z, &renderer, &tokenType);
1993     }else{
1994       n = nextWikiToken(z, &renderer, &tokenType);
1995     }
1996     switch( tokenType ){
1997       case TOKEN_LINK: {
1998         char *zTarget;
1999         int i;
2000 
2001         zTarget = &z[1];
2002         for(i=0; zTarget[i] && zTarget[i]!='|' && zTarget[i]!=']'; i++){}
2003         while(i>1 && zTarget[i-1]==' '){ i--; }
2004         backlink_create(pBklnk, zTarget, i);
2005         break;
2006       }
2007       case TOKEN_MARKUP: {
2008         const char *zId;
2009         int iDiv;
2010         parseMarkup(&markup, z);
2011 
2012         /* Markup of the form </div id=ID> where there is a matching
2013         ** ID somewhere on the stack.  Exit the verbatim if were are in
2014         ** it.  Pop the stack up to the matching <div>.  Discard the
2015         ** </div>
2016         */
2017         if( markup.iCode==MARKUP_DIV && markup.endTag &&
2018              (zId = markupId(&markup))!=0 &&
2019              (iDiv = findTagWithId(&renderer, MARKUP_DIV, zId))>=0
2020         ){
2021           if( renderer.inVerbatim ){
2022             renderer.inVerbatim = 0;
2023             renderer.state = renderer.preVerbState;
2024           }
2025           while( renderer.nStack>iDiv+1 ) popStack(&renderer);
2026           if( renderer.aStack[iDiv].allowWiki ){
2027             renderer.state |= ALLOW_WIKI;
2028           }else{
2029             renderer.state &= ~ALLOW_WIKI;
2030           }
2031           renderer.nStack--;
2032         }else
2033 
2034         /* If within <verbatim id=ID> ignore everything other than
2035         ** </verbatim id=ID> and the </dev id=ID2> above.
2036         */
2037         if( renderer.inVerbatim ){
2038           if( endVerbatim(&renderer, &markup) ){
2039             renderer.inVerbatim = 0;
2040             renderer.state = renderer.preVerbState;
2041           }else{
2042             n = 1;
2043           }
2044         }else
2045 
2046         /* Render invalid markup literally.  The markup appears in the
2047         ** final output as plain text.
2048         */
2049         if( markup.iCode==MARKUP_INVALID ){
2050           n = 1;
2051         }else
2052 
2053         /* If the markup is not font-change markup ignore it if the
2054         ** font-change-only flag is set.
2055         */
2056         if( (markup.iType&MUTYPE_FONT)==0 &&
2057                             (renderer.state & FONT_MARKUP_ONLY)!=0 ){
2058           /* Do nothing */
2059         }else
2060 
2061         if( markup.iCode==MARKUP_NOWIKI ){
2062           if( markup.endTag ){
2063             renderer.state |= ALLOW_WIKI;
2064           }else{
2065             renderer.state &= ~ALLOW_WIKI;
2066           }
2067         }else
2068 
2069         /* Ignore block markup for in-line rendering.
2070         */
2071         if( inlineOnly && (markup.iType&MUTYPE_INLINE)==0 ){
2072           /* Do nothing */
2073         }else
2074 
2075         /* Generate end-tags */
2076         if( markup.endTag ){
2077           popStackToTag(&renderer, markup.iCode);
2078         }else
2079 
2080         /* Push <div> markup onto the stack together with the id=ID attribute.
2081         */
2082         if( markup.iCode==MARKUP_DIV ){
2083           pushStackWithId(&renderer, markup.iCode, markupId(&markup),
2084                           (renderer.state & ALLOW_WIKI)!=0);
2085         }else
2086 
2087         /* Enter <verbatim> processing.  With verbatim enabled, all other
2088         ** markup other than the corresponding end-tag with the same ID is
2089         ** ignored.
2090         */
2091         if( markup.iCode==MARKUP_VERBATIM ){
2092           int vAttrIdx;
2093           renderer.zVerbatimId = 0;
2094           renderer.inVerbatim = 1;
2095           renderer.preVerbState = renderer.state;
2096           renderer.state &= ~ALLOW_WIKI;
2097           for (vAttrIdx = 0; vAttrIdx < markup.nAttr; vAttrIdx++){
2098             if( markup.aAttr[vAttrIdx].iACode == ATTR_ID ){
2099               renderer.zVerbatimId = markup.aAttr[0].zValue;
2100             }
2101           }
2102           renderer.wantAutoParagraph = 0;
2103         }
2104 
2105         /* Restore the input text to its original configuration
2106         */
2107         unparseMarkup(&markup);
2108         break;
2109       }
2110       default: {
2111         break;
2112       }
2113     }
2114     z += n;
2115   }
2116   free(renderer.aStack);
2117 }
2118 
2119 /*
2120 ** Return the length, in bytes, of the HTML token that z is pointing to.
2121 */
html_token_length(const char * z)2122 int html_token_length(const char *z){
2123   int n;
2124   char c;
2125   if( (c=z[0])=='<' ){
2126     n = html_tag_length(z);
2127     if( n<=0 ) n = 1;
2128   }else if( fossil_isspace(c) ){
2129     for(n=1; z[n] && fossil_isspace(z[n]); n++){}
2130   }else if( c=='&' ){
2131     n = z[1]=='#' ? 2 : 1;
2132     while( fossil_isalnum(z[n]) ) n++;
2133     if( z[n]==';' ) n++;
2134   }else{
2135     n = 1;
2136     for(n=1; 1; n++){
2137       if( (c = z[n]) > '<' ) continue;
2138       if( c=='<' || c=='&' || fossil_isspace(c) || c==0 ) break;
2139     }
2140   }
2141   return n;
2142 }
2143 
2144 /*
2145 ** z points to someplace in the middle of HTML markup.  Return the length
2146 ** of the subtoken that starts on z.
2147 */
html_subtoken_length(const char * z)2148 int html_subtoken_length(const char *z){
2149   int n;
2150   char c;
2151   c = z[0];
2152   if( fossil_isspace(c) ){
2153     for(n=1; z[n] && fossil_isspace(z[n]); n++){}
2154     return n;
2155   }
2156   if( c=='"' || c=='\'' ){
2157     for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
2158     if( z[n]==c ) n++;
2159     return n;
2160   }
2161   if( c=='>' ){
2162     return 0;
2163   }
2164   if( c=='=' ){
2165     return 1;
2166   }
2167   if( fossil_isalnum(c) || c=='/' ){
2168     for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
2169     return n;
2170   }
2171   return 1;
2172 }
2173 
2174 /*
2175 ** z points to an HTML markup token:  <TAG ATTR=VALUE ...>
2176 ** This routine looks for the VALUE associated with zAttr and returns
2177 ** a pointer to the start of that value and sets *pLen to be the length
2178 ** in bytes for the value.  Or it returns NULL if no such attr exists.
2179 */
html_attribute(const char * zMarkup,const char * zAttr,int * pLen)2180 const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
2181   int i = 1;
2182   int n;
2183   int nAttr;
2184   int iMatchCnt = 0;
2185   assert( zMarkup[0]=='<' );
2186   assert( zMarkup[1]!=0 );
2187   n = html_subtoken_length(zMarkup+i);
2188   if( n==0 ) return 0;
2189   i += n;
2190   nAttr = (int)strlen(zAttr);
2191   while( 1 ){
2192     const char *zStart = zMarkup+i;
2193     n = html_subtoken_length(zStart);
2194     if( n==0 ) break;
2195     i += n;
2196     if( fossil_isspace(zStart[0]) ) continue;
2197     if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
2198       iMatchCnt = 1;
2199     }else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
2200       iMatchCnt = 2;
2201     }else if( iMatchCnt==2 ){
2202       if( (zStart[0]=='"' || zStart[0]=='\'') && zStart[n-1]==zStart[0] ){
2203         zStart++;
2204         n -= 2;
2205       }
2206       *pLen = n;
2207       return zStart;
2208     }else{
2209       iMatchCnt = 0;
2210     }
2211   }
2212   return 0;
2213 }
2214 
2215 /*
2216 ** COMMAND: test-html-tokenize
2217 **
2218 ** Tokenize an HTML file.  Return the offset and length and text of
2219 ** each token - one token per line.  Omit white-space tokens.
2220 */
test_html_tokenize(void)2221 void test_html_tokenize(void){
2222   Blob in;
2223   char *z;
2224   int i;
2225   int iOfst, n;
2226 
2227   for(i=2; i<g.argc; i++){
2228     blob_read_from_file(&in, g.argv[i], ExtFILE);
2229     z = blob_str(&in);
2230     for(iOfst=0; z[iOfst]; iOfst+=n){
2231       n = html_token_length(z+iOfst);
2232       if( fossil_isspace(z[iOfst]) ) continue;
2233       fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
2234       if( z[iOfst]=='<' && n>1 ){
2235         int j,k;
2236         for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
2237           if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
2238           fossil_print("# %d %d %.*s\n", j, k, k, z+j);
2239         }
2240       }
2241     }
2242     blob_reset(&in);
2243   }
2244 }
2245 
2246 /*
2247 ** Attempt to reformat messy HTML to be easily readable by humans.
2248 **
2249 **    *  Try to keep lines less than 80 characters in length
2250 **    *  Collapse white space into a single space
2251 **    *  Put a blank line before:
2252 **          <blockquote><center><code><hN><p><pre><table>
2253 **    *  Put a newline after <br> and <hr>
2254 **    *  Start each of the following elements on a new line:
2255 **          <address><cite><dd><div><dl><dt><li><ol><samp>
2256 **          <tbody><td><tfoot><th><thead><tr><ul>
2257 **
2258 ** Except, do not do any reformatting inside of <pre>...</pre>
2259 */
htmlTidy(const char * zIn,Blob * pOut)2260 void htmlTidy(const char *zIn, Blob *pOut){
2261   int n;
2262   int nPre = 0;
2263   int iCur = 0;
2264   int wantSpace = 0;
2265   int omitSpace = 1;
2266   while( zIn[0] ){
2267     n = html_token_length(zIn);
2268     if( zIn[0]=='<' && n>1 ){
2269       int i, j;
2270       int isCloseTag;
2271       int eTag;
2272       int eType;
2273       char zTag[32];
2274       isCloseTag = zIn[1]=='/';
2275       for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2276          zTag[i] = fossil_tolower(zIn[j]);
2277       }
2278       zTag[i] = 0;
2279       eTag = findTag(zTag);
2280       eType = aMarkup[eTag].iType;
2281       if( eTag==MARKUP_PRE ){
2282         if( isCloseTag ){
2283           nPre--;
2284           blob_append(pOut, zIn, n);
2285           zIn += n;
2286           if( nPre==0 ){ blob_append_char(pOut, '\n'); iCur = 0; }
2287           continue;
2288         }else{
2289           if( iCur && nPre==0 ){ blob_append_char(pOut, '\n'); iCur = 0; }
2290           nPre++;
2291         }
2292       }else if( eType & (MUTYPE_BLOCK|MUTYPE_TABLE) ){
2293         if( !isCloseTag && nPre==0 && blob_size(pOut)>0 ){
2294           blob_append(pOut, "\n\n", 1 + (iCur>0));
2295           iCur = 0;
2296         }
2297         wantSpace = 0;
2298         omitSpace = 1;
2299       }else if( (eType & (MUTYPE_LIST|MUTYPE_LI|MUTYPE_TR|MUTYPE_TD))!=0
2300              || eTag==MARKUP_HR
2301       ){
2302         if( nPre==0 && (!isCloseTag || (eType&MUTYPE_LIST)!=0) && iCur>0 ){
2303           blob_append_char(pOut, '\n');
2304           iCur = 0;
2305         }
2306         wantSpace = 0;
2307         omitSpace = 1;
2308       }
2309       if( wantSpace && nPre==0 ){
2310         if( iCur+n+1>=80 ){
2311           blob_append_char(pOut, '\n');
2312           iCur = 0;
2313         }else{
2314           blob_append_char(pOut, ' ');
2315           iCur++;
2316         }
2317       }
2318       blob_append(pOut, zIn, n);
2319       iCur += n;
2320       wantSpace = 0;
2321       if( eTag==MARKUP_BR || eTag==MARKUP_HR ){
2322         blob_append_char(pOut, '\n');
2323         iCur = 0;
2324       }
2325     }else if( fossil_isspace(zIn[0]) ){
2326       if( nPre ){
2327         blob_append(pOut, zIn, n);
2328       }else{
2329         wantSpace = !omitSpace;
2330       }
2331     }else{
2332       if( wantSpace && nPre==0 ){
2333         if( iCur+n+1>=80 ){
2334           blob_append_char(pOut, '\n');
2335           iCur = 0;
2336         }else{
2337           blob_append_char(pOut, ' ');
2338           iCur++;
2339         }
2340       }
2341       blob_append(pOut, zIn, n);
2342       iCur += n;
2343       wantSpace = omitSpace = 0;
2344     }
2345     zIn += n;
2346   }
2347   if( iCur ) blob_append_char(pOut, '\n');
2348 }
2349 
2350 /*
2351 ** COMMAND: test-html-tidy
2352 **
2353 ** Run the htmlTidy() routine on the content of all files named on
2354 ** the command-line and write the results to standard output.
2355 */
test_html_tidy(void)2356 void test_html_tidy(void){
2357   Blob in, out;
2358   int i;
2359 
2360   for(i=2; i<g.argc; i++){
2361     blob_read_from_file(&in, g.argv[i], ExtFILE);
2362     blob_zero(&out);
2363     htmlTidy(blob_str(&in), &out);
2364     blob_reset(&in);
2365     fossil_puts(blob_buffer(&out), 0, blob_size(&out));
2366     blob_reset(&out);
2367   }
2368 }
2369 
2370 /*
2371 ** Remove all HTML markup from the input text.  The output written into
2372 ** pOut is pure text.
2373 **
2374 ** Put the title on the first line, if there is any <title> markup.
2375 ** If there is no <title>, then create a blank first line.
2376 */
html_to_plaintext(const char * zIn,Blob * pOut)2377 void html_to_plaintext(const char *zIn, Blob *pOut){
2378   int n;
2379   int i, j;
2380   int inTitle = 0;          /* True between <title>...</title> */
2381   int seenText = 0;         /* True after first non-whitespace seen */
2382   int nNL = 0;              /* Number of \n characters at the end of pOut */
2383   int nWS = 0;              /* True if pOut ends with whitespace */
2384   while( fossil_isspace(zIn[0]) ) zIn++;
2385   while( zIn[0] ){
2386     n = html_token_length(zIn);
2387     if( zIn[0]=='<' && n>1 ){
2388       int isCloseTag;
2389       int eTag;
2390       int eType;
2391       char zTag[32];
2392       isCloseTag = zIn[1]=='/';
2393       for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2394          zTag[i] = fossil_tolower(zIn[j]);
2395       }
2396       zTag[i] = 0;
2397       eTag = findTag(zTag);
2398       eType = aMarkup[eTag].iType;
2399       if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2400         zIn += n;
2401         while( zIn[0] ){
2402           n = html_token_length(zIn);
2403           if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2404           zIn += n;
2405         }
2406         if( zIn[0]=='<' ) zIn += n;
2407         continue;
2408       }
2409       if( eTag==MARKUP_TITLE ){
2410         inTitle = !isCloseTag;
2411       }
2412       if( !isCloseTag && seenText && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2413         if( nNL==0 ){
2414           blob_append_char(pOut, '\n');
2415           nNL++;
2416         }
2417         nWS = 1;
2418       }
2419     }else if( fossil_isspace(zIn[0]) ){
2420       if( seenText ){
2421         nNL = 0;
2422         if( !inTitle ){ /* '\n' -> ' ' within <title> */
2423           for(i=0; i<n; i++) if( zIn[i]=='\n' ) nNL++;
2424         }
2425         if( !nWS ){
2426           blob_append_char(pOut, nNL ? '\n' : ' ');
2427           nWS = 1;
2428         }
2429       }
2430     }else if( zIn[0]=='&' ){
2431       char c = '?';
2432       if( zIn[1]=='#' ){
2433         int x = atoi(&zIn[1]);
2434         if( x>0 && x<=127 ) c = x;
2435       }else{
2436         static const struct { int n; char c; char *z; } aEntity[] = {
2437            { 5, '&', "&amp;"   },
2438            { 4, '<', "&lt;"    },
2439            { 4, '>', "&gt;"    },
2440            { 6, ' ', "&nbsp;"  },
2441         };
2442         int jj;
2443         for(jj=0; jj<count(aEntity); jj++){
2444           if( aEntity[jj].n==n && strncmp(aEntity[jj].z,zIn,n)==0 ){
2445             c = aEntity[jj].c;
2446             break;
2447           }
2448         }
2449       }
2450       if( fossil_isspace(c) ){
2451         if( nWS==0 && seenText ) blob_append_char(pOut, c);
2452         nWS = 1;
2453         nNL = c=='\n';
2454       }else{
2455         if( !seenText && !inTitle ) blob_append_char(pOut, '\n');
2456         seenText = 1;
2457         nNL = nWS = 0;
2458         blob_append_char(pOut, c);
2459       }
2460     }else{
2461       if( !seenText && !inTitle ) blob_append_char(pOut, '\n');
2462       seenText = 1;
2463       nNL = nWS = 0;
2464       blob_append(pOut, zIn, n);
2465     }
2466     zIn += n;
2467   }
2468   if( nNL==0 ) blob_append_char(pOut, '\n');
2469 }
2470 
2471 /*
2472 ** COMMAND: test-html-to-text
2473 **
2474 ** Usage: %fossil test-html-to-text FILE ...
2475 **
2476 ** Read all files named on the command-line.  Convert the file
2477 ** content from HTML to text and write the results on standard
2478 ** output.
2479 **
2480 ** This command is intended as a test and debug interface for
2481 ** the html_to_plaintext() routine.
2482 */
test_html_to_text(void)2483 void test_html_to_text(void){
2484   Blob in, out;
2485   int i;
2486 
2487   for(i=2; i<g.argc; i++){
2488     blob_read_from_file(&in, g.argv[i], ExtFILE);
2489     blob_zero(&out);
2490     html_to_plaintext(blob_str(&in), &out);
2491     blob_reset(&in);
2492     fossil_puts(blob_buffer(&out), 0, blob_size(&out));
2493     blob_reset(&out);
2494   }
2495 }
2496 
2497 /****************************************************************************
2498 ** safe-html:
2499 **
2500 ** An interface for preventing HTML constructs (ex: <style>, <form>, etc)
2501 ** from being inserted into Wiki and Forum posts using Markdown.   See the
2502 ** comment on safe_html_append() for additional information on what is meant
2503 ** by "safe".
2504 **
2505 ** The safe-html restrictions only apply to Markdown, as Fossil-Wiki only
2506 ** allows safe-html by design - unsafe-HTML is never and has never been
2507 ** allowed in Fossil-Wiki.
2508 **
2509 ** This code is in the wikiformat.c file so that it can have access to the
2510 ** white-list of acceptable HTML in the aMarkup[] array.
2511 */
2512 
2513 /*
2514 ** An instance of this object keeps track of the nesting of HTML
2515 ** elements for safe_html_append().
2516 */
2517 typedef struct HtmlTagStack HtmlTagStack;
2518 struct HtmlTagStack {
2519   int n;                /* Current tag stack depth */
2520   int nAlloc;           /* Space allocated for aStack[] */
2521   int *aStack;          /* The stack of tags */
2522   int aSpace[10];       /* Initial static space, to avoid malloc() */
2523 };
2524 
2525 /*
2526 ** Initialize bulk memory to a valid empty tagstack.
2527 */
html_tagstack_init(HtmlTagStack * p)2528 static void html_tagstack_init(HtmlTagStack *p){
2529   p->n = 0;
2530   p->nAlloc = 0;
2531   p->aStack = p->aSpace;
2532 }
2533 
2534 /*
2535 ** Push a new element onto the tag statk
2536 */
html_tagstack_push(HtmlTagStack * p,int e)2537 static void html_tagstack_push(HtmlTagStack *p, int e){
2538   if( p->n>=ArraySize(p->aSpace) && p->n>=p->nAlloc ){
2539     if( p->nAlloc==0 ){
2540       int *aNew;
2541       p->nAlloc = 50;
2542       aNew = fossil_malloc( sizeof(p->aStack[0])*p->nAlloc );
2543       memcpy(aNew, p->aStack, sizeof(p->aStack[0])*p->n );
2544       p->aStack = aNew;
2545     }else{
2546       p->nAlloc *= 2;
2547       p->aStack = fossil_realloc(p->aStack, sizeof(p->aStack[0])*p->nAlloc );
2548     }
2549   }
2550   p->aStack[p->n++] = e;
2551 }
2552 
2553 /*
2554 ** Clear a tag stack, reclaiming any memory allocations.
2555 */
html_tagstack_clear(HtmlTagStack * p)2556 static void html_tagstack_clear(HtmlTagStack *p){
2557   if( p->nAlloc ){
2558     fossil_free(p->aStack);
2559     p->nAlloc = 0;
2560     p->aStack = p->aSpace;
2561   }
2562   p->n = 0;
2563 }
2564 
2565 /*
2566 ** The HTML end-tag eEnd wants to be added to pBlob.
2567 **
2568 ** If an open-tag for eEnd exists anywhere on the stack, then
2569 ** pop it and all prior elements from the task, issuing appropriate
2570 ** end-tags as you go.
2571 **
2572 ** If there is no open-tag for eEnd on the stack, then this
2573 ** routine is a no-op.
2574 */
html_tagstack_pop(HtmlTagStack * p,Blob * pBlob,int eEnd)2575 static void html_tagstack_pop(HtmlTagStack *p, Blob *pBlob, int eEnd){
2576   int i, e;
2577   if( eEnd!=0 ){
2578     for(i=p->n-1; i>=0 && p->aStack[i]!=eEnd; i--){}
2579     if( i<0 ){
2580       blob_appendf(pBlob, "<span class='error'>&lt;/%s&gt;</span>",
2581                    aMarkup[eEnd].zName);
2582       return;
2583     }
2584   }else if( p->n==0 ){
2585     return;
2586   }
2587   do{
2588     e = p->aStack[--p->n];
2589     if( e==eEnd || (aMarkup[e].iType & MUTYPE_Nested)!=0 ){
2590       blob_appendf(pBlob, "</%s>", aMarkup[e].zName);
2591     }
2592   }while( e!=eEnd && p->n>0 );
2593 }
2594 
2595 /*
2596 ** Return a nonce to indicate that safe_html() can allow code through
2597 ** without censoring.
2598 **
2599 ** When safe_html() is asked to sanitize some HTML, it will ignore
2600 ** any text in between two consecutive instances of the nonce.  The
2601 ** nonce itself is an HTML comment so it is harmless to keep the
2602 ** nonce in the middle of the HTML stream.  A different nonce is
** chosen each time Fossil is run, using a lot of randomness, so
2604 ** an attacker will be unable to guess the nonce in advance.
2605 **
2606 ** The original use-case for this mechanism is to allow Pikchr-generated
2607 ** SVG in the middle of HTML generated from Markdown.  The Markdown
2608 ** output will normally be processed by safe_html() to prevent accidental
2609 ** or malicious introduction of harmful HTML (ex: <script>) in the
2610 ** output stream.  The safe_html() only lets through HTML elements
2611 ** that are on its allow-list and SVG is not on that list.  Hence, in order
2612 ** to allow the Pikchr-generated SVG through, it must be surrounded by
2613 ** the nonce.
2614 */
safe_html_nonce(int bGenerate)2615 const char *safe_html_nonce(int bGenerate){
2616   static char *zNonce = 0;
2617   if( zNonce==0 && bGenerate ){
2618     zNonce = db_text(0, "SELECT '<!--'||hex(randomblob(32))||'-->';");
2619   }
2620   return zNonce;
2621 }
2622 #define SAFE_NONCE_SIZE (4+64+3)
2623 
2624 /*
2625 ** Append a safe translation of HTML text to a Blob object.
2626 **
2627 ** Restriction: The input to this routine must be writable.
2628 *  Temporary changes may be made to the input, but the input is restored
2629 ** to its original state prior to returning.  If zHtml[nHtml] is not a
2630 ** zero character, then a zero might be written in that position
2631 ** temporarily, but that slot will also be restored before this routine
2632 ** returns.
2633 */
safe_html_append(Blob * pBlob,char * zHtml,int nHtml)2634 static void safe_html_append(Blob *pBlob, char *zHtml, int nHtml){
2635   char cLast;
2636   int i, j, n;
2637   HtmlTagStack s;
2638   ParsedMarkup markup;
2639   const char *zNonce;
2640   char *z;
2641 
2642   if( nHtml<=0 ) return;
2643   cLast = zHtml[nHtml];
2644   zHtml[nHtml] = 0;
2645   html_tagstack_init(&s);
2646 
2647   i = 0;
2648   while( i<nHtml ){
2649     if( zHtml[i]=='<' ){
2650       j = i;
2651     }else{
2652       z = strchr(zHtml+i, '<');
2653       if( z==0 ){
2654         blob_append(pBlob, zHtml+i, nHtml-i);
2655         break;
2656       }
2657       j = (int)(z - zHtml);
2658       blob_append(pBlob, zHtml+i, j-i);
2659     }
2660     if( zHtml[j+1]=='!'
2661      && j+2*SAFE_NONCE_SIZE<nHtml
2662      && (zNonce = safe_html_nonce(0))!=0
2663      && strncmp(zHtml+j,zNonce,SAFE_NONCE_SIZE)==0
2664      && (z = strstr(zHtml+j+SAFE_NONCE_SIZE,zNonce))!=0
2665     ){
2666       i = (int)(z - zHtml) + SAFE_NONCE_SIZE;
2667       blob_append(pBlob, zHtml+j, i-j);
2668       continue;
2669     }
2670     n = html_tag_length(zHtml+j);
2671     if( n==0 ){
2672       blob_append(pBlob, "&lt;", 4);
2673       i = j+1;
2674       continue;
2675     }else{
2676       i = j + n;
2677     }
2678     parseMarkup(&markup, zHtml+j);
2679     if( markup.iCode==MARKUP_INVALID ){
2680       unparseMarkup(&markup);
2681       blob_appendf(pBlob, "<span class='error'>&lt;%.*s&gt;</span>",
2682                    n-2, zHtml+j+1);
2683       continue;
2684     }
2685     if( (markup.iType & MUTYPE_Nested)==0 || markup.iCode==MARKUP_P ){
2686       renderMarkup(pBlob, &markup);
2687     }else{
2688       if( markup.endTag ){
2689         html_tagstack_pop(&s, pBlob, markup.iCode);
2690       }else{
2691         renderMarkup(pBlob, &markup);
2692         html_tagstack_push(&s, markup.iCode);
2693       }
2694     }
2695     unparseMarkup(&markup);
2696   }
2697   html_tagstack_pop(&s, pBlob, 0);
2698   html_tagstack_clear(&s);
2699   zHtml[nHtml] = cLast;
2700 }
2701 
/*
** This local variable is true if the safe_html() function is enabled.
** In other words, this is true if the output of Markdown should be
** restricted to use only "safe" HTML.
**
** The value is assigned by safe_html_context() and tested by safe_html().
** It starts out as 1, so filtering is on until something turns it off.
*/
static int safeHtmlEnable = 1;
2708 
2709 
#if INTERFACE
/*
** Allowed values for the eTrust parameter to safe_html_context().
**
** For the FILE, FORUM, TICKET and WIKI values, safe_html_context()
** looks for a corresponding letter ('b', 'f', 't', 'w' respectively)
** in the "safe-html" setting.  The TRUSTED and UNTRUSTED values switch
** safe_html() off or on directly, without consulting the setting.
*/
#define DOCSRC_FILE       1     /* Document is a checked-in file */
#define DOCSRC_FORUM      2     /* Document is a forum post */
#define DOCSRC_TICKET     3     /* Document is a ticket comment */
#define DOCSRC_WIKI       4     /* Document is a wiki page */
#define DOCSRC_TRUSTED    5     /* safe_html() is always a no-op */
#define DOCSRC_UNTRUSTED  6     /* safe_html() is always enabled */
#endif /* INTERFACE */
2721 
2722 
2723 /*
2724 ** Specify the context in which a markdown document with potentially
2725 ** unsafe HTML will be rendered.
2726 */
safe_html_context(int eTrust)2727 void safe_html_context(int eTrust){
2728   static const char *zSafeHtmlSetting = 0;
2729   char cPerm = 0;
2730   if( eTrust==DOCSRC_TRUSTED ){
2731     safeHtmlEnable = 0;
2732     return;
2733   }
2734   if( eTrust==DOCSRC_UNTRUSTED ){
2735     safeHtmlEnable = 1;
2736     return;
2737   }
2738   if( zSafeHtmlSetting==0 ){
2739     zSafeHtmlSetting = db_get("safe-html", "");
2740   }
2741   switch( eTrust ){
2742     case DOCSRC_FILE:   cPerm = 'b';  break;
2743     case DOCSRC_FORUM:  cPerm = 'f';  break;
2744     case DOCSRC_TICKET: cPerm = 't';  break;
2745     case DOCSRC_WIKI:   cPerm = 'w';  break;
2746   }
2747   safeHtmlEnable = (strchr(zSafeHtmlSetting,cPerm)==0);
2748 }
2749 
2750 /*
2751 ** SETTING: safe-html        width=8
2752 ** This setting controls whether or not unsafe HTML elements
2753 ** (such as SCRIPT or STYLE tags) are allowed in Markdown-formatted
2754 ** documents.  Unsafe HTML is disabled by default.  If this setting
2755 ** exists and is a string, then letters in that string can enable
2756 ** unsafe HTML in various contexts:
2757 **
2758 **    - b         Unsafe HTML allowed in embedded documentation
2759 **    - f         Unsafe HTML allowed in forum posts
2760 **    - t         Unsafe HTML allowed in tickets
2761 **    - w         Unsafe HTML allowed on wiki pages
2762 */
2763 /*
2764 ** The input blob contains HTML.  If safe-html is enabled, then
2765 ** convert the input into "safe HTML".  The following modifications
2766 ** are made:
2767 **
2768 **    1.  Remove any elements that are not on the AllowedMarkup list.
2769 **        (ex: <script>, <form>, etc.)
2770 **
2771 **    2.  Remove any attributes that are not on the AllowedMarkup list.
2772 **        (ex: onload=, etc.)
2773 **
2774 **    3.  Omit any surplus close-tags.  This prevents the script from
2775 **        terminating an <div> or similar in the outer context.
2776 **
2777 **    4.  Insert additional close-tags as necessary so that any
2778 **        tag in the input that needs a close-tag has one.  This
2779 **        prevents tags in the embedded script from affecting the
2780 **        display of content that follows this script in the enclosing
2781 **        context.
2782 **
2783 ** These modifications are intended to make the generated HTML safe
2784 ** to be embedded in a larger HTML document, such that the embedded
2785 ** HTML has no influence on the formatting and operation of the
2786 ** larger document.
2787 **
2788 ** If safe-html is disabled, then this routine is a no-op.
2789 */
safe_html(Blob * in)2790 void safe_html(Blob *in){
2791   Blob out;      /* Holding area for the revised text during construction */
2792   char *z;       /* Original input text */
2793   int n;         /* Number of bytes in the original input text */
2794   int k;
2795 
2796   if( safeHtmlEnable==0 ) return;
2797   z = blob_str(in);
2798   n = blob_size(in);
2799   blob_init(&out, 0, 0);
2800   while( fossil_isspace(z[0]) ){ z++; n--; }
2801   for(k=n-1; k>5 && fossil_isspace(z[k]); k--){}
2802 
2803   if( fossil_strnicmp(z, "<div",4)==0 && !fossil_isalpha(z[4])
2804    && fossil_strnicmp(z+k-5, "</div>",6)==0
2805   ){
2806     /* The input contains an outer <div>...</div>.  Preserve the
2807     ** full scope of that <div>. */
2808     int m = html_tag_length(z);
2809     k -= 5;
2810     blob_append(&out, z, m);
2811     safe_html_append(&out, z+m, k-m);
2812     blob_append(&out, z+k, n-k);
2813   }else{
2814     safe_html_append(&out, z, n);
2815   }
2816   blob_reset(in);
2817   *in = out;
2818 }
2819 
2820 /*
2821 ** COMMAND: test-safe-html
2822 **
2823 ** Usage: %fossil test-safe-html FILE ...
2824 **
2825 ** Read files named on the command-line.  Send the text of each file
2826 ** through safe_html_append() and then write the result on
2827 ** standard output.
2828 */
test_safe_html_cmd(void)2829 void test_safe_html_cmd(void){
2830   int i;
2831   Blob x;
2832   for(i=2; i<g.argc; i++){
2833     char *z;
2834     int n;
2835     blob_read_from_file(&x, g.argv[i], ExtFILE);
2836     blob_terminate(&x);
2837     safe_html(&x);
2838     z = blob_str(&x);
2839     n = blob_size(&x);
2840     while( n>0 && (z[n-1]=='\n' || z[n-1]=='\r') ) n--;
2841     fossil_print("%.*s\n", n, z);
2842     blob_reset(&x);
2843   }
2844 }
2845