1 /*
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 ** drh@hwaci.com
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code to do formatting of wiki text.
19 */
20 #include "config.h"
21 #include <assert.h>
22 #include "wikiformat.h"
23
24 #if INTERFACE
25 /*
26 ** Allowed wiki transformation operations
27 */
#define WIKI_HTMLONLY       0x001  /* HTML markup only.  No wiki */
#define WIKI_INLINE         0x002  /* Do not surround with <p>..</p> */
#define WIKI_NOBLOCK        0x004  /* No block markup of any kind */
#define WIKI_BUTTONS        0x008  /* Allow sub-menu buttons */
#define WIKI_NOBADLINKS     0x010  /* Ignore broken hyperlinks */
#define WIKI_LINKSONLY      0x020  /* No markup.  Only decorate links */
#define WIKI_NEWLINE        0x040  /* Honor \n - break lines at each \n */
#define WIKI_MARKDOWNLINKS  0x080  /* Resolve hyperlinks as in markdown */
#define WIKI_SAFE           0x100  /* Make the result safe for embedding */
#define WIKI_TARGET_BLANK   0x200  /* Hyperlinks go to a new window */
#define WIKI_NOBRACKET      0x400  /* Omit extra [..] around hyperlinks */
39 #endif
40
41
/*
** The complete set of attributes that may appear on markup.
**
** Each value is also the index of the matching entry in aAttribute[],
** so the numbering here and the order of that table must stay in step.
** Zero is deliberately unused: it is what findAttr() returns for an
** unknown attribute name.
*/
enum allowed_attr_t {
  ATTR_ALIGN       =  1,
  ATTR_ALT         =  2,
  ATTR_BGCOLOR     =  3,
  ATTR_BORDER      =  4,
  ATTR_CELLPADDING =  5,
  ATTR_CELLSPACING =  6,
  ATTR_CLASS       =  7,
  ATTR_CLEAR       =  8,
  ATTR_COLOR       =  9,
  ATTR_COLSPAN     = 10,
  ATTR_COMPACT     = 11,
  ATTR_FACE        = 12,
  ATTR_HEIGHT      = 13,
  ATTR_HREF        = 14,
  ATTR_HSPACE      = 15,
  ATTR_ID          = 16,
  ATTR_LINKS       = 17,
  ATTR_NAME        = 18,
  ATTR_ROWSPAN     = 19,
  ATTR_SIZE        = 20,
  ATTR_SRC         = 21,
  ATTR_START       = 22,
  ATTR_STYLE       = 23,
  ATTR_TARGET      = 24,
  ATTR_TITLE       = 25,
  ATTR_TYPE        = 26,
  ATTR_VALIGN      = 27,
  ATTR_VALUE       = 28,
  ATTR_VSPACE      = 29,
  ATTR_WIDTH       = 30
};
77
/*
** One distinct bit per attribute.  These masks are OR-ed together to
** describe which attributes a given markup element accepts, and to
** track which attributes have already been seen while parsing a tag.
*/
enum amsk_t {
  AMSK_ALIGN       = 1 <<  0,
  AMSK_ALT         = 1 <<  1,
  AMSK_BGCOLOR     = 1 <<  2,
  AMSK_BORDER      = 1 <<  3,
  AMSK_CELLPADDING = 1 <<  4,
  AMSK_CELLSPACING = 1 <<  5,
  AMSK_CLASS       = 1 <<  6,
  AMSK_CLEAR       = 1 <<  7,
  AMSK_COLOR       = 1 <<  8,
  AMSK_COLSPAN     = 1 <<  9,
  AMSK_COMPACT     = 1 << 10,
  AMSK_FACE        = 1 << 11,
  AMSK_HEIGHT      = 1 << 12,
  AMSK_HREF        = 1 << 13,
  AMSK_HSPACE      = 1 << 14,
  AMSK_ID          = 1 << 15,
  AMSK_LINKS       = 1 << 16,
  AMSK_NAME        = 1 << 17,
  AMSK_ROWSPAN     = 1 << 18,
  AMSK_SIZE        = 1 << 19,
  AMSK_SRC         = 1 << 20,
  AMSK_START       = 1 << 21,
  AMSK_STYLE       = 1 << 22,
  AMSK_TARGET      = 1 << 23,
  AMSK_TITLE       = 1 << 24,
  AMSK_TYPE        = 1 << 25,
  AMSK_VALIGN      = 1 << 26,
  AMSK_VALUE       = 1 << 27,
  AMSK_VSPACE      = 1 << 28,
  AMSK_WIDTH       = 1 << 29
};
110
111 static const struct AllowedAttribute {
112 const char *zName;
113 unsigned int iMask;
114 } aAttribute[] = {
115 /* These indexes MUST line up with their
116 corresponding allowed_attr_t enum values.
117 */
118 { 0, 0 },
119 { "align", AMSK_ALIGN },
120 { "alt", AMSK_ALT },
121 { "bgcolor", AMSK_BGCOLOR },
122 { "border", AMSK_BORDER },
123 { "cellpadding", AMSK_CELLPADDING },
124 { "cellspacing", AMSK_CELLSPACING },
125 { "class", AMSK_CLASS },
126 { "clear", AMSK_CLEAR },
127 { "color", AMSK_COLOR },
128 { "colspan", AMSK_COLSPAN },
129 { "compact", AMSK_COMPACT },
130 { "face", AMSK_FACE },
131 { "height", AMSK_HEIGHT },
132 { "href", AMSK_HREF },
133 { "hspace", AMSK_HSPACE },
134 { "id", AMSK_ID },
135 { "links", AMSK_LINKS },
136 { "name", AMSK_NAME },
137 { "rowspan", AMSK_ROWSPAN },
138 { "size", AMSK_SIZE },
139 { "src", AMSK_SRC },
140 { "start", AMSK_START },
141 { "style", AMSK_STYLE },
142 { "target", AMSK_TARGET },
143 { "title", AMSK_TITLE },
144 { "type", AMSK_TYPE },
145 { "valign", AMSK_VALIGN },
146 { "value", AMSK_VALUE },
147 { "vspace", AMSK_VSPACE },
148 { "width", AMSK_WIDTH },
149 };
150
151 /*
152 ** Use binary search to locate a tag in the aAttribute[] table.
153 */
findAttr(const char * z)154 static int findAttr(const char *z){
155 int i, c, first, last;
156 first = 1;
157 last = count(aAttribute) - 1;
158 while( first<=last ){
159 i = (first+last)/2;
160 c = fossil_strcmp(aAttribute[i].zName, z);
161 if( c==0 ){
162 return i;
163 }else if( c<0 ){
164 first = i+1;
165 }else{
166 last = i-1;
167 }
168 }
169 return 0;
170 }
171
172
173
/*
** Codes for every markup element that is allowed.
**
** MARKUP_INVALID is zero.  All other codes are consecutive, follow the
** alphabetical order of the tag names, and equal the index of the
** corresponding <xyz> entry in aMarkup[].
*/
#define MARKUP_INVALID             0
#define MARKUP_A                   1
#define MARKUP_ADDRESS             2
#define MARKUP_HTML5_ARTICLE       3
#define MARKUP_HTML5_ASIDE         4
#define MARKUP_B                   5
#define MARKUP_BIG                 6
#define MARKUP_BLOCKQUOTE          7
#define MARKUP_BR                  8
#define MARKUP_CENTER              9
#define MARKUP_CITE               10
#define MARKUP_CODE               11
#define MARKUP_COL                12
#define MARKUP_COLGROUP           13
#define MARKUP_DD                 14
#define MARKUP_DEL                15
#define MARKUP_DFN                16
#define MARKUP_DIV                17
#define MARKUP_DL                 18
#define MARKUP_DT                 19
#define MARKUP_EM                 20
#define MARKUP_FONT               21
#define MARKUP_HTML5_FOOTER       22
#define MARKUP_H1                 23
#define MARKUP_H2                 24
#define MARKUP_H3                 25
#define MARKUP_H4                 26
#define MARKUP_H5                 27
#define MARKUP_H6                 28
#define MARKUP_HTML5_HEADER       29
#define MARKUP_HR                 30
#define MARKUP_I                  31
#define MARKUP_IMG                32
#define MARKUP_INS                33
#define MARKUP_KBD                34
#define MARKUP_LI                 35
#define MARKUP_HTML5_NAV          36
#define MARKUP_NOBR               37
#define MARKUP_NOWIKI             38
#define MARKUP_OL                 39
#define MARKUP_P                  40
#define MARKUP_PRE                41
#define MARKUP_S                  42
#define MARKUP_SAMP               43
#define MARKUP_HTML5_SECTION      44
#define MARKUP_SMALL              45
#define MARKUP_SPAN               46
#define MARKUP_STRIKE             47
#define MARKUP_STRONG             48
#define MARKUP_SUB                49
#define MARKUP_SUP                50
#define MARKUP_TABLE              51
#define MARKUP_TBODY              52
#define MARKUP_TD                 53
#define MARKUP_TFOOT              54
#define MARKUP_TH                 55
#define MARKUP_THEAD              56
#define MARKUP_TITLE              57
#define MARKUP_TR                 58
#define MARKUP_TT                 59
#define MARKUP_U                  60
#define MARKUP_UL                 61
#define MARKUP_VAR                62
#define MARKUP_VERBATIM           63
246
/*
** Every markup element belongs to exactly one of these categories.
*/
#define MUTYPE_SINGLE      0x0001   /* <img>, <br>, or <hr> */
#define MUTYPE_BLOCK       0x0002   /* Forms a new paragraph. ex: <p>, <h2> */
#define MUTYPE_FONT        0x0004   /* Font changes. ex: <b>, <font>, <sub> */
#define MUTYPE_LIST        0x0010   /* Lists.  <ol>, <ul>, or <dl> */
#define MUTYPE_LI          0x0020   /* List items.  <li>, <dd>, <dt> */
#define MUTYPE_TABLE       0x0040   /* <table> */
#define MUTYPE_TR          0x0080   /* <tr> */
#define MUTYPE_TD          0x0100   /* <td> or <th> */
#define MUTYPE_SPECIAL     0x0200   /* <nowiki> or <verbatim> */
#define MUTYPE_HYPERLINK   0x0400   /* <a> */

/* Categories whose elements require strictly nested end-tags (0x0656) */
#define MUTYPE_Nested      (MUTYPE_BLOCK | MUTYPE_FONT | MUTYPE_LIST | \
                            MUTYPE_TABLE | MUTYPE_SPECIAL | MUTYPE_HYPERLINK)

/*
** Categories whose elements must have an end tag.
*/
#define MUTYPE_STACK  (MUTYPE_BLOCK | MUTYPE_FONT | MUTYPE_LIST | MUTYPE_TABLE)

/*
** Categories that are allowed in "inline" text.
*/
#define MUTYPE_INLINE (MUTYPE_FONT | MUTYPE_HYPERLINK)
273
274 static const struct AllowedMarkup {
275 const char *zName; /* Name of the markup */
276 char iCode; /* The MARKUP_* code */
277 short int iType; /* The MUTYPE_* code */
278 int allowedAttr; /* Allowed attributes on this markup */
279 } aMarkup[] = {
280 { 0, MARKUP_INVALID, 0, 0 },
281 { "a", MARKUP_A, MUTYPE_HYPERLINK,
282 AMSK_HREF|AMSK_NAME|AMSK_CLASS|AMSK_TARGET|AMSK_STYLE|
283 AMSK_TITLE},
284 { "address", MARKUP_ADDRESS, MUTYPE_BLOCK, AMSK_STYLE },
285 { "article", MARKUP_HTML5_ARTICLE, MUTYPE_BLOCK,
286 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
287 { "aside", MARKUP_HTML5_ASIDE, MUTYPE_BLOCK,
288 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
289
290 { "b", MARKUP_B, MUTYPE_FONT, AMSK_STYLE },
291 { "big", MARKUP_BIG, MUTYPE_FONT, AMSK_STYLE },
292 { "blockquote", MARKUP_BLOCKQUOTE, MUTYPE_BLOCK, AMSK_STYLE },
293 { "br", MARKUP_BR, MUTYPE_SINGLE, AMSK_CLEAR },
294 { "center", MARKUP_CENTER, MUTYPE_BLOCK, AMSK_STYLE },
295 { "cite", MARKUP_CITE, MUTYPE_FONT, AMSK_STYLE },
296 { "code", MARKUP_CODE, MUTYPE_FONT, AMSK_STYLE },
297 { "col", MARKUP_COL, MUTYPE_SINGLE,
298 AMSK_ALIGN|AMSK_CLASS|AMSK_COLSPAN|AMSK_WIDTH|AMSK_STYLE },
299 { "colgroup", MARKUP_COLGROUP, MUTYPE_BLOCK,
300 AMSK_ALIGN|AMSK_CLASS|AMSK_COLSPAN|AMSK_WIDTH|AMSK_STYLE},
301 { "dd", MARKUP_DD, MUTYPE_LI, AMSK_STYLE },
302 { "del", MARKUP_DEL, MUTYPE_FONT, AMSK_STYLE },
303 { "dfn", MARKUP_DFN, MUTYPE_FONT, AMSK_STYLE },
304 { "div", MARKUP_DIV, MUTYPE_BLOCK,
305 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
306 { "dl", MARKUP_DL, MUTYPE_LIST,
307 AMSK_COMPACT|AMSK_STYLE },
308 { "dt", MARKUP_DT, MUTYPE_LI, AMSK_STYLE },
309 { "em", MARKUP_EM, MUTYPE_FONT, AMSK_STYLE },
310 { "font", MARKUP_FONT, MUTYPE_FONT,
311 AMSK_COLOR|AMSK_FACE|AMSK_SIZE|AMSK_STYLE },
312 { "footer", MARKUP_HTML5_FOOTER, MUTYPE_BLOCK,
313 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
314
315 { "h1", MARKUP_H1, MUTYPE_BLOCK,
316 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
317 { "h2", MARKUP_H2, MUTYPE_BLOCK,
318 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
319 { "h3", MARKUP_H3, MUTYPE_BLOCK,
320 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
321 { "h4", MARKUP_H4, MUTYPE_BLOCK,
322 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
323 { "h5", MARKUP_H5, MUTYPE_BLOCK,
324 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
325 { "h6", MARKUP_H6, MUTYPE_BLOCK,
326 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
327
328 { "header", MARKUP_HTML5_HEADER, MUTYPE_BLOCK,
329 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
330
331 { "hr", MARKUP_HR, MUTYPE_SINGLE,
332 AMSK_ALIGN|AMSK_COLOR|AMSK_SIZE|AMSK_WIDTH|
333 AMSK_STYLE|AMSK_CLASS },
334 { "i", MARKUP_I, MUTYPE_FONT, AMSK_STYLE },
335 { "img", MARKUP_IMG, MUTYPE_SINGLE,
336 AMSK_ALIGN|AMSK_ALT|AMSK_BORDER|AMSK_HEIGHT|
337 AMSK_HSPACE|AMSK_SRC|AMSK_VSPACE|AMSK_WIDTH|AMSK_STYLE },
338 { "ins", MARKUP_INS, MUTYPE_FONT, AMSK_STYLE },
339 { "kbd", MARKUP_KBD, MUTYPE_FONT, AMSK_STYLE },
340 { "li", MARKUP_LI, MUTYPE_LI,
341 AMSK_TYPE|AMSK_VALUE|AMSK_STYLE },
342 { "nav", MARKUP_HTML5_NAV, MUTYPE_BLOCK,
343 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
344 { "nobr", MARKUP_NOBR, MUTYPE_FONT, 0 },
345 { "nowiki", MARKUP_NOWIKI, MUTYPE_SPECIAL, 0 },
346 { "ol", MARKUP_OL, MUTYPE_LIST,
347 AMSK_START|AMSK_TYPE|AMSK_COMPACT|AMSK_STYLE },
348 { "p", MARKUP_P, MUTYPE_BLOCK,
349 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
350 { "pre", MARKUP_PRE, MUTYPE_BLOCK, AMSK_STYLE },
351 { "s", MARKUP_S, MUTYPE_FONT, AMSK_STYLE },
352 { "samp", MARKUP_SAMP, MUTYPE_FONT, AMSK_STYLE },
353 { "section", MARKUP_HTML5_SECTION, MUTYPE_BLOCK,
354 AMSK_ID|AMSK_CLASS|AMSK_STYLE },
355 { "small", MARKUP_SMALL, MUTYPE_FONT, AMSK_STYLE },
356 { "span", MARKUP_SPAN, MUTYPE_BLOCK,
357 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
358 { "strike", MARKUP_STRIKE, MUTYPE_FONT, AMSK_STYLE },
359 { "strong", MARKUP_STRONG, MUTYPE_FONT, AMSK_STYLE },
360 { "sub", MARKUP_SUB, MUTYPE_FONT, AMSK_STYLE },
361 { "sup", MARKUP_SUP, MUTYPE_FONT, AMSK_STYLE },
362 { "table", MARKUP_TABLE, MUTYPE_TABLE,
363 AMSK_ALIGN|AMSK_BGCOLOR|AMSK_BORDER|AMSK_CELLPADDING|
364 AMSK_CELLSPACING|AMSK_HSPACE|AMSK_VSPACE|AMSK_CLASS|
365 AMSK_STYLE },
366 { "tbody", MARKUP_TBODY, MUTYPE_BLOCK,
367 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
368 { "td", MARKUP_TD, MUTYPE_TD,
369 AMSK_ALIGN|AMSK_BGCOLOR|AMSK_COLSPAN|
370 AMSK_ROWSPAN|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
371 { "tfoot", MARKUP_TFOOT, MUTYPE_BLOCK,
372 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
373 { "th", MARKUP_TH, MUTYPE_TD,
374 AMSK_ALIGN|AMSK_BGCOLOR|AMSK_COLSPAN|
375 AMSK_ROWSPAN|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
376 { "thead", MARKUP_THEAD, MUTYPE_BLOCK,
377 AMSK_ALIGN|AMSK_CLASS|AMSK_STYLE },
378 { "title", MARKUP_TITLE, MUTYPE_BLOCK, 0 },
379 { "tr", MARKUP_TR, MUTYPE_TR,
380 AMSK_ALIGN|AMSK_BGCOLOR|AMSK_VALIGN|AMSK_CLASS|AMSK_STYLE },
381 { "tt", MARKUP_TT, MUTYPE_FONT, AMSK_STYLE },
382 { "u", MARKUP_U, MUTYPE_FONT, AMSK_STYLE },
383 { "ul", MARKUP_UL, MUTYPE_LIST,
384 AMSK_TYPE|AMSK_COMPACT|AMSK_STYLE },
385 { "var", MARKUP_VAR, MUTYPE_FONT, AMSK_STYLE },
386 { "verbatim", MARKUP_VERBATIM, MUTYPE_SPECIAL,
387 AMSK_ID|AMSK_TYPE },
388 };
389
show_allowed_wiki_markup(void)390 void show_allowed_wiki_markup( void ){
391 int i; /* loop over allowedAttr */
392 for( i=1 ; i<=count(aMarkup) - 1 ; i++ ){
393 @ <%s(aMarkup[i].zName)>
394 }
395 }
396
397 /*
398 ** Use binary search to locate a tag in the aMarkup[] table.
399 */
findTag(const char * z)400 static int findTag(const char *z){
401 int i, c, first, last;
402 first = 1;
403 last = count(aMarkup) - 1;
404 while( first<=last ){
405 i = (first+last)/2;
406 c = fossil_strcmp(aMarkup[i].zName, z);
407 if( c==0 ){
408 assert( aMarkup[i].iCode==i );
409 return i;
410 }else if( c<0 ){
411 first = i+1;
412 }else{
413 last = i-1;
414 }
415 }
416 return MARKUP_INVALID;
417 }
418
/*
** Token types produced by the tokenizers nextWikiToken() and
** nextRawToken().
*/
#define TOKEN_MARKUP      1   /* <...> */
#define TOKEN_CHARACTER   2   /* "&" or "<" not part of markup */
#define TOKEN_LINK        3   /* [...] */
#define TOKEN_PARAGRAPH   4   /* blank lines */
#define TOKEN_NEWLINE     5   /* A single "\n" */
#define TOKEN_BUL_LI      6   /* Indented "*" bullet list item */
#define TOKEN_NUM_LI      7   /* Indented "#" numbered list item */
#define TOKEN_ENUM        8   /* Indented digits with optional "." */
#define TOKEN_INDENT      9   /* Leading indentation of a paragraph */
#define TOKEN_RAW        10   /* Output exactly (used when wiki-use-html==1) */
#define TOKEN_TEXT       11   /* None of the above */
433
/*
** Renderer state flags.  These start at bit 16 because the lower 16
** bits of the state word are reserved for the WIKI_* flags.
*/
#define AT_NEWLINE          0x0010000  /* At start of a line */
#define AT_PARAGRAPH        0x0020000  /* At start of a paragraph */
#define ALLOW_WIKI          0x0040000  /* Allow wiki markup */
#define ALLOW_LINKS         0x0080000  /* Allow [...] hyperlinks */
#define FONT_MARKUP_ONLY    0x0100000  /* Only allow MUTYPE_FONT markup */
#define INLINE_MARKUP_ONLY  0x0200000  /* Allow only "inline" markup */
#define IN_LIST             0x0400000  /* Within wiki <ul> or <ol> */
444
445 /*
446 ** Current state of the rendering engine
447 */
448 typedef struct Renderer Renderer;
449 struct Renderer {
450 Blob *pOut; /* Output appended to this blob */
451 int state; /* Flag that govern rendering */
452 unsigned renderFlags; /* Flags from the client */
453 int wikiList; /* Current wiki list type */
454 int inVerbatim; /* True in <verbatim> mode */
455 int preVerbState; /* Value of state prior to verbatim */
456 int wantAutoParagraph; /* True if a <p> is desired */
457 int inAutoParagraph; /* True if within an automatic paragraph */
458 const char *zVerbatimId; /* The id= attribute of <verbatim> */
459 int nStack; /* Number of elements on the stack */
460 int nAlloc; /* Space allocated for aStack */
461 struct sStack {
462 short iCode; /* Markup code */
463 short allowWiki; /* ALLOW_WIKI if wiki allowed before tag */
464 const char *zId; /* ID attribute or NULL */
465 } *aStack;
466 };
467
/*
** Return TRUE if HTML is to be the only markup language used for wiki.
**
** The "wiki-use-html" setting is read on the first call and the answer
** is remembered in a function-local static, on the assumption that the
** setting does not change while the program runs.
*/
static int wikiUsesHtml(void){
  static int cached = -1;   /* Negative until the setting has been read */
  if( cached>=0 ) return cached;
  cached = db_get_boolean("wiki-use-html", 0);
  return cached;
}
480
/*
** z points to a "<" character.  Decide whether it begins well-formed
** markup.
**
** Returns the total length of the markup, counting both the opening "<"
** and the closing ">", or 0 if the text is not well-formed markup.  A
** ">" that appears inside a single- or double-quoted string does not
** end the markup.
*/
int html_tag_length(const char *z){
  int n;            /* Index of the character being examined */
  int c;            /* Character at z[n] */
  int quote = 0;    /* The open quote character, or 0 if not in quotes */

  n = (z[1]=='/') ? 2 : 1;
  if( !fossil_isalpha(z[n]) ) return 0;
  while( fossil_isalnum(z[n]) || z[n]=='-' ) n++;
  c = z[n];
  if( c=='/' && z[n+1]=='>' ) return n+2;
  if( c!='>' && !fossil_isspace(c) ) return 0;
  for(; (c = z[n])!=0; n++){
    if( quote ){
      if( c==quote ) quote = 0;
    }else if( c=='>' ){
      break;
    }else if( c=='"' || c=='\'' ){
      quote = c;
    }
  }
  return z[n]=='>' ? n+1 : 0;
}
508
/*
** z points to a "\n" character.  If the whitespace that follows holds
** at least one more "\n" (that is, there are one or more blank lines),
** return the number of characters up to and including the last such
** "\n".  Otherwise return 0.
*/
static int paragraphBreakLength(const char *z){
  int iLastNl = 0;    /* Index of the last "\n" found after z[0], or 0 */
  int i = 1;
  while( fossil_isspace(z[i]) ){
    if( z[i]=='\n' ) iLastNl = i;
    i++;
  }
  return iLastNl>0 ? iLastNl+1 : 0;
}
529
530 /*
531 ** Return the number of characters until the next "interesting"
532 ** characters.
533 **
534 ** Interesting characters are:
535 **
536 ** <
537 ** &
538 ** \n
539 ** [
540 **
541 ** The "[" is only considered if flags contain ALLOW_LINKS or ALLOW_WIKI.
542 ** The "\n" is only considered interesting if the flags constains ALLOW_WIKI.
543 */
textLength(const char * z,int flags)544 static int textLength(const char *z, int flags){
545 const char *zReject;
546 if( flags & ALLOW_WIKI ){
547 zReject = "<&[\n";
548 }else if( flags & ALLOW_LINKS ){
549 zReject = "<&[";
550 }else{
551 zReject = "<&";
552 }
553 return strcspn(z, zReject);
554 }
555
/*
** Return true if z[], which must begin with "&", starts an HTML
** character element: either "&#" followed by one or more digits and a
** ";", or "&" followed by one or more letters and a ";".
*/
static int isElement(const char *z){
  int i;
  assert( z[0]=='&' );
  if( z[1]!='#' ){
    /* Named form: &name; */
    i = 1;
    while( fossil_isalpha(z[i]) ) i++;
    return i>1 && z[i]==';';
  }
  /* Numeric form: &#digits; */
  i = 2;
  while( fossil_isdigit(z[i]) ) i++;
  return i>2 && z[i]==';';
}
570
/*
** Determine whether z[] begins a wiki list item whose bullet character
** is listChar.
**
** A list item is: indentation, the bullet character, more indentation,
** and then a non-space character.  Each run of indentation consists of
** spaces and tabs and must be at least two columns wide, where a tab
** counts as two columns.
**
** Returns the length of the bullet text (everything before the item
** content), or 0 if z[] is not a list item.
*/
static int listItemLength(const char *z, const char listChar){
  int n = 0;     /* Characters consumed so far */
  int w = 0;     /* Width of the current run of indentation */

  for(; z[n]==' ' || z[n]=='\t'; n++){
    w += (z[n]=='\t') ? 2 : 1;
  }
  if( w<2 ) return 0;
  if( z[n]!=listChar ) return 0;
  n++;

  w = 0;
  for(; z[n]==' ' || z[n]=='\t'; n++){
    w += (z[n]=='\t') ? 2 : 1;
  }
  if( w<2 ) return 0;
  if( fossil_isspace(z[n]) ) return 0;
  return n;
}
595
/*
** Determine whether z[] begins an enumeration value.  Returns the
** length of the bullet text if so, or 0 if not.
**
** Syntax:
**    *  a tab or two or more spaces
**    *  one or more digits
**    *  optional "."
**    *  another tab or two or more spaces
**    *  a non-space character (not counted in the returned length)
*/
static int enumLength(const char *z){
  int n = 0;         /* Characters consumed so far */
  int w = 0;         /* Width of the current run of indentation */
  int nDigit = 0;    /* Number of digits seen */

  for(; z[n]==' ' || z[n]=='\t'; n++){
    w += (z[n]=='\t') ? 2 : 1;
  }
  if( w<2 ) return 0;

  while( fossil_isdigit(z[n]) ){
    nDigit++;
    n++;
  }
  if( nDigit==0 ) return 0;
  if( z[n]=='.' ) n++;

  w = 0;
  for(; z[n]==' ' || z[n]=='\t'; n++){
    w += (z[n]=='\t') ? 2 : 1;
  }
  if( w<2 ) return 0;
  if( fossil_isspace(z[n]) ) return 0;
  return n;
}
631
/*
** Determine whether z[] begins an indented paragraph: at least two
** columns of spaces and tabs (a tab counts as two columns) followed by
** a non-space character.
**
** Returns the length of the indent, or 0 if z[] is not indented text.
*/
static int indentLength(const char *z){
  int n = 0;     /* Characters of indentation */
  int w = 0;     /* Width of the indentation in columns */
  for(; z[n]==' ' || z[n]=='\t'; n++){
    w += (z[n]=='\t') ? 2 : 1;
  }
  if( w<2 ) return 0;
  if( fossil_isspace(z[n]) ) return 0;
  return n;
}
649
/*
** z[] begins with "[".  If a "]" occurs later in the string, return the
** length of the hyperlink text from the "[" through the first "]"
** inclusive.  Otherwise return 0.
*/
static int linkLength(const char *z){
  int n = 0;
  assert( z[0]=='[' );
  while( z[n]!=0 && z[n]!=']' ) n++;
  return z[n]==']' ? n+1 : 0;
}
664
665 /*
666 ** Get the next wiki token.
667 **
668 ** z points to the start of a token. Return the number of
669 ** characters in that token. Write the token type into *pTokenType.
670 */
nextWikiToken(const char * z,Renderer * p,int * pTokenType)671 static int nextWikiToken(const char *z, Renderer *p, int *pTokenType){
672 int n;
673 if( z[0]=='<' ){
674 n = html_tag_length(z);
675 if( n>0 ){
676 *pTokenType = TOKEN_MARKUP;
677 return n;
678 }else{
679 *pTokenType = TOKEN_CHARACTER;
680 return 1;
681 }
682 }
683 if( z[0]=='&' && (p->inVerbatim || !isElement(z)) ){
684 *pTokenType = TOKEN_CHARACTER;
685 return 1;
686 }
687 if( (p->state & ALLOW_WIKI)!=0 ){
688 if( z[0]=='\n' ){
689 n = paragraphBreakLength(z);
690 if( n>0 ){
691 *pTokenType = TOKEN_PARAGRAPH;
692 return n;
693 }else{
694 *pTokenType = TOKEN_NEWLINE;
695 return 1;
696 }
697 }
698 if( (p->state & AT_NEWLINE)!=0 && fossil_isspace(z[0]) ){
699 n = listItemLength(z, '*');
700 if( n>0 ){
701 *pTokenType = TOKEN_BUL_LI;
702 return n;
703 }
704 n = listItemLength(z, '#');
705 if( n>0 ){
706 *pTokenType = TOKEN_NUM_LI;
707 return n;
708 }
709 n = enumLength(z);
710 if( n>0 ){
711 *pTokenType = TOKEN_ENUM;
712 return n;
713 }
714 }
715 if( (p->state & AT_PARAGRAPH)!=0 && fossil_isspace(z[0]) ){
716 n = indentLength(z);
717 if( n>0 ){
718 *pTokenType = TOKEN_INDENT;
719 return n;
720 }
721 }
722 if( z[0]=='[' && (n = linkLength(z))>0 ){
723 *pTokenType = TOKEN_LINK;
724 return n;
725 }
726 }else if( (p->state & ALLOW_LINKS)!=0 && z[0]=='[' && (n = linkLength(z))>0 ){
727 *pTokenType = TOKEN_LINK;
728 return n;
729 }
730 *pTokenType = TOKEN_TEXT;
731 return 1 + textLength(z+1, p->state);
732 }
733
734 /*
735 ** Parse only Wiki links, return everything else as TOKEN_RAW.
736 **
737 ** z points to the start of a token. Return the number of
738 ** characters in that token. Write the token type into *pTokenType.
739 */
nextRawToken(const char * z,Renderer * p,int * pTokenType)740 static int nextRawToken(const char *z, Renderer *p, int *pTokenType){
741 int n;
742 if( z[0]=='[' && (n = linkLength(z))>0 ){
743 *pTokenType = TOKEN_LINK;
744 return n;
745 }
746 *pTokenType = TOKEN_RAW;
747 return 1 + textLength(z+1, p->state);
748 }
749
/*
** The result of parsing a single markup element.  Filled in by
** parseMarkup().
**
** The zValue pointers refer into the text that was parsed, not to
** separately allocated copies.
*/
typedef struct ParsedMarkup ParsedMarkup;
struct ParsedMarkup {
  unsigned char endTag;       /* True if </...> instead of <...> */
  unsigned char iCode;        /* MARKUP_* */
  unsigned char nAttr;        /* Number of attributes */
  unsigned short iType;       /* MUTYPE_* */
  struct {
    unsigned char iACode;     /* ATTR_* */
    char *zValue;             /* Argument to this attribute.  Might be NULL */
    char cTerm;               /* Original argument termination character */
  } aAttr[10];
};
766
767 /*
768 ** z[] is an HTML markup element - something that begins with '<'.
769 ** Parse this element into the p structure.
770 **
771 ** The content of z[] might be modified by converting characters
772 ** to lowercase and by inserting some "\000" characters.
773 */
parseMarkup(ParsedMarkup * p,char * z)774 static int parseMarkup(ParsedMarkup *p, char *z){
775 int i, j, c;
776 int iACode;
777 char *zValue;
778 int seen = 0;
779 char zTag[100];
780
781 if( z[1]=='/' ){
782 p->endTag = 1;
783 i = 2;
784 }else{
785 p->endTag = 0;
786 i = 1;
787 }
788 j = 0;
789 while( fossil_isalnum(z[i]) ){
790 if( j<sizeof(zTag)-1 ) zTag[j++] = fossil_tolower(z[i]);
791 i++;
792 }
793 zTag[j] = 0;
794 p->iCode = findTag(zTag);
795 p->iType = aMarkup[p->iCode].iType;
796 p->nAttr = 0;
797 c = 0;
798 if( z[i]=='-' ){
799 p->aAttr[0].iACode = iACode = ATTR_ID;
800 i++;
801 p->aAttr[0].zValue = &z[i];
802 while( fossil_isalnum(z[i]) ){ i++; }
803 p->aAttr[0].cTerm = c = z[i];
804 z[i++] = 0;
805 p->nAttr = 1;
806 if( c=='>' ) return 0;
807 }
808 while( fossil_isspace(z[i]) ){ i++; }
809 while( c!='>' && p->nAttr<8 && fossil_isalpha(z[i]) ){
810 int attrOk; /* True to preserve attribute. False to ignore it */
811 j = 0;
812 while( fossil_isalnum(z[i]) ){
813 if( j<sizeof(zTag)-1 ) zTag[j++] = fossil_tolower(z[i]);
814 i++;
815 }
816 zTag[j] = 0;
817 p->aAttr[p->nAttr].iACode = iACode = findAttr(zTag);
818 attrOk = iACode!=0 && (seen & aAttribute[iACode].iMask)==0;
819 while( fossil_isspace(z[i]) ){ z++; }
820 if( z[i]!='=' ){
821 p->aAttr[p->nAttr].zValue = 0;
822 p->aAttr[p->nAttr].cTerm = 0;
823 c = 0;
824 }else{
825 i++;
826 while( fossil_isspace(z[i]) ){ z++; }
827 if( z[i]=='"' ){
828 i++;
829 zValue = &z[i];
830 while( z[i] && z[i]!='"' ){ i++; }
831 }else if( z[i]=='\'' ){
832 i++;
833 zValue = &z[i];
834 while( z[i] && z[i]!='\'' ){ i++; }
835 }else{
836 zValue = &z[i];
837 while( !fossil_isspace(z[i]) && z[i]!='>' ){
838 if( z[i]=='\'' || z[i]=='"' ) attrOk = 0;
839 i++;
840 }
841 }
842 if( attrOk ){
843 p->aAttr[p->nAttr].zValue = zValue;
844 p->aAttr[p->nAttr].cTerm = c = z[i];
845 if( z[i]==0 ){
846 i--;
847 }else{
848 z[i] = 0;
849 }
850 }
851 i++;
852 }
853 if( attrOk ){
854 seen |= aAttribute[iACode].iMask;
855 p->nAttr++;
856 }
857 while( fossil_isspace(z[i]) ){ i++; }
858 if( z[i]==0 || z[i]=='>' || (z[i]=='/' && z[i+1]=='>') ) break;
859 }
860 return seen;
861 }
862
863 /*
864 ** Render markup on the given blob.
865 */
renderMarkup(Blob * pOut,ParsedMarkup * p)866 static void renderMarkup(Blob *pOut, ParsedMarkup *p){
867 int i;
868 if( p->endTag ){
869 blob_appendf(pOut, "</%s>", aMarkup[p->iCode].zName);
870 }else{
871 blob_appendf(pOut, "<%s", aMarkup[p->iCode].zName);
872 for(i=0; i<p->nAttr; i++){
873 blob_appendf(pOut, " %s", aAttribute[p->aAttr[i].iACode].zName);
874 if( p->aAttr[i].zValue ){
875 const char *zVal = p->aAttr[i].zValue;
876 if( p->aAttr[i].iACode==ATTR_SRC && zVal[0]=='/' ){
877 blob_appendf(pOut, "=\"%R%s\"", zVal);
878 }else{
879 blob_appendf(pOut, "=\"%s\"", zVal);
880 }
881 }
882 }
883 if (p->iType & MUTYPE_SINGLE){
884 blob_append_string(pOut, " /");
885 }
886 blob_append_char(pOut, '>');
887 }
888 }
889
890 /*
891 ** When the markup was parsed, some "\000" may have been inserted.
892 ** This routine restores to those "\000" values back to their
893 ** original content.
894 */
unparseMarkup(ParsedMarkup * p)895 static void unparseMarkup(ParsedMarkup *p){
896 int i, n;
897 for(i=0; i<p->nAttr; i++){
898 char *z = p->aAttr[i].zValue;
899 if( z==0 ) continue;
900 if( p->aAttr[i].cTerm ){
901 n = strlen(z);
902 z[n] = p->aAttr[i].cTerm;
903 }
904 }
905 }
906
907 /*
908 ** Return the value of attribute attrId. Return NULL if there is no
909 ** ID attribute.
910 */
attributeValue(ParsedMarkup * p,int attrId)911 static const char *attributeValue(ParsedMarkup *p, int attrId){
912 int i;
913 for(i=0; i<p->nAttr; i++){
914 if( p->aAttr[i].iACode==attrId ){
915 return p->aAttr[i].zValue;
916 }
917 }
918 return 0;
919 }
920
921 /*
922 ** Return the ID attribute for markup. Return NULL if there is no
923 ** ID attribute.
924 */
markupId(ParsedMarkup * p)925 static const char *markupId(ParsedMarkup *p){
926 return attributeValue(p, ATTR_ID);
927 }
928
929 /*
930 ** Check markup pMarkup to see if it is a hyperlink with class "button"
931 ** that is follows by simple text and an </a> only. Example:
932 **
933 ** <a class="button" href="../index.wiki">Index</a>
934 **
935 ** If the markup matches this pattern, and if the WIKI_BUTTONS flag was
936 ** passed to wiki_convert(), then transform this link into a submenu
937 ** button, skip the text, and set *pN equal to the total length of the
938 ** text through the end of </a> and return true. If the markup does
939 ** not match or if WIKI_BUTTONS is not set, then make no changes to *pN
940 ** and return false.
941 */
isButtonHyperlink(Renderer * p,ParsedMarkup * pMarkup,const char * z,int * pN)942 static int isButtonHyperlink(
943 Renderer *p, /* Renderer state */
944 ParsedMarkup *pMarkup, /* Potential button markup */
945 const char *z, /* Complete text of Wiki */
946 int *pN /* Characters of z[] consumed */
947 ){
948 const char *zClass;
949 const char *zHref;
950 char *zTag;
951 int i, j;
952 if( (p->state & WIKI_BUTTONS)==0 ) return 0;
953 zClass = attributeValue(pMarkup, ATTR_CLASS);
954 if( zClass==0 ) return 0;
955 if( fossil_strcmp(zClass, "button")!=0 ) return 0;
956 zHref = attributeValue(pMarkup, ATTR_HREF);
957 if( zHref==0 ) return 0;
958 i = *pN;
959 while( z[i] && z[i]!='<' ){ i++; }
960 if( fossil_strnicmp(&z[i], "</a>",4)!=0 ) return 0;
961 for(j=*pN; fossil_isspace(z[j]); j++){}
962 zTag = mprintf("%.*s", i-j, &z[j]);
963 j = (int)strlen(zTag);
964 while( j>0 && fossil_isspace(zTag[j-1]) ){ j--; }
965 if( j==0 ) return 0;
966 style_submenu_element(zTag, "%s", zHref);
967 *pN = i+4;
968 return 1;
969 }
970
971 /*
972 ** Pop a single element off of the stack. As the element is popped,
973 ** output its end tag if it is not a </div> tag.
974 */
popStack(Renderer * p)975 static void popStack(Renderer *p){
976 if( p->nStack ){
977 int iCode;
978 p->nStack--;
979 iCode = p->aStack[p->nStack].iCode;
980 if( (iCode!=MARKUP_DIV || p->aStack[p->nStack].zId==0) && p->pOut ){
981 blob_appendf(p->pOut, "</%s>", aMarkup[iCode].zName);
982 }
983 }
984 }
985
986 /*
987 ** Push a new markup value onto the stack. Enlarge the stack
988 ** if necessary.
989 */
pushStackWithId(Renderer * p,int elem,const char * zId,int w)990 static void pushStackWithId(Renderer *p, int elem, const char *zId, int w){
991 if( p->nStack>=p->nAlloc ){
992 p->nAlloc = p->nAlloc*2 + 100;
993 p->aStack = fossil_realloc(p->aStack, p->nAlloc*sizeof(p->aStack[0]));
994 }
995 p->aStack[p->nStack].iCode = elem;
996 p->aStack[p->nStack].zId = zId;
997 p->aStack[p->nStack].allowWiki = w;
998 p->nStack++;
999 }
/*
** Push markup code elem onto the stack with no ID and with an allowWiki
** value of zero.
*/
static void pushStack(Renderer *p, int elem){
  const char *zNoId = 0;
  pushStackWithId(p, elem, zNoId, 0);
}
1003
1004 /*
1005 ** Pop the stack until the top-most iTag element is removed.
1006 ** If there is no iTag element on the stack, this routine
1007 ** is a no-op.
1008 */
popStackToTag(Renderer * p,int iTag)1009 static void popStackToTag(Renderer *p, int iTag){
1010 int i;
1011 for(i=p->nStack-1; i>=0; i--){
1012 if( p->aStack[i].iCode!=iTag ) continue;
1013 if( p->aStack[i].zId ) continue;
1014 break;
1015 }
1016 if( i<0 ) return;
1017 while( p->nStack>i ){
1018 popStack(p);
1019 }
1020 }
1021
1022 /*
1023 ** Attempt to find a find a tag of type iTag with id zId. Return -1
1024 ** if not found. If found, return its stack level.
1025 */
findTagWithId(Renderer * p,int iTag,const char * zId)1026 static int findTagWithId(Renderer *p, int iTag, const char *zId){
1027 int i;
1028 assert( zId!=0 );
1029 for(i=p->nStack-1; i>=0; i--){
1030 if( p->aStack[i].iCode!=iTag ) continue;
1031 if( p->aStack[i].zId==0 ) continue;
1032 if( fossil_strcmp(zId, p->aStack[i].zId)!=0 ) continue;
1033 break;
1034 }
1035 return i;
1036 }
1037
1038 /*
1039 ** Pop the stack until the top-most element of the stack
1040 ** is an element that matches the type in iMask. Return
1041 ** code of the markup element that is on left on top of the stack.
1042 ** If the stack does not have an element
1043 ** that matches iMask, then leave the stack unchanged and
1044 ** return false (MARKUP_INVALID).
1045 */
backupToType(Renderer * p,int iMask)1046 static int backupToType(Renderer *p, int iMask){
1047 int i;
1048 for(i=p->nStack-1; i>=0; i--){
1049 if( aMarkup[p->aStack[i].iCode].iType & iMask ) break;
1050 }
1051 if( i<0 ) return 0;
1052 i++;
1053 while( p->nStack>i ){
1054 popStack(p);
1055 }
1056 return p->aStack[i-1].iCode;
1057 }
1058
1059 /*
1060 ** Begin a new paragraph if that something that is needed.
1061 */
startAutoParagraph(Renderer * p)1062 static void startAutoParagraph(Renderer *p){
1063 if( p->wantAutoParagraph==0 ) return;
1064 if( p->state & WIKI_LINKSONLY ) return;
1065 if( p->wikiList==MARKUP_OL || p->wikiList==MARKUP_UL ) return;
1066 blob_append_string(p->pOut, "<p>");
1067 p->wantAutoParagraph = 0;
1068 p->inAutoParagraph = 1;
1069 }
1070
1071 /*
1072 ** End a paragraph if we are in one.
1073 */
endAutoParagraph(Renderer * p)1074 static void endAutoParagraph(Renderer *p){
1075 if( p->inAutoParagraph ){
1076 p->inAutoParagraph = 0;
1077 }
1078 }
1079
1080 /*
1081 ** If the input string corresponds to an existing baseline,
1082 ** return true.
1083 */
is_valid_hname(const char * z)1084 static int is_valid_hname(const char *z){
1085 int n = strlen(z);
1086 if( n<4 || n>HNAME_MAX ) return 0;
1087 if( !validate16(z, n) ) return 0;
1088 return 1;
1089 }
1090
1091 /*
1092 ** Return TRUE if a hash name corresponds to an artifact in this
1093 ** repository.
1094 */
in_this_repo(const char * zUuid)1095 static int in_this_repo(const char *zUuid){
1096 static Stmt q;
1097 int rc;
1098 int n;
1099 char zU2[HNAME_MAX+1];
1100 db_static_prepare(&q,
1101 "SELECT 1 FROM blob WHERE uuid>=:u AND uuid<:u2"
1102 );
1103 db_bind_text(&q, ":u", zUuid);
1104 n = (int)strlen(zUuid);
1105 if( n>=sizeof(zU2) ) n = sizeof(zU2)-1;
1106 memcpy(zU2, zUuid, n);
1107 zU2[n-1]++;
1108 zU2[n] = 0;
1109 db_bind_text(&q, ":u2", zU2);
1110 rc = db_step(&q);
1111 db_reset(&q);
1112 return rc==SQLITE_ROW;
1113 }
1114
1115 /*
1116 ** zTarget is guaranteed to be a UUID. It might be the UUID of a ticket.
1117 ** If it is, store in *pClosed a true or false depending on whether or not
1118 ** the ticket is closed and return true. If zTarget
1119 ** is not the UUID of a ticket, return false.
1120 */
is_ticket(const char * zTarget,int * pClosed)1121 int is_ticket(
1122 const char *zTarget, /* Ticket UUID */
1123 int *pClosed /* True if the ticket is closed */
1124 ){
1125 static Stmt q;
1126 int n;
1127 int rc;
1128 char zLower[HNAME_MAX+1];
1129 char zUpper[HNAME_MAX+1];
1130 n = strlen(zTarget);
1131 memcpy(zLower, zTarget, n+1);
1132 canonical16(zLower, n+1);
1133 memcpy(zUpper, zLower, n+1);
1134 zUpper[n-1]++;
1135 if( !db_static_stmt_is_init(&q) ){
1136 char *zClosedExpr = db_get("ticket-closed-expr", "status='Closed'");
1137 db_static_prepare(&q,
1138 "SELECT %z FROM ticket "
1139 " WHERE tkt_uuid>=:lwr AND tkt_uuid<:upr",
1140 zClosedExpr /*safe-for-%s*/
1141 );
1142 }
1143 db_bind_text(&q, ":lwr", zLower);
1144 db_bind_text(&q, ":upr", zUpper);
1145 if( db_step(&q)==SQLITE_ROW ){
1146 rc = 1;
1147 *pClosed = db_column_int(&q, 0);
1148 }else{
1149 rc = 0;
1150 }
1151 db_reset(&q);
1152 return rc;
1153 }
1154
1155 /*
1156 ** Return a pointer to the name part of zTarget (skipping the "wiki:" prefix
1157 ** if there is one) if zTarget is a valid wiki page name. Return NULL if
1158 ** zTarget names a page that does not exist.
1159 */
validWikiPageName(int mFlags,const char * zTarget)1160 static const char *validWikiPageName(int mFlags, const char *zTarget){
1161 if( strncmp(zTarget, "wiki:", 5)==0
1162 && wiki_name_is_wellformed((const unsigned char*)zTarget) ){
1163 return zTarget+5;
1164 }
1165 if( strcmp(zTarget, "Sandbox")==0 ) return zTarget;
1166 if( wiki_name_is_wellformed((const unsigned char *)zTarget)
1167 && ((mFlags & WIKI_NOBADLINKS)==0 ||
1168 db_exists("SELECT 1 FROM tag WHERE tagname GLOB 'wiki-%q'"
1169 " AND (SELECT value FROM tagxref WHERE tagid=tag.tagid"
1170 " ORDER BY mtime DESC LIMIT 1) > 0", zTarget))
1171 ){
1172 return zTarget;
1173 }
1174 return 0;
1175 }
1176
/*
** Hash to which hyperlinks to wiki pages are currently being redirected,
** or NULL if no override is in effect.  The pointer is stored as-is; the
** string is not copied.
*/
static const char *wikiOverrideHash = 0;

/*
** Request that Fossil-wiki hyperlinks to wiki pages be overridden to the
** hash value supplied.  If the value is NULL, then the override is
** cancelled and all hyperlinks are resolved normally.
*/
void wiki_hyperlink_override(const char *zUuid){
  wikiOverrideHash = zUuid;
}
1187
1188
1189 /*
1190 ** If links to wiki page zTarget should be redirected to some historical
1191 ** version of that page, then return the hash of the historical version.
1192 ** If no override is required, return NULL.
1193 */
wiki_is_overridden(const char * zTarget)1194 static const char *wiki_is_overridden(const char *zTarget){
1195 if( wikiOverrideHash==0 ) return 0;
1196 /* The override should only happen if the override version is not the
1197 ** latest version of the wiki page. */
1198 if( !db_exists(
1199 "SELECT 1 FROM tag, blob, tagxref AS xA, tagxref AS xB "
1200 " WHERE tag.tagname GLOB 'wiki-%q*'"
1201 " AND blob.uuid GLOB '%q'"
1202 " AND xA.tagid=tag.tagid AND xA.rid=blob.rid"
1203 " AND xB.tagid=tag.tagid AND xB.mtime>xA.mtime",
1204 zTarget, wikiOverrideHash
1205 ) ){
1206 return 0;
1207 }
1208 return wikiOverrideHash;
1209 }
1210
1211 /*
1212 ** Resolve a hyperlink. The zTarget argument is the content of the [...]
1213 ** in the wiki. Append to the output string whatever text is appropriate
1214 ** for opening the hyperlink. Write into zClose[0...nClose-1] text that will
1215 ** close the markup.
1216 **
1217 ** If this routine determines that no hyperlink should be generated, then
1218 ** set zClose[0] to 0.
1219 **
1220 ** Actually, this routine might or might not append the hyperlink, depending
1221 ** on current rendering rules: specifically does the current user have
1222 ** "History" permission.
1223 **
1224 ** [http://fossil-scm.org/]
1225 ** [https://fossil-scm.org/]
1226 ** [ftp://fossil-scm.org/]
1227 ** [mailto:fossil-users@lists.fossil-scm.org]
1228 **
1229 ** [/path] -> Refers to the root of the Fossil hierarchy, not
1230 ** the root of the URI domain
1231 **
1232 ** [./relpath]
1233 ** [../relpath]
1234 **
1235 ** [#fragment]
1236 **
1237 ** [0123456789abcdef]
1238 **
1239 ** [WikiPageName]
1240 ** [wiki:WikiPageName]
1241 **
1242 ** [2010-02-27 07:13]
1243 **
1244 ** [InterMap:Link] -> Interwiki link
1245 */
wiki_resolve_hyperlink(Blob * pOut,int mFlags,const char * zTarget,char * zClose,int nClose,const char * zOrig,const char * zTitle)1246 void wiki_resolve_hyperlink(
1247 Blob *pOut, /* Write the HTML output here */
1248 int mFlags, /* Rendering option flags */
1249 const char *zTarget, /* Hyperlink target; text within [...] */
1250 char *zClose, /* Write hyperlink closing text here */
1251 int nClose, /* Bytes available in zClose[] */
1252 const char *zOrig, /* Complete document text */
1253 const char *zTitle /* Title of the link */
1254 ){
1255 const char *zTerm = "</a>";
1256 const char *z;
1257 char *zExtra = 0;
1258 const char *zExtraNS = 0;
1259 char *zRemote = 0;
1260
1261 if( zTitle ){
1262 zExtra = mprintf(" title='%h'", zTitle);
1263 zExtraNS = zExtra+1;
1264 }else if( mFlags & WIKI_TARGET_BLANK ){
1265 zExtra = mprintf(" target='_blank'");
1266 zExtraNS = zExtra+1;
1267 }
1268 assert( nClose>=20 );
1269 if( strncmp(zTarget, "http:", 5)==0
1270 || strncmp(zTarget, "https:", 6)==0
1271 || strncmp(zTarget, "ftp:", 4)==0
1272 || strncmp(zTarget, "mailto:", 7)==0
1273 ){
1274 blob_appendf(pOut, "<a href=\"%s\"%s>", zTarget, zExtra);
1275 }else if( zTarget[0]=='/' ){
1276 blob_appendf(pOut, "<a href=\"%R%h\"%s>", zTarget, zExtra);
1277 }else if( zTarget[0]=='.'
1278 && (zTarget[1]=='/' || (zTarget[1]=='.' && zTarget[2]=='/'))
1279 && (mFlags & WIKI_LINKSONLY)==0 ){
1280 blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
1281 }else if( zTarget[0]=='#' ){
1282 blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
1283 }else if( is_valid_hname(zTarget) ){
1284 int isClosed = 0;
1285 const char *zLB = (mFlags & WIKI_NOBRACKET)==0 ? "[" : "";
1286 if( strlen(zTarget)<=HNAME_MAX && is_ticket(zTarget, &isClosed) ){
1287 /* Special display processing for tickets. Display the hyperlink
1288 ** as crossed out if the ticket is closed.
1289 */
1290 if( isClosed ){
1291 if( g.perm.Hyperlink ){
1292 blob_appendf(pOut,
1293 "%z<span class=\"wikiTagCancelled\">%s",
1294 xhref(zExtraNS,"%R/info/%s",zTarget), zLB
1295 );
1296 zTerm = "]</span></a>";
1297 }else{
1298 blob_appendf(pOut,"<span class=\"wikiTagCancelled\">%s", zLB);
1299 zTerm = "]</span>";
1300 }
1301 }else{
1302 if( g.perm.Hyperlink ){
1303 blob_appendf(pOut,"%z%s", xhref(zExtraNS,"%R/info/%s", zTarget),zLB);
1304 zTerm = "]</a>";
1305 }else{
1306 blob_appendf(pOut, "%s", zLB);
1307 zTerm = "]";
1308 }
1309 }
1310 }else if( !in_this_repo(zTarget) ){
1311 if( (mFlags & (WIKI_LINKSONLY|WIKI_NOBADLINKS))!=0 ){
1312 zTerm = "";
1313 }else{
1314 blob_appendf(pOut, "<span class=\"brokenlink\">%s", zLB);
1315 zTerm = "]</span>";
1316 }
1317 }else if( g.perm.Hyperlink ){
1318 blob_appendf(pOut, "%z%s",xhref(zExtraNS, "%R/info/%s", zTarget), zLB);
1319 zTerm = "]</a>";
1320 }else{
1321 zTerm = "";
1322 }
1323 if( zTerm[0]==']' && (mFlags & WIKI_NOBRACKET)!=0 ) zTerm++;
1324 }else if( (zRemote = interwiki_url(zTarget))!=0 ){
1325 blob_appendf(pOut, "<a href=\"%z\"%s>", zRemote, zExtra);
1326 zTerm = "</a>";
1327 }else if( (z = validWikiPageName(mFlags, zTarget))!=0 ){
1328 /* The link is to a valid wiki page name */
1329 const char *zOverride = wiki_is_overridden(zTarget);
1330 if( zOverride ){
1331 blob_appendf(pOut, "<a href=\"%R/info/%S\"%s>", zOverride, zExtra);
1332 }else{
1333 blob_appendf(pOut, "<a href=\"%R/wiki?name=%T\"%s>", z, zExtra);
1334 }
1335 }else if( strlen(zTarget)>=10 && fossil_isdigit(zTarget[0]) && zTarget[4]=='-'
1336 && db_int(0, "SELECT datetime(%Q) NOT NULL", zTarget) ){
1337 /* Dates or date-and-times in ISO8610 resolve to a link to the
1338 ** timeline for that date */
1339 blob_appendf(pOut, "<a href=\"%R/timeline?c=%T\"%s>", zTarget, zExtra);
1340 }else if( mFlags & WIKI_MARKDOWNLINKS ){
1341 /* If none of the above, and if rendering links for markdown, then
1342 ** create a link to the literal text of the target */
1343 blob_appendf(pOut, "<a href=\"%h\"%s>", zTarget, zExtra);
1344 }else if( zOrig && zTarget>=&zOrig[2]
1345 && zTarget[-1]=='[' && !fossil_isspace(zTarget[-2]) ){
1346 /* If the hyperlink markup is not preceded by whitespace, then it
1347 ** is probably a C-language subscript or similar, not really a
1348 ** hyperlink. Just ignore it. */
1349 zTerm = "";
1350 }else if( (mFlags & (WIKI_NOBADLINKS|WIKI_LINKSONLY))!=0 ){
1351 /* Also ignore the link if various flags are set */
1352 zTerm = "";
1353 }else{
1354 blob_appendf(pOut, "<span class=\"brokenlink\">[%h]", zTarget);
1355 zTerm = "</span>";
1356 }
1357 if( zExtra ) fossil_free(zExtra);
1358 assert( strlen(zTerm)<nClose );
1359 sqlite3_snprintf(nClose, zClose, "%s", zTerm);
1360 }
1361
1362 /*
1363 ** Check to see if the given parsed markup is the correct
1364 ** </verbatim> tag.
1365 */
endVerbatim(Renderer * p,ParsedMarkup * pMarkup)1366 static int endVerbatim(Renderer *p, ParsedMarkup *pMarkup){
1367 char *z;
1368 assert( p->inVerbatim );
1369 if( pMarkup->iCode!=MARKUP_VERBATIM ) return 0;
1370 if( !pMarkup->endTag ) return 0;
1371 if( p->zVerbatimId==0 ) return 1;
1372 if( pMarkup->nAttr!=1 ) return 0;
1373 z = pMarkup->aAttr[0].zValue;
1374 return fossil_strcmp(z, p->zVerbatimId)==0;
1375 }
1376
1377 /*
1378 ** z[] points to the text that immediately follows markup of the form:
1379 **
1380 ** <verbatim type='pikchr ...'>
1381 **
1382 ** zClass is the argument to "type". This routine will process the
1383 ** Pikchr text through the next matching </verbatim> (or until end-of-file)
1384 ** and append the resulting SVG output onto p. It then returns the
1385 ** number of bytes of text processed, including the closing </verbatim>.
1386 */
wiki_process_pikchr(Renderer * p,char * z,const char * zClass)1387 static int wiki_process_pikchr(Renderer *p, char *z, const char *zClass){
1388 ParsedMarkup m; /* Parsed closing tag */
1389 int i = 0; /* For looping over z[] in search of </verbatim> */
1390 int iRet = 0; /* Value to return */
1391 int atEnd = 0; /* True if se have found the </verbatim> */
1392 int nMarkup = 0; /* Length of a markup we are checking */
1393
1394 /* Search for the closing </verbatim> tag */
1395 while( z[i]!=0 ){
1396 char *zEnd = strchr(z+i, '<');
1397 if( zEnd==0 ){
1398 i += (int)strlen(z+i);
1399 iRet = i;
1400 break;
1401 }
1402 nMarkup = html_tag_length(zEnd);
1403 if( nMarkup<11 || fossil_strnicmp(zEnd, "</verbatim", 10)!=0 ){
1404 i = (int)(zEnd - z) + 1;
1405 continue;
1406 }
1407 (void)parseMarkup(&m, z+i);
1408 atEnd = endVerbatim(p, &m);
1409 unparseMarkup(&m);
1410 if( atEnd ){
1411 iRet = i + nMarkup;
1412 break;
1413 }
1414 i++;
1415 }
1416
1417 /* The Pikchr source text should be i character in length and iRet is
1418 ** i plus the number of bytes in the </verbatim>. Generate the reply.
1419 */
1420 assert( strncmp(zClass,"pikchr",6)==0 );
1421 zClass += 6;
1422 while( fossil_isspace(zClass[0]) ) zClass++;
1423 blob_append(p->pOut, "<p>", 3);
1424 pikchr_to_html(p->pOut, z, i, zClass, (int)strlen(zClass));
1425 blob_append(p->pOut, "</p>\n", 5);
1426 return iRet;
1427 }
1428
1429 /*
1430 ** Return the MUTYPE for the top of the stack.
1431 */
stackTopType(Renderer * p)1432 static int stackTopType(Renderer *p){
1433 if( p->nStack<=0 ) return 0;
1434 return aMarkup[p->aStack[p->nStack-1].iCode].iType;
1435 }
1436
1437 /*
1438 ** Convert the wiki in z[] into html in the renderer p. The
1439 ** renderer has already been initialized.
1440 **
1441 ** This routine will probably modify the content of z[].
1442 */
wiki_render(Renderer * p,char * z)1443 static void wiki_render(Renderer *p, char *z){
1444 int tokenType;
1445 ParsedMarkup markup;
1446 int n;
1447 int inlineOnly = (p->state & INLINE_MARKUP_ONLY)!=0;
1448 int wikiHtmlOnly = (p->state & (WIKI_HTMLONLY | WIKI_LINKSONLY))!=0;
1449 int linksOnly = (p->state & WIKI_LINKSONLY)!=0;
1450 char *zOrig = z;
1451
1452 /* Make sure the attribute constants and names still align
1453 ** following changes in the attribute list. */
1454 assert( fossil_strcmp(aAttribute[ATTR_WIDTH].zName, "width")==0 );
1455
1456 while( z[0] ){
1457 if( wikiHtmlOnly ){
1458 n = nextRawToken(z, p, &tokenType);
1459 }else{
1460 n = nextWikiToken(z, p, &tokenType);
1461 }
1462 p->state &= ~(AT_NEWLINE|AT_PARAGRAPH);
1463 switch( tokenType ){
1464 case TOKEN_PARAGRAPH: {
1465 if( inlineOnly ){
1466 /* blob_append_string(p->pOut, " ¶ "); */
1467 blob_append_string(p->pOut, " ");
1468 }else{
1469 if( p->wikiList ){
1470 popStackToTag(p, p->wikiList);
1471 p->wikiList = 0;
1472 }
1473 endAutoParagraph(p);
1474 blob_append_string(p->pOut, "\n\n");
1475 p->wantAutoParagraph = 1;
1476 }
1477 p->state |= AT_PARAGRAPH|AT_NEWLINE;
1478 break;
1479 }
1480 case TOKEN_NEWLINE: {
1481 if( p->renderFlags & WIKI_NEWLINE ){
1482 blob_append_string(p->pOut, "<br>\n");
1483 }else{
1484 blob_append_string(p->pOut, "\n");
1485 }
1486 p->state |= AT_NEWLINE;
1487 break;
1488 }
1489 case TOKEN_BUL_LI: {
1490 if( inlineOnly ){
1491 blob_append_string(p->pOut, " • ");
1492 }else{
1493 if( p->wikiList!=MARKUP_UL ){
1494 if( p->wikiList ){
1495 popStackToTag(p, p->wikiList);
1496 }
1497 endAutoParagraph(p);
1498 pushStack(p, MARKUP_UL);
1499 blob_append_string(p->pOut, "<ul>");
1500 p->wikiList = MARKUP_UL;
1501 }
1502 popStackToTag(p, MARKUP_LI);
1503 startAutoParagraph(p);
1504 pushStack(p, MARKUP_LI);
1505 blob_append_string(p->pOut, "<li>");
1506 }
1507 break;
1508 }
1509 case TOKEN_NUM_LI: {
1510 if( inlineOnly ){
1511 blob_append_string(p->pOut, " # ");
1512 }else{
1513 if( p->wikiList!=MARKUP_OL ){
1514 if( p->wikiList ){
1515 popStackToTag(p, p->wikiList);
1516 }
1517 endAutoParagraph(p);
1518 pushStack(p, MARKUP_OL);
1519 blob_append_string(p->pOut, "<ol>");
1520 p->wikiList = MARKUP_OL;
1521 }
1522 popStackToTag(p, MARKUP_LI);
1523 startAutoParagraph(p);
1524 pushStack(p, MARKUP_LI);
1525 blob_append_string(p->pOut, "<li>");
1526 }
1527 break;
1528 }
1529 case TOKEN_ENUM: {
1530 if( inlineOnly ){
1531 blob_appendf(p->pOut, " (%d) ", atoi(z));
1532 }else{
1533 if( p->wikiList!=MARKUP_OL ){
1534 if( p->wikiList ){
1535 popStackToTag(p, p->wikiList);
1536 }
1537 endAutoParagraph(p);
1538 pushStack(p, MARKUP_OL);
1539 blob_append_string(p->pOut, "<ol>");
1540 p->wikiList = MARKUP_OL;
1541 }
1542 popStackToTag(p, MARKUP_LI);
1543 startAutoParagraph(p);
1544 pushStack(p, MARKUP_LI);
1545 blob_appendf(p->pOut, "<li value=\"%d\">", atoi(z));
1546 }
1547 break;
1548 }
1549 case TOKEN_INDENT: {
1550 if( !inlineOnly ){
1551 assert( p->wikiList==0 );
1552 pushStack(p, MARKUP_BLOCKQUOTE);
1553 blob_append_string(p->pOut, "<blockquote>");
1554 p->wantAutoParagraph = 0;
1555 p->wikiList = MARKUP_BLOCKQUOTE;
1556 }
1557 break;
1558 }
1559 case TOKEN_CHARACTER: {
1560 startAutoParagraph(p);
1561 if( z[0]=='<' ){
1562 blob_append_string(p->pOut, "<");
1563 }else if( z[0]=='&' ){
1564 blob_append_string(p->pOut, "&");
1565 }
1566 break;
1567 }
1568 case TOKEN_LINK: {
1569 char *zTarget;
1570 char *zDisplay = 0;
1571 int i, j;
1572 int savedState;
1573 char zClose[20];
1574 char cS1 = 0;
1575 int iS1 = 0;
1576
1577 startAutoParagraph(p);
1578 zTarget = &z[1];
1579 for(i=1; z[i] && z[i]!=']'; i++){
1580 if( z[i]=='|' && zDisplay==0 ){
1581 zDisplay = &z[i+1];
1582 for(j=i; j>0 && fossil_isspace(z[j-1]); j--){}
1583 iS1 = j;
1584 cS1 = z[j];
1585 z[j] = 0;
1586 }
1587 }
1588 z[i] = 0;
1589 if( zDisplay==0 ){
1590 zDisplay = zTarget + interwiki_removable_prefix(zTarget);
1591 }else{
1592 while( fossil_isspace(*zDisplay) ) zDisplay++;
1593 }
1594 wiki_resolve_hyperlink(p->pOut, p->state,
1595 zTarget, zClose, sizeof(zClose), zOrig, 0);
1596 if( linksOnly || zClose[0]==0 || p->inVerbatim ){
1597 if( cS1 ) z[iS1] = cS1;
1598 if( zClose[0]!=']' ){
1599 blob_appendf(p->pOut, "[%h]%s", zTarget, zClose);
1600 }else{
1601 blob_appendf(p->pOut, "%h%s", zTarget, zClose);
1602 }
1603 }else{
1604 savedState = p->state;
1605 p->state &= ~ALLOW_WIKI;
1606 p->state |= FONT_MARKUP_ONLY;
1607 wiki_render(p, zDisplay);
1608 p->state = savedState;
1609 blob_append(p->pOut, zClose, -1);
1610 }
1611 break;
1612 }
1613 case TOKEN_TEXT: {
1614 int i;
1615 for(i=0; i<n && fossil_isspace(z[i]); i++){}
1616 if( i<n ) startAutoParagraph(p);
1617 blob_append(p->pOut, z, n);
1618 break;
1619 }
1620 case TOKEN_RAW: {
1621 if( linksOnly ){
1622 htmlize_to_blob(p->pOut, z, n);
1623 }else{
1624 blob_append(p->pOut, z, n);
1625 }
1626 break;
1627 }
1628 case TOKEN_MARKUP: {
1629 const char *zId;
1630 int iDiv;
1631 int mAttr = parseMarkup(&markup, z);
1632
1633 /* Convert <title> to <h1 align='center'> */
1634 if( markup.iCode==MARKUP_TITLE && !p->inVerbatim ){
1635 markup.iCode = MARKUP_H1;
1636 markup.nAttr = 1;
1637 markup.aAttr[0].iACode = AMSK_ALIGN;
1638 markup.aAttr[0].zValue = "center";
1639 markup.aAttr[0].cTerm = 0;
1640 }
1641
1642 /* Markup of the form </div id=ID> where there is a matching
1643 ** ID somewhere on the stack. Exit any contained verbatim.
1644 ** Pop the stack up to the matching <div>. Discard the </div>
1645 */
1646 if( markup.iCode==MARKUP_DIV && markup.endTag &&
1647 (zId = markupId(&markup))!=0 &&
1648 (iDiv = findTagWithId(p, MARKUP_DIV, zId))>=0
1649 ){
1650 if( p->inVerbatim ){
1651 p->inVerbatim = 0;
1652 p->state = p->preVerbState;
1653 blob_append_string(p->pOut, "</pre>");
1654 }
1655 while( p->nStack>iDiv+1 ) popStack(p);
1656 if( p->aStack[iDiv].allowWiki ){
1657 p->state |= ALLOW_WIKI;
1658 }else{
1659 p->state &= ~ALLOW_WIKI;
1660 }
1661 assert( p->nStack==iDiv+1 );
1662 p->nStack--;
1663 }else
1664
1665 /* If within <verbatim id=ID> ignore everything other than
1666 ** </verbatim id=ID> and the </dev id=ID2> above.
1667 */
1668 if( p->inVerbatim ){
1669 if( endVerbatim(p, &markup) ){
1670 p->inVerbatim = 0;
1671 p->state = p->preVerbState;
1672 blob_append_string(p->pOut, "</pre>");
1673 }else{
1674 unparseMarkup(&markup);
1675 blob_append_string(p->pOut, "<");
1676 n = 1;
1677 }
1678 }else
1679
1680 /* Render invalid markup literally. The markup appears in the
1681 ** final output as plain text.
1682 */
1683 if( markup.iCode==MARKUP_INVALID ){
1684 unparseMarkup(&markup);
1685 startAutoParagraph(p);
1686 blob_append_string(p->pOut, "<");
1687 n = 1;
1688 }else
1689
1690 /* If the markup is not font-change markup ignore it if the
1691 ** font-change-only flag is set.
1692 */
1693 if( (markup.iType&MUTYPE_FONT)==0 && (p->state & FONT_MARKUP_ONLY)!=0 ){
1694 /* Do nothing */
1695 }else
1696
1697 if( markup.iCode==MARKUP_NOWIKI ){
1698 if( markup.endTag ){
1699 p->state |= ALLOW_WIKI;
1700 }else{
1701 p->state &= ~ALLOW_WIKI;
1702 }
1703 }else
1704
1705 /* Ignore block markup for in-line rendering.
1706 */
1707 if( inlineOnly && (markup.iType&MUTYPE_INLINE)==0 ){
1708 /* Do nothing */
1709 }else
1710
1711 /* Generate end-tags */
1712 if( markup.endTag ){
1713 popStackToTag(p, markup.iCode);
1714 }else
1715
1716 /* Push <div> markup onto the stack together with the id=ID attribute.
1717 */
1718 if( markup.iCode==MARKUP_DIV && (mAttr & ATTR_ID)!=0 ){
1719 pushStackWithId(p, markup.iCode, markupId(&markup),
1720 (p->state & ALLOW_WIKI)!=0);
1721 }else
1722
1723 /* Enter <verbatim> processing. With verbatim enabled, all other
1724 ** markup other than the corresponding end-tag with the same ID is
1725 ** ignored.
1726 */
1727 if( markup.iCode==MARKUP_VERBATIM ){
1728 int ii; //, vAttrDidAppend=0;
1729 const char *zClass = 0;
1730 p->zVerbatimId = 0;
1731 p->inVerbatim = 1;
1732 p->preVerbState = p->state;
1733 p->state &= ~ALLOW_WIKI;
1734 for(ii=0; ii<markup.nAttr; ii++){
1735 if( markup.aAttr[ii].iACode == ATTR_ID ){
1736 p->zVerbatimId = markup.aAttr[ii].zValue;
1737 }else if( markup.aAttr[ii].iACode==ATTR_TYPE ){
1738 zClass = markup.aAttr[ii].zValue;
1739 }else if( markup.aAttr[ii].iACode==ATTR_LINKS
1740 && !is_false(markup.aAttr[ii].zValue) ){
1741 p->state |= ALLOW_LINKS;
1742 }
1743 }
1744 endAutoParagraph(p);
1745 if( zClass==0 ){
1746 blob_append_string(p->pOut, "<pre class='verbatim'>");
1747 }else if( strncmp(zClass,"pikchr",6)==0 &&
1748 (fossil_isspace(zClass[6]) || zClass[6]==0) ){
1749 n += wiki_process_pikchr(p, z+n, zClass);
1750 p->inVerbatim = 0;
1751 p->state = p->preVerbState;
1752 }else{
1753 blob_appendf(p->pOut, "<pre name='code' class='%h'>",
1754 zClass);
1755 }
1756 p->wantAutoParagraph = 0;
1757 }else
1758 if( markup.iType==MUTYPE_LI ){
1759 if( backupToType(p, MUTYPE_LIST)==0 ){
1760 endAutoParagraph(p);
1761 pushStack(p, MARKUP_UL);
1762 blob_append_string(p->pOut, "<ul>");
1763 }
1764 pushStack(p, MARKUP_LI);
1765 renderMarkup(p->pOut, &markup);
1766 }else
1767 if( markup.iType==MUTYPE_TR ){
1768 if( backupToType(p, MUTYPE_TABLE) ){
1769 pushStack(p, MARKUP_TR);
1770 renderMarkup(p->pOut, &markup);
1771 }
1772 }else
1773 if( markup.iType==MUTYPE_TD ){
1774 if( backupToType(p, MUTYPE_TABLE|MUTYPE_TR) ){
1775 if( stackTopType(p)==MUTYPE_TABLE ){
1776 pushStack(p, MARKUP_TR);
1777 blob_append_string(p->pOut, "<tr>");
1778 }
1779 pushStack(p, markup.iCode);
1780 renderMarkup(p->pOut, &markup);
1781 }
1782 }else
1783 if( markup.iType==MUTYPE_HYPERLINK ){
1784 if( !isButtonHyperlink(p, &markup, z, &n) ){
1785 popStackToTag(p, markup.iCode);
1786 startAutoParagraph(p);
1787 renderMarkup(p->pOut, &markup);
1788 pushStack(p, markup.iCode);
1789 }
1790 }else
1791 {
1792 if( markup.iType==MUTYPE_FONT ){
1793 startAutoParagraph(p);
1794 }else if( markup.iType==MUTYPE_BLOCK || markup.iType==MUTYPE_LIST ){
1795 p->wantAutoParagraph = 0;
1796 }
1797 if( markup.iCode==MARKUP_HR
1798 || markup.iCode==MARKUP_H1
1799 || markup.iCode==MARKUP_H2
1800 || markup.iCode==MARKUP_H3
1801 || markup.iCode==MARKUP_H4
1802 || markup.iCode==MARKUP_H5
1803 || markup.iCode==MARKUP_P
1804 ){
1805 endAutoParagraph(p);
1806 }
1807 if( (markup.iType & MUTYPE_STACK )!=0 ){
1808 pushStack(p, markup.iCode);
1809 }
1810 renderMarkup(p->pOut, &markup);
1811 }
1812 break;
1813 }
1814 }
1815 z += n;
1816 }
1817 }
1818
1819 /*
1820 ** Transform the text in the pIn blob. Write the results
1821 ** into the pOut blob. The pOut blob should already be
1822 ** initialized. The output is merely appended to pOut.
1823 ** If pOut is NULL, then the output is appended to the CGI
1824 ** reply.
1825 */
wiki_convert(Blob * pIn,Blob * pOut,int flags)1826 void wiki_convert(Blob *pIn, Blob *pOut, int flags){
1827 Renderer renderer;
1828
1829 memset(&renderer, 0, sizeof(renderer));
1830 renderer.renderFlags = flags;
1831 renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH|flags;
1832 if( flags & WIKI_INLINE ){
1833 renderer.wantAutoParagraph = 0;
1834 }else{
1835 renderer.wantAutoParagraph = 1;
1836 }
1837 if( wikiUsesHtml() ){
1838 renderer.state |= WIKI_HTMLONLY;
1839 }
1840 if( pOut ){
1841 renderer.pOut = pOut;
1842 }else{
1843 renderer.pOut = cgi_output_blob();
1844 }
1845
1846 blob_to_utf8_no_bom(pIn, 0);
1847 wiki_render(&renderer, blob_str(pIn));
1848 endAutoParagraph(&renderer);
1849 while( renderer.nStack ){
1850 popStack(&renderer);
1851 }
1852 blob_append_char(renderer.pOut, '\n');
1853 free(renderer.aStack);
1854 }
1855
1856 /*
1857 ** COMMAND: test-wiki-render
1858 **
1859 ** Usage: %fossil test-wiki-render FILE [OPTIONS]
1860 **
1861 ** Translate the input FILE from Fossil-wiki into HTML and write
1862 ** the resulting HTML on standard output.
1863 **
1864 ** Options:
1865 ** --buttons Set the WIKI_BUTTONS flag
1866 ** --htmlonly Set the WIKI_HTMLONLY flag
1867 ** --linksonly Set the WIKI_LINKSONLY flag
1868 ** --nobadlinks Set the WIKI_NOBADLINKS flag
1869 ** --inline Set the WIKI_INLINE flag
1870 ** --noblock Set the WIKI_NOBLOCK flag
1871 */
test_wiki_render(void)1872 void test_wiki_render(void){
1873 Blob in, out;
1874 int flags = 0;
1875 if( find_option("buttons",0,0)!=0 ) flags |= WIKI_BUTTONS;
1876 if( find_option("htmlonly",0,0)!=0 ) flags |= WIKI_HTMLONLY;
1877 if( find_option("linksonly",0,0)!=0 ) flags |= WIKI_LINKSONLY;
1878 if( find_option("nobadlinks",0,0)!=0 ) flags |= WIKI_NOBADLINKS;
1879 if( find_option("inline",0,0)!=0 ) flags |= WIKI_INLINE;
1880 if( find_option("noblock",0,0)!=0 ) flags |= WIKI_NOBLOCK;
1881 db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0);
1882 verify_all_options();
1883 if( g.argc!=3 ) usage("FILE");
1884 blob_zero(&out);
1885 blob_read_from_file(&in, g.argv[2], ExtFILE);
1886 wiki_convert(&in, &out, flags);
1887 blob_write_to_file(&out, "-");
1888 }
1889
1890 /*
1891 ** COMMAND: test-markdown-render
1892 **
1893 ** Usage: %fossil test-markdown-render FILE ...
1894 **
1895 ** Render markdown in FILE as HTML on stdout.
1896 ** Options:
1897 **
1898 ** --safe Restrict the output to use only "safe" HTML
1899 */
test_markdown_render(void)1900 void test_markdown_render(void){
1901 Blob in, out;
1902 int i;
1903 int bSafe = 0;
1904 db_find_and_open_repository(OPEN_OK_NOT_FOUND|OPEN_SUBSTITUTE,0);
1905 bSafe = find_option("safe",0,0)!=0;
1906 verify_all_options();
1907 for(i=2; i<g.argc; i++){
1908 blob_zero(&out);
1909 blob_read_from_file(&in, g.argv[i], ExtFILE);
1910 if( g.argc>3 ){
1911 fossil_print("<!------ %h ------->\n", g.argv[i]);
1912 }
1913 markdown_to_html(&in, 0, &out);
1914 safe_html_context( bSafe ? DOCSRC_UNTRUSTED : DOCSRC_TRUSTED );
1915 safe_html(&out);
1916 blob_write_to_file(&out, "-");
1917 blob_reset(&in);
1918 blob_reset(&out);
1919 }
1920 }
1921
1922 /*
1923 ** Search for a <title>...</title> at the beginning of a wiki page.
1924 ** Return true (nonzero) if a title is found. Return zero if there is
1925 ** not title.
1926 **
1927 ** If a title is found, initialize the pTitle blob to be the content
1928 ** of the title and initialize pTail to be the text that follows the
1929 ** title.
1930 */
wiki_find_title(Blob * pIn,Blob * pTitle,Blob * pTail)1931 int wiki_find_title(Blob *pIn, Blob *pTitle, Blob *pTail){
1932 char *z;
1933 int i;
1934 int iStart;
1935 blob_to_utf8_no_bom(pIn, 0);
1936 z = blob_str(pIn);
1937 for(i=0; fossil_isspace(z[i]); i++){}
1938 if( z[i]!='<' ) return 0;
1939 i++;
1940 if( strncmp(&z[i],"title>", 6)!=0 ) return 0;
1941 for(iStart=i+6; fossil_isspace(z[iStart]); iStart++){}
1942 for(i=iStart; z[i] && (z[i]!='<' || strncmp(&z[i],"</title>",8)!=0); i++){}
1943 if( strncmp(&z[i],"</title>",8)!=0 ){
1944 blob_init(pTitle, 0, 0);
1945 blob_init(pTail, &z[iStart], -1);
1946 return 1;
1947 }
1948 if( i-iStart>0 ){
1949 blob_init(pTitle, &z[iStart], i-iStart);
1950 }else{
1951 blob_init(pTitle, 0, 0);
1952 }
1953 blob_init(pTail, &z[i+8], -1);
1954 return 1;
1955 }
1956
1957 /*
1958 ** Parse text looking for wiki hyperlinks in one of the formats:
1959 **
1960 ** [target]
1961 ** [target|...]
1962 **
1963 ** Where "target" can be either an artifact ID prefix or a wiki page
1964 ** name. For each such hyperlink found, add an entry to the
1965 ** backlink table.
1966 */
wiki_extract_links(char * z,Backlink * pBklnk,int flags)1967 void wiki_extract_links(
1968 char *z, /* The wiki text from which to extract links */
1969 Backlink *pBklnk, /* Backlink extraction context */
1970 int flags /* wiki parsing flags */
1971 ){
1972 Renderer renderer;
1973 int tokenType;
1974 ParsedMarkup markup;
1975 int n;
1976 int inlineOnly;
1977 int wikiHtmlOnly = 0;
1978
1979 memset(&renderer, 0, sizeof(renderer));
1980 renderer.state = ALLOW_WIKI|AT_NEWLINE|AT_PARAGRAPH;
1981 if( flags & WIKI_NOBLOCK ){
1982 renderer.state |= INLINE_MARKUP_ONLY;
1983 }
1984 if( wikiUsesHtml() ){
1985 renderer.state |= WIKI_HTMLONLY;
1986 wikiHtmlOnly = 1;
1987 }
1988 inlineOnly = (renderer.state & INLINE_MARKUP_ONLY)!=0;
1989
1990 while( z[0] ){
1991 if( wikiHtmlOnly ){
1992 n = nextRawToken(z, &renderer, &tokenType);
1993 }else{
1994 n = nextWikiToken(z, &renderer, &tokenType);
1995 }
1996 switch( tokenType ){
1997 case TOKEN_LINK: {
1998 char *zTarget;
1999 int i;
2000
2001 zTarget = &z[1];
2002 for(i=0; zTarget[i] && zTarget[i]!='|' && zTarget[i]!=']'; i++){}
2003 while(i>1 && zTarget[i-1]==' '){ i--; }
2004 backlink_create(pBklnk, zTarget, i);
2005 break;
2006 }
2007 case TOKEN_MARKUP: {
2008 const char *zId;
2009 int iDiv;
2010 parseMarkup(&markup, z);
2011
2012 /* Markup of the form </div id=ID> where there is a matching
2013 ** ID somewhere on the stack. Exit the verbatim if were are in
2014 ** it. Pop the stack up to the matching <div>. Discard the
2015 ** </div>
2016 */
2017 if( markup.iCode==MARKUP_DIV && markup.endTag &&
2018 (zId = markupId(&markup))!=0 &&
2019 (iDiv = findTagWithId(&renderer, MARKUP_DIV, zId))>=0
2020 ){
2021 if( renderer.inVerbatim ){
2022 renderer.inVerbatim = 0;
2023 renderer.state = renderer.preVerbState;
2024 }
2025 while( renderer.nStack>iDiv+1 ) popStack(&renderer);
2026 if( renderer.aStack[iDiv].allowWiki ){
2027 renderer.state |= ALLOW_WIKI;
2028 }else{
2029 renderer.state &= ~ALLOW_WIKI;
2030 }
2031 renderer.nStack--;
2032 }else
2033
2034 /* If within <verbatim id=ID> ignore everything other than
2035 ** </verbatim id=ID> and the </dev id=ID2> above.
2036 */
2037 if( renderer.inVerbatim ){
2038 if( endVerbatim(&renderer, &markup) ){
2039 renderer.inVerbatim = 0;
2040 renderer.state = renderer.preVerbState;
2041 }else{
2042 n = 1;
2043 }
2044 }else
2045
2046 /* Render invalid markup literally. The markup appears in the
2047 ** final output as plain text.
2048 */
2049 if( markup.iCode==MARKUP_INVALID ){
2050 n = 1;
2051 }else
2052
2053 /* If the markup is not font-change markup ignore it if the
2054 ** font-change-only flag is set.
2055 */
2056 if( (markup.iType&MUTYPE_FONT)==0 &&
2057 (renderer.state & FONT_MARKUP_ONLY)!=0 ){
2058 /* Do nothing */
2059 }else
2060
2061 if( markup.iCode==MARKUP_NOWIKI ){
2062 if( markup.endTag ){
2063 renderer.state |= ALLOW_WIKI;
2064 }else{
2065 renderer.state &= ~ALLOW_WIKI;
2066 }
2067 }else
2068
2069 /* Ignore block markup for in-line rendering.
2070 */
2071 if( inlineOnly && (markup.iType&MUTYPE_INLINE)==0 ){
2072 /* Do nothing */
2073 }else
2074
2075 /* Generate end-tags */
2076 if( markup.endTag ){
2077 popStackToTag(&renderer, markup.iCode);
2078 }else
2079
2080 /* Push <div> markup onto the stack together with the id=ID attribute.
2081 */
2082 if( markup.iCode==MARKUP_DIV ){
2083 pushStackWithId(&renderer, markup.iCode, markupId(&markup),
2084 (renderer.state & ALLOW_WIKI)!=0);
2085 }else
2086
2087 /* Enter <verbatim> processing. With verbatim enabled, all other
2088 ** markup other than the corresponding end-tag with the same ID is
2089 ** ignored.
2090 */
2091 if( markup.iCode==MARKUP_VERBATIM ){
2092 int vAttrIdx;
2093 renderer.zVerbatimId = 0;
2094 renderer.inVerbatim = 1;
2095 renderer.preVerbState = renderer.state;
2096 renderer.state &= ~ALLOW_WIKI;
2097 for (vAttrIdx = 0; vAttrIdx < markup.nAttr; vAttrIdx++){
2098 if( markup.aAttr[vAttrIdx].iACode == ATTR_ID ){
2099 renderer.zVerbatimId = markup.aAttr[0].zValue;
2100 }
2101 }
2102 renderer.wantAutoParagraph = 0;
2103 }
2104
2105 /* Restore the input text to its original configuration
2106 */
2107 unparseMarkup(&markup);
2108 break;
2109 }
2110 default: {
2111 break;
2112 }
2113 }
2114 z += n;
2115 }
2116 free(renderer.aStack);
2117 }
2118
/*
** Return the length, in bytes, of the HTML token that z points to.
**
** Four kinds of token are recognized, selected by the first byte:
**
**   '<'         The length reported by html_tag_length(), or 1 when that
**               routine reports a length of zero or less.
**   whitespace  The whole run of consecutive whitespace bytes.
**   '&'         The '&', an optional '#', any following alphanumerics,
**               and an optional terminating ';'.
**   other       A run of text that stops just before the next '<', '&',
**               whitespace byte, or the NUL terminator.
**
** The result is always at least 1, so the caller must not invoke this
** routine when z[0] is the NUL terminator.
*/
int html_token_length(const char *z){
  const char first = z[0];
  int len;

  if( first=='<' ){
    len = html_tag_length(z);
    return len>0 ? len : 1;
  }
  if( fossil_isspace(first) ){
    len = 1;
    while( z[len] && fossil_isspace(z[len]) ) len++;
    return len;
  }
  if( first=='&' ){
    len = (z[1]=='#') ? 2 : 1;
    while( fossil_isalnum(z[len]) ) len++;
    if( z[len]==';' ) len++;
    return len;
  }

  /* Plain text.  Every terminator compares <= '<', so bytes above
  ** '<' can be skipped without further tests. */
  len = 1;
  while( 1 ){
    const char ch = z[len];
    if( ch<='<'
     && (ch=='<' || ch=='&' || fossil_isspace(ch) || ch==0)
    ){
      break;
    }
    len++;
  }
  return len;
}
2143
/*
** z points to someplace in the middle of HTML markup.  Return the length
** of the subtoken that starts on z.
**
** Subtokens are:
**
**   *  A run of whitespace.
**   *  A quoted string.  The scan stops at the matching quote, at a '>',
**      or at the end of the string; the closing quote is included in
**      the length only if it was found.
**   *  A single '='.
**   *  A name: an alphanumeric or '/' followed by any number of
**      alphanumerics, '-' or '_'.
**   *  Any other single byte.
**
** A return of 0 means "no more subtokens": z points at the closing '>'
** of the markup or at the NUL terminator of the input.  Returning 0 at
** the terminator keeps scanning loops from stepping past the end of
** markup that has no closing '>'.
*/
int html_subtoken_length(const char *z){
  int n;
  char c;
  c = z[0];
  if( c==0 || c=='>' ){
    return 0;
  }
  if( fossil_isspace(c) ){
    for(n=1; z[n] && fossil_isspace(z[n]); n++){}
    return n;
  }
  if( c=='"' || c=='\'' ){
    for(n=1; z[n] && z[n]!=c && z[n]!='>'; n++){}
    if( z[n]==c ) n++;
    return n;
  }
  if( c=='=' ){
    return 1;
  }
  if( fossil_isalnum(c) || c=='/' ){
    for(n=1; (c=z[n])!=0 && (fossil_isalnum(c) || c=='-' || c=='_'); n++){}
    return n;
  }
  return 1;
}
2173
/*
** z points to an HTML markup token:  <TAG ATTR=VALUE ...>
** This routine looks for the VALUE associated with zAttr and returns
** a pointer to the start of that value and sets *pLen to be the length
** in bytes for the value.  Or it returns NULL if no such attr exists,
** in which case *pLen is left unchanged.
**
** Attribute names are compared without regard to case.  If the value
** is enclosed in a matching pair of quotes, the quotes are excluded
** from the returned span.  The returned pointer refers into zMarkup
** and is not NUL-terminated at *pLen.
**
** The scan is a small state machine driven by iMatchCnt:
**
**    0   Looking for the attribute name
**    1   Name seen, waiting for '='
**    2   Name and '=' seen, the next subtoken is the value
*/
const char *html_attribute(const char *zMarkup, const char *zAttr, int *pLen){
  int i = 1;
  int n;
  int nAttr;
  int iMatchCnt = 0;
  assert( zMarkup[0]=='<' );
  assert( zMarkup[1]!=0 );

  /* Skip over the tag name */
  n = html_subtoken_length(zMarkup+i);
  if( n==0 ) return 0;
  i += n;
  nAttr = (int)strlen(zAttr);
  while( 1 ){
    const char *zStart = zMarkup+i;
    if( zStart[0]==0 ) break;   /* Markup without a closing '>' */
    n = html_subtoken_length(zStart);
    if( n==0 ) break;
    i += n;
    if( fossil_isspace(zStart[0]) ) continue;
    if( iMatchCnt==2 ){
      /* This subtoken is the value.  Test this state first so that a
      ** value spelled the same as the attribute name (ex: class=class)
      ** is not mistaken for a new attribute name. */
      if( n>=2
       && (zStart[0]=='"' || zStart[0]=='\'')
       && zStart[n-1]==zStart[0]
      ){
        /* Strip the quotes.  n>=2 ensures a lone, unterminated quote
        ** character is not treated as its own closing quote, which
        ** would yield a length of -1. */
        zStart++;
        n -= 2;
      }
      *pLen = n;
      return zStart;
    }
    if( n==nAttr && fossil_strnicmp(zAttr,zStart,nAttr)==0 ){
      iMatchCnt = 1;
    }else if( n==1 && zStart[0]=='=' && iMatchCnt==1 ){
      iMatchCnt = 2;
    }else{
      iMatchCnt = 0;
    }
  }
  return 0;
}
2214
2215 /*
2216 ** COMMAND: test-html-tokenize
2217 **
2218 ** Tokenize an HTML file. Return the offset and length and text of
2219 ** each token - one token per line. Omit white-space tokens.
2220 */
test_html_tokenize(void)2221 void test_html_tokenize(void){
2222 Blob in;
2223 char *z;
2224 int i;
2225 int iOfst, n;
2226
2227 for(i=2; i<g.argc; i++){
2228 blob_read_from_file(&in, g.argv[i], ExtFILE);
2229 z = blob_str(&in);
2230 for(iOfst=0; z[iOfst]; iOfst+=n){
2231 n = html_token_length(z+iOfst);
2232 if( fossil_isspace(z[iOfst]) ) continue;
2233 fossil_print("%d %d %.*s\n", iOfst, n, n, z+iOfst);
2234 if( z[iOfst]=='<' && n>1 ){
2235 int j,k;
2236 for(j=iOfst+1; (k = html_subtoken_length(z+j))>0; j+=k){
2237 if( fossil_isspace(z[j]) || z[j]=='=' ) continue;
2238 fossil_print("# %d %d %.*s\n", j, k, k, z+j);
2239 }
2240 }
2241 }
2242 blob_reset(&in);
2243 }
2244 }
2245
2246 /*
2247 ** Attempt to reformat messy HTML to be easily readable by humans.
2248 **
2249 ** * Try to keep lines less than 80 characters in length
2250 ** * Collapse white space into a single space
2251 ** * Put a blank line before:
2252 ** <blockquote><center><code><hN><p><pre><table>
2253 ** * Put a newline after <br> and <hr>
2254 ** * Start each of the following elements on a new line:
2255 ** <address><cite><dd><div><dl><dt><li><ol><samp>
2256 ** <tbody><td><tfoot><th><thead><tr><ul>
2257 **
2258 ** Except, do not do any reformatting inside of <pre>...</pre>
2259 */
htmlTidy(const char * zIn,Blob * pOut)2260 void htmlTidy(const char *zIn, Blob *pOut){
2261 int n;
2262 int nPre = 0;
2263 int iCur = 0;
2264 int wantSpace = 0;
2265 int omitSpace = 1;
2266 while( zIn[0] ){
2267 n = html_token_length(zIn);
2268 if( zIn[0]=='<' && n>1 ){
2269 int i, j;
2270 int isCloseTag;
2271 int eTag;
2272 int eType;
2273 char zTag[32];
2274 isCloseTag = zIn[1]=='/';
2275 for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2276 zTag[i] = fossil_tolower(zIn[j]);
2277 }
2278 zTag[i] = 0;
2279 eTag = findTag(zTag);
2280 eType = aMarkup[eTag].iType;
2281 if( eTag==MARKUP_PRE ){
2282 if( isCloseTag ){
2283 nPre--;
2284 blob_append(pOut, zIn, n);
2285 zIn += n;
2286 if( nPre==0 ){ blob_append_char(pOut, '\n'); iCur = 0; }
2287 continue;
2288 }else{
2289 if( iCur && nPre==0 ){ blob_append_char(pOut, '\n'); iCur = 0; }
2290 nPre++;
2291 }
2292 }else if( eType & (MUTYPE_BLOCK|MUTYPE_TABLE) ){
2293 if( !isCloseTag && nPre==0 && blob_size(pOut)>0 ){
2294 blob_append(pOut, "\n\n", 1 + (iCur>0));
2295 iCur = 0;
2296 }
2297 wantSpace = 0;
2298 omitSpace = 1;
2299 }else if( (eType & (MUTYPE_LIST|MUTYPE_LI|MUTYPE_TR|MUTYPE_TD))!=0
2300 || eTag==MARKUP_HR
2301 ){
2302 if( nPre==0 && (!isCloseTag || (eType&MUTYPE_LIST)!=0) && iCur>0 ){
2303 blob_append_char(pOut, '\n');
2304 iCur = 0;
2305 }
2306 wantSpace = 0;
2307 omitSpace = 1;
2308 }
2309 if( wantSpace && nPre==0 ){
2310 if( iCur+n+1>=80 ){
2311 blob_append_char(pOut, '\n');
2312 iCur = 0;
2313 }else{
2314 blob_append_char(pOut, ' ');
2315 iCur++;
2316 }
2317 }
2318 blob_append(pOut, zIn, n);
2319 iCur += n;
2320 wantSpace = 0;
2321 if( eTag==MARKUP_BR || eTag==MARKUP_HR ){
2322 blob_append_char(pOut, '\n');
2323 iCur = 0;
2324 }
2325 }else if( fossil_isspace(zIn[0]) ){
2326 if( nPre ){
2327 blob_append(pOut, zIn, n);
2328 }else{
2329 wantSpace = !omitSpace;
2330 }
2331 }else{
2332 if( wantSpace && nPre==0 ){
2333 if( iCur+n+1>=80 ){
2334 blob_append_char(pOut, '\n');
2335 iCur = 0;
2336 }else{
2337 blob_append_char(pOut, ' ');
2338 iCur++;
2339 }
2340 }
2341 blob_append(pOut, zIn, n);
2342 iCur += n;
2343 wantSpace = omitSpace = 0;
2344 }
2345 zIn += n;
2346 }
2347 if( iCur ) blob_append_char(pOut, '\n');
2348 }
2349
2350 /*
2351 ** COMMAND: test-html-tidy
2352 **
2353 ** Run the htmlTidy() routine on the content of all files named on
2354 ** the command-line and write the results to standard output.
2355 */
test_html_tidy(void)2356 void test_html_tidy(void){
2357 Blob in, out;
2358 int i;
2359
2360 for(i=2; i<g.argc; i++){
2361 blob_read_from_file(&in, g.argv[i], ExtFILE);
2362 blob_zero(&out);
2363 htmlTidy(blob_str(&in), &out);
2364 blob_reset(&in);
2365 fossil_puts(blob_buffer(&out), 0, blob_size(&out));
2366 blob_reset(&out);
2367 }
2368 }
2369
2370 /*
2371 ** Remove all HTML markup from the input text. The output written into
2372 ** pOut is pure text.
2373 **
2374 ** Put the title on the first line, if there is any <title> markup.
2375 ** If there is no <title>, then create a blank first line.
2376 */
html_to_plaintext(const char * zIn,Blob * pOut)2377 void html_to_plaintext(const char *zIn, Blob *pOut){
2378 int n;
2379 int i, j;
2380 int inTitle = 0; /* True between <title>...</title> */
2381 int seenText = 0; /* True after first non-whitespace seen */
2382 int nNL = 0; /* Number of \n characters at the end of pOut */
2383 int nWS = 0; /* True if pOut ends with whitespace */
2384 while( fossil_isspace(zIn[0]) ) zIn++;
2385 while( zIn[0] ){
2386 n = html_token_length(zIn);
2387 if( zIn[0]=='<' && n>1 ){
2388 int isCloseTag;
2389 int eTag;
2390 int eType;
2391 char zTag[32];
2392 isCloseTag = zIn[1]=='/';
2393 for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
2394 zTag[i] = fossil_tolower(zIn[j]);
2395 }
2396 zTag[i] = 0;
2397 eTag = findTag(zTag);
2398 eType = aMarkup[eTag].iType;
2399 if( eTag==MARKUP_INVALID && fossil_strnicmp(zIn,"<style",6)==0 ){
2400 zIn += n;
2401 while( zIn[0] ){
2402 n = html_token_length(zIn);
2403 if( fossil_strnicmp(zIn, "</style",7)==0 ) break;
2404 zIn += n;
2405 }
2406 if( zIn[0]=='<' ) zIn += n;
2407 continue;
2408 }
2409 if( eTag==MARKUP_TITLE ){
2410 inTitle = !isCloseTag;
2411 }
2412 if( !isCloseTag && seenText && (eType & (MUTYPE_BLOCK|MUTYPE_TABLE))!=0 ){
2413 if( nNL==0 ){
2414 blob_append_char(pOut, '\n');
2415 nNL++;
2416 }
2417 nWS = 1;
2418 }
2419 }else if( fossil_isspace(zIn[0]) ){
2420 if( seenText ){
2421 nNL = 0;
2422 if( !inTitle ){ /* '\n' -> ' ' within <title> */
2423 for(i=0; i<n; i++) if( zIn[i]=='\n' ) nNL++;
2424 }
2425 if( !nWS ){
2426 blob_append_char(pOut, nNL ? '\n' : ' ');
2427 nWS = 1;
2428 }
2429 }
2430 }else if( zIn[0]=='&' ){
2431 char c = '?';
2432 if( zIn[1]=='#' ){
2433 int x = atoi(&zIn[1]);
2434 if( x>0 && x<=127 ) c = x;
2435 }else{
2436 static const struct { int n; char c; char *z; } aEntity[] = {
2437 { 5, '&', "&" },
2438 { 4, '<', "<" },
2439 { 4, '>', ">" },
2440 { 6, ' ', " " },
2441 };
2442 int jj;
2443 for(jj=0; jj<count(aEntity); jj++){
2444 if( aEntity[jj].n==n && strncmp(aEntity[jj].z,zIn,n)==0 ){
2445 c = aEntity[jj].c;
2446 break;
2447 }
2448 }
2449 }
2450 if( fossil_isspace(c) ){
2451 if( nWS==0 && seenText ) blob_append_char(pOut, c);
2452 nWS = 1;
2453 nNL = c=='\n';
2454 }else{
2455 if( !seenText && !inTitle ) blob_append_char(pOut, '\n');
2456 seenText = 1;
2457 nNL = nWS = 0;
2458 blob_append_char(pOut, c);
2459 }
2460 }else{
2461 if( !seenText && !inTitle ) blob_append_char(pOut, '\n');
2462 seenText = 1;
2463 nNL = nWS = 0;
2464 blob_append(pOut, zIn, n);
2465 }
2466 zIn += n;
2467 }
2468 if( nNL==0 ) blob_append_char(pOut, '\n');
2469 }
2470
2471 /*
2472 ** COMMAND: test-html-to-text
2473 **
2474 ** Usage: %fossil test-html-to-text FILE ...
2475 **
2476 ** Read all files named on the command-line. Convert the file
2477 ** content from HTML to text and write the results on standard
2478 ** output.
2479 **
2480 ** This command is intended as a test and debug interface for
2481 ** the html_to_plaintext() routine.
2482 */
test_html_to_text(void)2483 void test_html_to_text(void){
2484 Blob in, out;
2485 int i;
2486
2487 for(i=2; i<g.argc; i++){
2488 blob_read_from_file(&in, g.argv[i], ExtFILE);
2489 blob_zero(&out);
2490 html_to_plaintext(blob_str(&in), &out);
2491 blob_reset(&in);
2492 fossil_puts(blob_buffer(&out), 0, blob_size(&out));
2493 blob_reset(&out);
2494 }
2495 }
2496
2497 /****************************************************************************
2498 ** safe-html:
2499 **
2500 ** An interface for preventing HTML constructs (ex: <style>, <form>, etc)
2501 ** from being inserted into Wiki and Forum posts using Markdown. See the
2502 ** comment on safe_html_append() for additional information on what is meant
2503 ** by "safe".
2504 **
2505 ** The safe-html restrictions only apply to Markdown, as Fossil-Wiki only
2506 ** allows safe-html by design - unsafe-HTML is never and has never been
2507 ** allowed in Fossil-Wiki.
2508 **
2509 ** This code is in the wikiformat.c file so that it can have access to the
2510 ** white-list of acceptable HTML in the aMarkup[] array.
2511 */
2512
/*
** A stack of HTML element codes, used by safe_html_append() to track
** which elements are currently open.
**
** aStack points at the embedded aSpace[] array until the stack
** outgrows it, after which aStack points at heap memory and nAlloc
** is non-zero.
*/
typedef struct HtmlTagStack {
  int n;              /* Number of entries currently on the stack */
  int nAlloc;         /* Heap slots allocated for aStack[]; 0 if none */
  int *aStack;        /* The stack entries */
  int aSpace[10];     /* Built-in storage, used to avoid malloc() */
} HtmlTagStack;
2524
2525 /*
2526 ** Initialize bulk memory to a valid empty tagstack.
2527 */
html_tagstack_init(HtmlTagStack * p)2528 static void html_tagstack_init(HtmlTagStack *p){
2529 p->n = 0;
2530 p->nAlloc = 0;
2531 p->aStack = p->aSpace;
2532 }
2533
2534 /*
2535 ** Push a new element onto the tag statk
2536 */
html_tagstack_push(HtmlTagStack * p,int e)2537 static void html_tagstack_push(HtmlTagStack *p, int e){
2538 if( p->n>=ArraySize(p->aSpace) && p->n>=p->nAlloc ){
2539 if( p->nAlloc==0 ){
2540 int *aNew;
2541 p->nAlloc = 50;
2542 aNew = fossil_malloc( sizeof(p->aStack[0])*p->nAlloc );
2543 memcpy(aNew, p->aStack, sizeof(p->aStack[0])*p->n );
2544 p->aStack = aNew;
2545 }else{
2546 p->nAlloc *= 2;
2547 p->aStack = fossil_realloc(p->aStack, sizeof(p->aStack[0])*p->nAlloc );
2548 }
2549 }
2550 p->aStack[p->n++] = e;
2551 }
2552
2553 /*
2554 ** Clear a tag stack, reclaiming any memory allocations.
2555 */
html_tagstack_clear(HtmlTagStack * p)2556 static void html_tagstack_clear(HtmlTagStack *p){
2557 if( p->nAlloc ){
2558 fossil_free(p->aStack);
2559 p->nAlloc = 0;
2560 p->aStack = p->aSpace;
2561 }
2562 p->n = 0;
2563 }
2564
2565 /*
2566 ** The HTML end-tag eEnd wants to be added to pBlob.
2567 **
2568 ** If an open-tag for eEnd exists anywhere on the stack, then
2569 ** pop it and all prior elements from the task, issuing appropriate
2570 ** end-tags as you go.
2571 **
2572 ** If there is no open-tag for eEnd on the stack, then this
2573 ** routine is a no-op.
2574 */
html_tagstack_pop(HtmlTagStack * p,Blob * pBlob,int eEnd)2575 static void html_tagstack_pop(HtmlTagStack *p, Blob *pBlob, int eEnd){
2576 int i, e;
2577 if( eEnd!=0 ){
2578 for(i=p->n-1; i>=0 && p->aStack[i]!=eEnd; i--){}
2579 if( i<0 ){
2580 blob_appendf(pBlob, "<span class='error'></%s></span>",
2581 aMarkup[eEnd].zName);
2582 return;
2583 }
2584 }else if( p->n==0 ){
2585 return;
2586 }
2587 do{
2588 e = p->aStack[--p->n];
2589 if( e==eEnd || (aMarkup[e].iType & MUTYPE_Nested)!=0 ){
2590 blob_appendf(pBlob, "</%s>", aMarkup[e].zName);
2591 }
2592 }while( e!=eEnd && p->n>0 );
2593 }
2594
/*
** Return the nonce that marks text which safe_html() is to pass
** through without censoring.
**
** When safe_html() sanitizes some HTML, text found in between two
** consecutive instances of the nonce is copied to the output as-is.
** The nonce has the form of an HTML comment, so it is harmless to
** leave it in the middle of the HTML stream.  The nonce is built from
** 32 random bytes, rendered as 64 hex digits, the first time this
** routine is called with bGenerate true.  It is then remembered for
** the rest of the process, so an attacker is unable to guess the
** nonce in advance.
**
** If no nonce has been generated yet and bGenerate is false, then
** NULL is returned.
**
** The original use-case for this mechanism is to allow Pikchr-generated
** SVG in the middle of HTML generated from Markdown.  The Markdown
** output will normally be processed by safe_html() to prevent accidental
** or malicious introduction of harmful HTML (ex: <script>) in the
** output stream.  The safe_html() only lets through HTML elements
** that are on its allow-list and SVG is not on that list.  Hence, in
** order to allow the Pikchr-generated SVG through, it must be
** surrounded by the nonce.
*/
const char *safe_html_nonce(int bGenerate){
  static char *zNonce = 0;
  if( zNonce ) return zNonce;
  if( bGenerate ){
    zNonce = db_text(0, "SELECT '<!--'||hex(randomblob(32))||'-->';");
  }
  return zNonce;
}
/* Size of the nonce in bytes: "<!--" + 64 hex digits + "-->" */
#define SAFE_NONCE_SIZE (4+64+3)
2623
2624 /*
2625 ** Append a safe translation of HTML text to a Blob object.
2626 **
2627 ** Restriction: The input to this routine must be writable.
2628 * Temporary changes may be made to the input, but the input is restored
2629 ** to its original state prior to returning. If zHtml[nHtml] is not a
2630 ** zero character, then a zero might be written in that position
2631 ** temporarily, but that slot will also be restored before this routine
2632 ** returns.
2633 */
safe_html_append(Blob * pBlob,char * zHtml,int nHtml)2634 static void safe_html_append(Blob *pBlob, char *zHtml, int nHtml){
2635 char cLast;
2636 int i, j, n;
2637 HtmlTagStack s;
2638 ParsedMarkup markup;
2639 const char *zNonce;
2640 char *z;
2641
2642 if( nHtml<=0 ) return;
2643 cLast = zHtml[nHtml];
2644 zHtml[nHtml] = 0;
2645 html_tagstack_init(&s);
2646
2647 i = 0;
2648 while( i<nHtml ){
2649 if( zHtml[i]=='<' ){
2650 j = i;
2651 }else{
2652 z = strchr(zHtml+i, '<');
2653 if( z==0 ){
2654 blob_append(pBlob, zHtml+i, nHtml-i);
2655 break;
2656 }
2657 j = (int)(z - zHtml);
2658 blob_append(pBlob, zHtml+i, j-i);
2659 }
2660 if( zHtml[j+1]=='!'
2661 && j+2*SAFE_NONCE_SIZE<nHtml
2662 && (zNonce = safe_html_nonce(0))!=0
2663 && strncmp(zHtml+j,zNonce,SAFE_NONCE_SIZE)==0
2664 && (z = strstr(zHtml+j+SAFE_NONCE_SIZE,zNonce))!=0
2665 ){
2666 i = (int)(z - zHtml) + SAFE_NONCE_SIZE;
2667 blob_append(pBlob, zHtml+j, i-j);
2668 continue;
2669 }
2670 n = html_tag_length(zHtml+j);
2671 if( n==0 ){
2672 blob_append(pBlob, "<", 4);
2673 i = j+1;
2674 continue;
2675 }else{
2676 i = j + n;
2677 }
2678 parseMarkup(&markup, zHtml+j);
2679 if( markup.iCode==MARKUP_INVALID ){
2680 unparseMarkup(&markup);
2681 blob_appendf(pBlob, "<span class='error'><%.*s></span>",
2682 n-2, zHtml+j+1);
2683 continue;
2684 }
2685 if( (markup.iType & MUTYPE_Nested)==0 || markup.iCode==MARKUP_P ){
2686 renderMarkup(pBlob, &markup);
2687 }else{
2688 if( markup.endTag ){
2689 html_tagstack_pop(&s, pBlob, markup.iCode);
2690 }else{
2691 renderMarkup(pBlob, &markup);
2692 html_tagstack_push(&s, markup.iCode);
2693 }
2694 }
2695 unparseMarkup(&markup);
2696 }
2697 html_tagstack_pop(&s, pBlob, 0);
2698 html_tagstack_clear(&s);
2699 zHtml[nHtml] = cLast;
2700 }
2701
/*
** This file-scope variable is true if the safe_html() function is
** enabled.  In other words, this is true if the output of Markdown
** should be restricted to use only "safe" HTML.
**
** The initial value is true.  The value is changed by
** safe_html_context() and is tested at the start of safe_html().
*/
static int safeHtmlEnable = 1;
2708
2709
#if INTERFACE
/*
** Allowed values for the eTrust parameter to safe_html_context().
**
** For the first four values, safe_html_context() consults the
** "safe-html" setting, looking for the letter 'b', 'f', 't' or 'w'
** respectively.  The last two values bypass the setting.
*/
#define DOCSRC_FILE      1     /* Document is a checked-in file */
#define DOCSRC_FORUM     2     /* Document is a forum post */
#define DOCSRC_TICKET    3     /* Document is a ticket comment */
#define DOCSRC_WIKI      4     /* Document is a wiki page */
#define DOCSRC_TRUSTED   5     /* safe_html() is always a no-op */
#define DOCSRC_UNTRUSTED 6     /* safe_html() is always enabled */
#endif /* INTERFACE */
2721
2722
2723 /*
2724 ** Specify the context in which a markdown document with potentially
2725 ** unsafe HTML will be rendered.
2726 */
safe_html_context(int eTrust)2727 void safe_html_context(int eTrust){
2728 static const char *zSafeHtmlSetting = 0;
2729 char cPerm = 0;
2730 if( eTrust==DOCSRC_TRUSTED ){
2731 safeHtmlEnable = 0;
2732 return;
2733 }
2734 if( eTrust==DOCSRC_UNTRUSTED ){
2735 safeHtmlEnable = 1;
2736 return;
2737 }
2738 if( zSafeHtmlSetting==0 ){
2739 zSafeHtmlSetting = db_get("safe-html", "");
2740 }
2741 switch( eTrust ){
2742 case DOCSRC_FILE: cPerm = 'b'; break;
2743 case DOCSRC_FORUM: cPerm = 'f'; break;
2744 case DOCSRC_TICKET: cPerm = 't'; break;
2745 case DOCSRC_WIKI: cPerm = 'w'; break;
2746 }
2747 safeHtmlEnable = (strchr(zSafeHtmlSetting,cPerm)==0);
2748 }
2749
2750 /*
2751 ** SETTING: safe-html width=8
2752 ** This setting controls whether or not unsafe HTML elements
2753 ** (such as SCRIPT or STYLE tags) are allowed in Markdown-formatted
2754 ** documents. Unsafe HTML is disabled by default. If this setting
2755 ** exists and is a string, then letters in that string can enable
2756 ** unsafe HTML in various contexts:
2757 **
2758 ** - b Unsafe HTML allowed in embedded documentation
2759 ** - f Unsafe HTML allowed in forum posts
2760 ** - t Unsafe HTML allowed in tickets
2761 ** - w Unsafe HTML allowed on wiki pages
2762 */
2763 /*
2764 ** The input blob contains HTML. If safe-html is enabled, then
2765 ** convert the input into "safe HTML". The following modifications
2766 ** are made:
2767 **
2768 ** 1. Remove any elements that are not on the AllowedMarkup list.
2769 ** (ex: <script>, <form>, etc.)
2770 **
2771 ** 2. Remove any attributes that are not on the AllowedMarkup list.
2772 ** (ex: onload=, etc.)
2773 **
2774 ** 3. Omit any surplus close-tags. This prevents the script from
2775 ** terminating an <div> or similar in the outer context.
2776 **
2777 ** 4. Insert additional close-tags as necessary so that any
2778 ** tag in the input that needs a close-tag has one. This
2779 ** prevents tags in the embedded script from affecting the
2780 ** display of content that follows this script in the enclosing
2781 ** context.
2782 **
2783 ** These modifications are intended to make the generated HTML safe
2784 ** to be embedded in a larger HTML document, such that the embedded
2785 ** HTML has no influence on the formatting and operation of the
2786 ** larger document.
2787 **
2788 ** If safe-html is disabled, then this routine is a no-op.
2789 */
safe_html(Blob * in)2790 void safe_html(Blob *in){
2791 Blob out; /* Holding area for the revised text during construction */
2792 char *z; /* Original input text */
2793 int n; /* Number of bytes in the original input text */
2794 int k;
2795
2796 if( safeHtmlEnable==0 ) return;
2797 z = blob_str(in);
2798 n = blob_size(in);
2799 blob_init(&out, 0, 0);
2800 while( fossil_isspace(z[0]) ){ z++; n--; }
2801 for(k=n-1; k>5 && fossil_isspace(z[k]); k--){}
2802
2803 if( fossil_strnicmp(z, "<div",4)==0 && !fossil_isalpha(z[4])
2804 && fossil_strnicmp(z+k-5, "</div>",6)==0
2805 ){
2806 /* The input contains an outer <div>...</div>. Preserve the
2807 ** full scope of that <div>. */
2808 int m = html_tag_length(z);
2809 k -= 5;
2810 blob_append(&out, z, m);
2811 safe_html_append(&out, z+m, k-m);
2812 blob_append(&out, z+k, n-k);
2813 }else{
2814 safe_html_append(&out, z, n);
2815 }
2816 blob_reset(in);
2817 *in = out;
2818 }
2819
2820 /*
2821 ** COMMAND: test-safe-html
2822 **
2823 ** Usage: %fossil test-safe-html FILE ...
2824 **
2825 ** Read files named on the command-line. Send the text of each file
2826 ** through safe_html_append() and then write the result on
2827 ** standard output.
2828 */
test_safe_html_cmd(void)2829 void test_safe_html_cmd(void){
2830 int i;
2831 Blob x;
2832 for(i=2; i<g.argc; i++){
2833 char *z;
2834 int n;
2835 blob_read_from_file(&x, g.argv[i], ExtFILE);
2836 blob_terminate(&x);
2837 safe_html(&x);
2838 z = blob_str(&x);
2839 n = blob_size(&x);
2840 while( n>0 && (z[n-1]=='\n' || z[n-1]=='\r') ) n--;
2841 fossil_print("%.*s\n", n, z);
2842 blob_reset(&x);
2843 }
2844 }
2845