1 /*
2 ** Copyright (c) 2002 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2 of the License, or (at your option) any later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 ** General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public
15 ** License along with this library; if not, write to the
16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ** Boston, MA  02111-1307, USA.
18 **
19 ** Author contact information:
20 **   drh@hwaci.com
21 **   http://www.hwaci.com/drh/
22 **
23 *******************************************************************************
24 **
** This file contains code used to convert wiki text into HTML.
26 */
27 #include "config.h"
28 #include "format.h"
29 #include <time.h>
30 #include <limits.h>  /* for PATH_MAX */
31 
32 /*
33 ** Format a relative link for output. The idea here is to determine from context
34 ** whether the link needs to be relative or absolute (i.e. for RSS output, e-mail
35 ** notifications, etc). Returns the formatted string.
36 */
format_link(const char * zFormat,...)37 char *format_link(const char* zFormat,...){
38   char *zLink;
39   va_list ap;
40   va_start(ap,zFormat);
41   zLink = vmprintf(zFormat,ap);
42   va_end(ap);
43   if( g.zLinkURL && g.zLinkURL[0] ){
44     zLink = mprintf("%s/%z",g.zLinkURL,zLink);
45   }
46   return zLink;
47 }
48 
/*
** Return the number of decimal digits at the beginning of the string z.
** The result is 0 when z does not start with a digit.
*/
int ndigit(const char *z){
  int n = 0;
  /* The cast keeps isdigit() well defined for bytes >= 0x80 on platforms
  ** where plain char is signed. */
  while( isdigit((unsigned char)z[n]) ){ n++; }
  return n;
}
57 
/*
** Check to see if z[] holds nothing but whitespace up to the next
** newline.  If so, return the number of characters up to and including
** that newline.  If not, return 0.
**
** The scan continues across the whole leading run of whitespace, so when
** several blank lines occur in a row the return value covers all of them
** (it is the offset just past the last newline in the run).
*/
static int is_blank_line(const char *z){
  int i;
  int nCover = 0;
  /* Cast: isspace() is only defined for unsigned char values and EOF. */
  for(i=0; isspace((unsigned char)z[i]); i++){
    if( z[i]=='\n' ) nCover = i+1;
  }
  return nCover;
}
75 
/*
** Return TRUE if *z points to the terminator for a word.  Words
** are terminated by whitespace or end of input or any of the
** characters in zEnd.  If zEnd is NULL a default set of punctuation
** characters is used.
**
** Note that is_eow() ignores zEnd characters _inside_ a word.  They
** only count if every character from *z up to the next whitespace
** (or the end of input) is also a zEnd character.
*/
int is_eow(const char *z, const char *zEnd){
  if( zEnd==0 ) zEnd = ".,:;?!)\"'";
  /* *z is never 0 inside the loop, so strchr() cannot match the
  ** terminator of zEnd.  The cast keeps isspace() well defined for
  ** bytes >= 0x80. */
  for(; *z!=0 && !isspace((unsigned char)*z); z++){
    if( strchr(zEnd,*z)==0 ) return 0;
  }
  return 1;
}
93 
/*
** Check to see if *z points to the beginning of a Wiki page name.
** If it does, return the number of characters in that name.  If not,
** return 0.
**
** A Wiki page name contains only alphabetic characters.  The first
** letter must be capital and there must be at least one other capital
** letter in the word.  And every capital letter must be followed by
** one or more lower-case letters.  The name must also be followed by
** an end-of-word as determined by is_eow().
*/
int is_wiki_name(const char *z){
  int i;
  int nCap = 0;
  /* All ctype arguments are cast to unsigned char so that bytes >= 0x80
  ** are handled in a well defined way when plain char is signed. */
  if( !isupper((unsigned char)z[0]) ) return 0;
  for(i=0; z[i]; i++){
    if( isupper((unsigned char)z[i]) ){
      /* z[i] is not the terminator here, so z[i+1] is in bounds */
      if( !islower((unsigned char)z[i+1]) ) return 0;
      nCap++;
    }else if( !islower((unsigned char)z[i]) ){
      break;
    }
  }
  return (nCap>=2 && is_eow(&z[i],0)) ? i : 0;
}
118 
/*
** Check to see if *z points to the beginning of the name of a file in
** the repository.  If it does, return the number of characters in that
** name.  If not, return 0.
**
** The name must start with a slash and contain at least one more slash.
** It ends where is_eow() first reports an end-of-word, so spaces in
** filenames are not supported.  The candidate (without its leading
** slash) is then looked up in the filechng table.
*/
int is_repository_file(const char *z){
  int n;
  int seenSlash = 0;
  int found;
  char *zName;

  if( z[0]!='/' ) return 0;
  n = 1;
  while( z[n] && !is_eow(&z[n],0) ){
    if( z[n]=='/' ) seenSlash = 1;
    n++;
  }
  if( !seenSlash ) return 0;

  /* The leading '/' is stripped before querying the database. */
  zName = mprintf("%.*s", n-1, &z[1]);
  found = db_exists("SELECT filename FROM filechng WHERE filename='%q'", zName );
  free(zName);
  return found ? n : 0;
}
145 
/*
** Check to see if z[] is a form that indicates the beginning of a
** bullet or enumeration list element.  z[] can be of the form "*:"
** or "_:" for a bullet or "N:" for an enumeration element where N
** is any number.  The colon can repeat 1 to 10 times.  The marker must
** be followed by some text on the same line.
**
** If z[] is not a list element marker, set *pLevel to 0 and return 0.
** If z[] is a list element marker, set *pLevel to indicate the list
** depth (the number of colons) and the type (bullet or enumeration).
** *pLevel is negative for enumerations and positive for bullets and
** the magnitude is the depth.  Then return the number of characters
** in the marker, including any spaces or tabs that follow the colons
** (the result will always be at least 2.)
*/
static int is_list_elem(const char *z, int *pLevel){
  const char *zStart = z;
  int sign;
  int depth = 0;

  *pLevel = 0;
  /* ctype arguments are cast to unsigned char so that bytes >= 0x80 are
  ** well defined when plain char is signed. */
  if( isdigit((unsigned char)*z) ){
    do{ z++; }while( isdigit((unsigned char)*z) );
    sign = -1;
  }else if( *z=='*' || *z=='_' ){
    z++;
    sign = +1;
  }else{
    return 0;
  }
  while( *z==':' ){ z++; depth++; }
  while( isspace((unsigned char)*z) && *z!='\n' ){ z++; }
  if( depth==0 || depth>10 || *z==0 || *z=='\n' ){
    return 0;
  }
  *pLevel = sign*depth;
  return (int)(z - zStart);
}
188 
/*
** If *z points to horizontal rule markup, return the number of
** characters in that markup (not counting the terminating newline).
** Otherwise return 0.
**
** Horizontal rule markup consists of four or more '-' characters, or
** four or more '=' characters, followed by nothing but whitespace to
** the end of the line or the end of the input.
*/
static int is_horizontal_rule(const char *z){
  int i;
  int c = z[0];
  if( c!='-' && c!='=' ) return 0;
  for(i=0; z[i]==c; i++){}
  if( i<4 ) return 0;
  /* Cast: isspace() is only defined for unsigned char values and EOF. */
  while( isspace((unsigned char)z[i]) && z[i]!='\n' ){ i++; }
  return (z[i]=='\n' || z[i]==0) ? i : 0;
}
206 
/*
** Return the number of characters in the URL that begins
** at *z.  Return 0 if *z is not the beginning of a URL.
**
** A URL starts with "http:", "https:", "ftp://" or "mailto:".
**
** Algorithm: Advance to the first whitespace character or until
** the end of the string.  Then back up over any of the following
** trailing characters:  .)]}?!"':;,
** What remains must reach a scheme-specific minimum length
** (7 for http and ftp, 8 for https, 10 for mailto) or the result is 0.
*/
int is_url(const char *z){
  int i;
  int minlen;

  if( strncmp(z,"http:",5)==0 ){
    minlen = 7;
  }else if( strncmp(z,"https:",6)==0 ){
    minlen = 8;
  }else if( strncmp(z,"ftp://",6)==0 ){
    minlen = 7;
  }else if( strncmp(z,"mailto:",7)==0 ){
    minlen = 10;
  }else{
    return 0;
  }

  /* Cast: isspace() is only defined for unsigned char values and EOF. */
  for(i=0; z[i] && !isspace((unsigned char)z[i]); i++){}

  /* z[i-1] is never the terminator, so strchr() only matches the listed
  ** punctuation. */
  while( i>0 && strchr(".)]}?!\"':;,", z[i-1])!=0 ){ i--; }
  return i>=minlen ? i : 0;
}
257 
/*
** Return true if the given URL points to an image.  zUrl[] holds the
** URL and N is its length.  An image URL is any URL of at least five
** characters that ends, ignoring case, with ".gif", ".png", ".jpg",
** ".jpe", or ".jpeg".
*/
static int is_image(const char *zUrl, int N){
  int i;
  char zTail[6];      /* lower-cased copy of the last five characters */
  if( N<5 ) return 0;
  for(i=0; i<5; i++){
    /* Cast: tolower() is only defined for unsigned char values and EOF. */
    zTail[i] = (char)tolower((unsigned char)zUrl[N-5+i]);
  }
  zTail[5] = 0;
  return strcmp(&zTail[1],".gif")==0 ||
         strcmp(&zTail[1],".png")==0 ||
         strcmp(&zTail[1],".jpg")==0 ||
         strcmp(&zTail[1],".jpe")==0 ||
         strcmp(zTail,".jpeg")==0;
}
276 
/*
** Pass the first N characters of *pzText through htmlize(), send the
** result to the CGI output, and advance *pzText by N characters.
** Nothing happens when N is zero or negative.
*/
static void put_htmlized_text(const char **pzText, int N){
  char *zEsc;
  if( N<=0 ) return;
  zEsc = htmlize(*pzText, N);
  cgi_printf("%s", zEsc);
  free(zEsc);
  *pzText += N;
}
288 
/*
** Search ahead in text z[] looking for a font terminator consisting
** of "n" consecutive instances of character "c".  The font terminator
** must be at the end of a word and it must occur before a paragraph
** break (two newlines separated by nothing but whitespace).
** Also, z[] must begin a new word: it may not start with whitespace,
** with the end of input, or with the character "c" itself.
** If any of these conditions are false, return false.  If all
** conditions are met, return true.
**
** TODO:  Ignore terminators that occur inside of special markup such
** as "{quote: not-a-terminator_}"
*/
static int font_terminator(const char *z, int c, int n){
  int seenNL = 0;    /* true if only whitespace has followed the last newline */
  int cnt = 0;       /* length of the current run of "c" characters */

  /* ctype arguments are cast to unsigned char so that bytes >= 0x80 are
  ** well defined when plain char is signed. */
  if( isspace((unsigned char)*z) || *z==0 || *z==c ) return 0;
  for(z++; *z; z++){
    if( *z==c && !isspace((unsigned char)z[-1]) ){
      cnt++;
      if( cnt==n && is_eow(&z[1],0) ){
        return 1;
      }
    }else{
      cnt = 0;
      if( *z=='\n' ){
        if( seenNL ) return 0;
        seenNL = 1;
      }else if( !isspace((unsigned char)*z) ){
        seenNL = 0;
      }
    }
  }
  return 0;
}
323 
/*
** Return the length of the run of '*' characters that starts at z[0].
** The result is 0 when z[] does not begin with an asterisk.
*/
static int count_stars(const char *z){
  int nStar;
  for(nStar=0; z[nStar]=='*'; nStar++){}
  return nStar;
}
332 
/*
** A Markup object records where the pieces of a single instance of
** markup are located in the text.  Markup is text of one of these forms:
**
**         {type: key args}
**    or   {type: key}
**    or   {type}
**
** When args is missing, key stands in for it, so {type: key} is
** equivalent to {type: key key}.  When key is missing as well, type
** stands in for both, so {type} is the same as {type: type} which is
** the same as {type: type type}.
**
** The pointers refer to the text being parsed; each one is paired with
** a length field.
*/
typedef struct Markup {
  int lenTotal;        /* Length of the whole markup */
  int lenType;         /* Number of characters in "type" */
  int lenKey;          /* Number of characters in "key" */
  int lenArgs;         /* Number of characters in "args" */
  const char *zType;   /* First character of "type" */
  const char *zKey;    /* First character of "key" */
  const char *zArgs;   /* First character of "args" */
} Markup;
356 
357 /*
358 ** z[] is a string of text beginning with "{".  Check to see if it is
359 ** valid markup.  If it is, fill in the pMarkup structure and return true.
360 ** If it is not valid markup, return false.
361 */
is_markup(const char * z,Markup * pMarkup)362 static int is_markup(const char *z, Markup *pMarkup){
363   int i, j;
364   int nest = 1;
365   if( *z!='{' ) return 0;
366   for(i=1; isalpha(z[i]); i++){}
367   if( z[i]=='}' ){
368     pMarkup->lenTotal = i+1;
369     pMarkup->lenType = i-1;
370     pMarkup->lenKey = i-1;
371     pMarkup->lenArgs = i-1;
372     pMarkup->zType = &z[1];
373     pMarkup->zKey = &z[1];
374     pMarkup->zArgs = &z[1];
375     return 1;
376   }
377   if( z[i]!=':' ) return 0;
378   pMarkup->lenType = i-1;
379   pMarkup->zType = &z[1];
380   i++;
381   while( isspace(z[i]) && z[i]!='\n' ){ i++; }
382   if( z[i]==0 || z[i]=='\n' ) return 0;
383   j = i;
384   pMarkup->zKey = &z[i];
385   while( z[i] && !isspace(z[i]) ){
386     if( z[i]=='}' ) nest--;
387     if( z[i]=='{' ) nest++;
388     if( nest==0 ) break;
389     i++;
390   }
391   if( z[i]==0 || z[i]=='\n' ) return 0;
392   pMarkup->lenKey = i - j;
393   if( nest==0 ){
394     pMarkup->lenArgs = i - j;
395     pMarkup->lenTotal = i+1;
396     pMarkup->zArgs = pMarkup->zKey;
397     return 1;
398   }
399   while( isspace(z[i]) && z[i]!='\n' ){ i++; }
400   if( z[i]=='\n' || z[i]==0 ) return 0;
401   j = i;
402   while( z[i] && z[i]!='\n' ){
403     if( z[i]=='}' ) nest--;
404     if( z[i]=='{' ) nest++;
405     if( nest==0 ) break;
406     i++;
407   }
408   if( z[i]!='}' || nest>0 ) return 0;
409   pMarkup->zArgs = &z[j];
410   pMarkup->lenArgs = i - j;
411   pMarkup->lenTotal = i+1;
412   return 1;
413 }
414 
415 /*
416 ** Calculate the length of the table cell starting just after a | and
417 ** extending to the next non-quoted (i.e. not in {} markup) |
418 ** or end-of-line. Returns zero if there's
419 ** no complete (i.e. |-terminated) cell. Cell length does _not_ include
420 ** the ending |.
421 */
table_cell_length(const char * z)422 static int table_cell_length(const char *z){
423   Markup markup;
424   int i = 0;
425 
426   while( z[i] && z[i]!='|' && z[i]!='\n' ){
427     if( z[i]=='{' && is_markup(&z[i],&markup) ){
428       i += markup.lenTotal;
429     }else{
430       i++;
431     }
432   }
433   return (z[i]=='|') ? i : 0;
434 }
/*
** If *z points to a row of table markup, return the number of
** characters in that markup (not counting the terminating newline).
** Otherwise return 0.
**
** Table markup consists of a line starting with '|' and each cell
** separated by more '|' characters. The line ends with a '|' followed by
** nothing but whitespace to the end-of-line.
*/
static int is_table_row(const char *z){
  int i = 0, j;
  if( z[0]!='|' ) return 0;

  /* Step over "|cell" pairs.  The ++i moves past the '|' before the
  ** cell is measured, so on exit i sits just after the last '|' seen. */
  while( z[i]=='|' && (j=table_cell_length(&z[++i]))!=0 ){
    i += j;
  }

  /* Cast: isspace() is only defined for unsigned char values and EOF. */
  while( z[i]!='\n' && isspace((unsigned char)z[i]) ){ i++; }

  return (z[i]=='\n' || z[i]==0) ? i : 0;
}
454 
455 /*
456 ** Output the table row defined by z. Individual cells can be wiki formatted
457 ** (within reason), so knowing cell boundaries depends on checking for
458 ** wiki markup and such.
459 */
output_table_row(const char * z,int nLen)460 static void output_table_row(const char *z, int nLen){
461   int i = 0, j;
462   char *zCell;
463 
464   @ <tr>
465   while( i<nLen && z[i]=='|' && (j=table_cell_length(&z[++i]))!=0 ){
466     zCell = mprintf("%.*s",j,&z[i]);
467     @ <td>
468     output_formatted(zCell,0);
469     free(zCell);
470     @ </td>
471     i += j;
472   }
473   @ </tr>
474 }
475 
/*
** The aList[] array records the current nesting of <ul> and <ol>.
** aList[0] is the stack depth (max depth of 10).  aList[1] is +1 if
** the outermost layer is <ul> and -1 if it is <ol>.  aList[2] holds
** the same information for the second layer, and so forth.
**
** iTarget gives the desired state of the stack.  Its absolute value is
** the desired depth.  It is negative if the innermost layer should be
** <ol> and positive if the innermost layer should be <ul>.
**
** This routine outputs the HTML needed to move from the current
** nesting to the desired nesting and updates aList[] to match:
** surplus layers are closed, a layer of the wrong kind at the target
** depth is closed and reopened, and any missing intermediate layers
** are opened as <ul>.
*/
static void adjust_list_nesting(int *aList, int iTarget){
  int nWant;     /* desired depth: the magnitude of iTarget */
  int nKeep;     /* how many of the open layers may stay open */

  nWant = iTarget;
  if( nWant<0 ) nWant = 0x7fffffff & -nWant;

  /* If we are already at the desired depth but the innermost layer is
  ** of the wrong kind, that layer has to be closed too. */
  nKeep = nWant;
  if( nWant>0 && aList[0]==nWant && aList[nWant]*iTarget<0 ){
    nKeep = nWant - 1;
  }

  /* Close surplus layers, innermost first */
  while( aList[0]>nKeep ){
    int iTop = aList[0];
    aList[0] = iTop - 1;
    if( aList[iTop]>0 ){
      cgi_printf("</ul>\n");
    }else{
      cgi_printf("</ol>\n");
    }
  }

  /* Open intermediate layers, always as <ul> */
  while( aList[0]<nKeep-1 ){
    cgi_printf("<ul>\n");
    aList[0]++;
    aList[aList[0]] = +1;
  }

  /* Open the innermost layer with the requested kind */
  if( aList[0]==nWant-1 ){
    if( iTarget<0 ){
      cgi_printf("<ol>\n");
      aList[nWant] = -1;
    }else{
      cgi_printf("<ul>\n");
      aList[nWant] = +1;
    }
    aList[0]++;
  }
}
521 
/*
** Return non-zero if the first nCh characters of z[], converted to
** lower case, match one of the nList strings in azList[].
**
** azList[] must be sorted in strcmp() order because a binary search
** is used.  Names of more than 31 characters, and empty names, never
** match.
*/
static int inSortedList(const char *z, int nCh, const char* azList[], int nList){
  char zBuf[32];
  int i;
  int lwr, upr;

  if( nCh<=0 || nCh>(int)sizeof(zBuf)-1 ) return 0;
  for(i=0; i<nCh; i++){
    /* Cast: tolower() is only defined for unsigned char values and EOF. */
    zBuf[i] = (char)tolower((unsigned char)z[i]);
  }
  zBuf[nCh] = 0;

  lwr = 0;
  upr = nList - 1;
  while( lwr<=upr ){
    int mid = lwr + (upr-lwr)/2;
    int c = strcmp(azList[mid],zBuf);
    if( c==0 ) return 1;
    if( c<0 ){
      lwr = mid+1;
    }else{
      upr = mid-1;
    }
  }
  return 0;
}
546 
/*
** The HTML elements that the restricted HTML output routine lets
** through.  An element that is not in this table is escaped.
**
** The table is searched with a binary search, so the entries must be
** kept in sorted (strcmp) order.
*/
static const char *azAllowedHtml[] = {
  "a",      "address", "b",     "big",    "blockquote", "br",
  "center", "cite",    "code",
  "dd",     "dfn",     "dir",   "dl",     "dt",
  "em",
  "font",
  "h1",     "h2",      "h3",    "h4",     "h5",         "h6",     "hr",
  "i",      "img",
  "kbd",
  "li",
  "menu",
  "nobr",
  "ol",
  "p",      "pre",
  "s",      "samp",    "small", "strike", "strong",     "sub",    "sup",
  "table",  "td",      "th",    "tr",     "tt",
  "u",      "ul",
  "var",
  "wbr",
};
604 
/*
** The following table is a list of accepted HTML element attributes.
** A tag that carries an attribute which is not on this list fails the
** isAllowedAttr() check.
**
** A binary search is done on this list, so it must be in sorted order.
** Every entry needs its own trailing comma: adjacent string literals
** are silently concatenated by the compiler into a single entry.
*/
static const char *azAllowedAttr[] = {
  "abbr",
  "accesskey",
  "align",
  "alt",
  "axis",
  "bgcolor",
  "border",
  "cellpadding",
  "cellspacing",
  "char",
  "charoff",
  "charset",
  "cite", /* URI */
  "class",
  "clear",
  "color",
  "colspan",
  "compact",
  "dir",
  "face",
  "frame",
  "headers",
  "height",
  "href", /* uri */
  "hreflang",
  "hspace",
  "id",
  "lang",
  "longdesc",
  "name",
  "noshade",
  "nowrap",
  "rel",
  "rev",
  "rowspan",
  "rules",
  "scope",
  "size",
  "span",
  "src",  /* URI */
  "start",
  "summary",
  "title",
  "valign",
  "value",
  "width",
};
659 
660 /*
661 ** Return TRUE if all HTML attributes up to the next '>' in the input string
662 ** are on the allowed list (and pass any other checks we might want to add
663 ** down the road...)
664 */
isAllowedAttr(const char * zAttr,int nAttr)665 static int isAllowedAttr(const char *zAttr,int nAttr){
666   int i,j;
667   int inquote = 0;
668   int inbody = 0;
669 
670   for(i=0; i<nAttr && zAttr[i]!='>'; i++){
671     if( !inbody && !inquote && isalpha(zAttr[i]) ){
672       for(j=1; i+j<nAttr && isalnum(zAttr[i+j]); j++){}
673 
674       if( !inSortedList(&zAttr[i], j, azAllowedAttr,
675                         sizeof(azAllowedAttr)/sizeof(azAllowedAttr[0]))){
676         return 0;
677       }
678       i += j-1;
679       inbody = 0;
680     }else if( inquote && zAttr[i]=='"' ){
681       inquote=0;
682     }else if( !inquote && zAttr[i]=='"' ){
683       inquote=1;
684     }else if( isspace(zAttr[i]) ){
685       inbody = 0;
686     }
687   }
688   return 1;
689 }
690 
691 /*
692 ** Return TRUE if the HTML element given in the argument is on the allowed
693 ** element list.
694 */
isAllowed(const char * zElem,int nElem)695 static int isAllowed(const char *zElem, int nElem){
696   return inSortedList(zElem, nElem, azAllowedHtml,
697                       sizeof(azAllowedHtml)/sizeof(azAllowedHtml[0]));
698 }
699 
/*
** Return TRUE if the HTML element name held in the first nElem
** characters of zElem[] is one that can carry an external reference.
** Those are A, IMG and CITE, compared without regard to case.
*/
static int isLinkTag(const char *zElem, int nElem){
  const char *zTag;
  switch( nElem ){
    case 1:  zTag = "A";     break;
    case 3:  zTag = "IMG";   break;
    case 4:  zTag = "CITE";  break;
    default: return 0;
  }
  return sqlite3_strnicmp(zElem,zTag,nElem)==0;
}
709 
/*
** If z[] begins with "<html>" and a "</html>" occurs somewhere later in
** the string, return the number of characters from the start of z[]
** through the end of the first "</html>".  Return 0 if either tag is
** missing.  Both tags are matched without regard to case.
*/
static int is_html(const char *z){
  const char *p;
  if( sqlite3_strnicmp(z, "<html>", 6)!=0 ) return 0;
  for(p=&z[6]; *p; p++){
    if( *p!='<' ) continue;
    if( sqlite3_strnicmp(p,"</html>",7)==0 ){
      return (int)(p - z) + 7;
    }
  }
  return 0;
}
723 
724 /*
725 ** Output nText characters zText as HTML.  Do not allow markup other
726 ** than the markup for which isAllowed() returns true.
727 **
728 ** In the case of tags with external links, ensure they have a rel="nofollow"
729 ** attribute when g.noFollow is set.
730 **
731 ** FIXME: would be nice to translate relative URL targets if g.zLinkURL!=0
732 */
output_restricted_html(const char * zText,int nText)733 static void output_restricted_html(const char *zText, int nText){
734   int i, j, k;
735   for(i=0; i<nText; i++){
736     if( zText[i]!='<' ) continue;
737     if( i+1<nText ){
738       k = 1 + (zText[i+1]=='/');
739       for(j=k; i+j<nText && isalnum(zText[i+j]); j++){}
740       if( isAllowed(&zText[i+k], j-k)
741           && isAllowedAttr(&zText[i+j],nText-(i+j)) ){
742         if( g.noFollow && zText[i+j]!='>' && isLinkTag(&zText[i+k],j-k) ){
743           /* link tags are special. We want to allow them
744           ** but in order to discourage wiki spam we want to insert
745           ** something in the attributes... Note that we don't bother
746           ** when the tag doesn't have attributes.
747           */
748           cgi_append_content(zText,i + j);
749           zText += i+j;
750           nText -= i+j;
751           cgi_printf(" rel=\"nofollow\" ");
752           i = -1;
753         }
754         continue;
755       }
756     }
757     cgi_append_content(zText,i);
758     cgi_printf("&lt;");
759     zText += i+1;
760     nText -= i+1;
761     i = -1;
762   }
763   cgi_append_content(zText,i);
764 }
765 
766 /*
767 ** Output a formatted ticket link
768 */
output_ticket(int tn,int rn)769 void output_ticket(int tn, int rn){
770   if( g.okRead ){
771     char *zLink = (rn>0) ? format_link("tktview?tn=%d,%d",tn,rn)
772                          : format_link("tktview?tn=%d",tn);
773     if( g.okTicketLink ) {
774       char **az = db_query(
775         "SELECT title,status FROM ticket WHERE tn=%d", tn);
776       if( az && az[0] && az[1] ){
777         cgi_printf("<a href=\"%z\" title=\"%h\">",zLink,az[0]);
778         /* FIXME: should use a <span> with the ticket status as a class */
779         if( az[1][0] == 'n' || az[1][0] == 'a' ){
780           cgi_printf("#%d",tn);
781         }else{
782           cgi_printf("<strike>#%d</strike>",tn);
783         }
784         cgi_printf("</a>");
785         return;
786       }
787     }
788     cgi_printf("<a href=\"%z\">#%d</a>",zLink,tn);
789   }else{
790     cgi_printf("#%d",tn);
791   }
792 }
793 
794 /*
795 ** Output a formatted checkin link
796 */
output_chng(int cn)797 void output_chng(int cn){
798   if( g.okRead ){
799     char *zLink = format_link("chngview?cn=%d",cn);
800     if( g.okCheckinLink ){
801       char **az = db_query(
802            "SELECT milestone,user,message,branch FROM chng WHERE cn=%d", cn);
803       if( az && az[0] && az[1] && az[2] ){
804         if( az[0][0] && az[0][0] != '0' ){
805           cgi_printf("<a href=\"%z\"",zLink);
806           cgi_printf("title=\"Milestone [%d] %h (By %h)\">",
807                      cn, az[2], az[1]);
808           cgi_printf("[%d]</a>", cn);
809         }else{
810           char *z = az[2];
811           int trimmed;
812 
813           /* Mozilla and Firefox are quite sensitive to newlines
814           ** in link titles so we can't use '@' formatting here.
815           */
816           cgi_printf("<a href=\"%z\" title=\"Check-in [%d]", zLink,cn);
817           if( az[3] && az[3][0] ){
818             cgi_printf("on branch %h", az[3]);
819           }
820           cgi_printf(":");  /* want this tight to last text */
821           trimmed = output_trim_message(z, MN_CKIN_MSG, MX_CKIN_MSG);
822           cgi_printf("%h%s (By %h)\">[%d]</a>", z, trimmed?"...":"",
823                      az[1],cn);
824         }
825         return;
826       }
827     }
828     cgi_printf("<a href=\"%z\">[%d]</a>", zLink, cn);
829   }else{
830     cgi_printf("[%d]", cn);
831   }
832 }
833 
/*
** Overwrite every single quote and every backslash in z[] with a
** space.  The string is changed in place.  A NULL z is ignored.
*/
static void sanitize_string( char *z ){
  char *p;
  if( z==0 ) return;
  for(p=z; *p; p++){
    if( *p=='\'' || *p=='\\' ) *p = ' ';
  }
}
845 
markup_substitution(int strip_quotes,const char * zF,const Markup * sMarkup,const char * zInBlock,int lenBlock)846 static char *markup_substitution(
847   int strip_quotes,
848   const char *zF,
849   const Markup* sMarkup,
850   const char *zInBlock,
851   int lenBlock
852 ){
853   char *zOutput = NULL;
854   unsigned const char *zFormat = (unsigned const char*)zF;
855   char *azStrings[256];
856   int  anLens[256];
857   int j, k;
858 
859   /* If we don't treat args as blank where there aren't any,
860   ** we can't create rules like <b>%k %a</b> that work
861   ** with both {markup: this} and {markup: this is} formats. This
862   ** is a fairly common convention with most of the existing markups.
863   ** We strdup() the blank string because we _will_ free it when
864   ** we leave this subroutine.
865   */
866   char *zArgs = (sMarkup->zArgs==sMarkup->zKey)
867                 ? strdup("")
868                 : mprintf("%.*s", sMarkup->lenArgs, sMarkup->zArgs );
869   char *zMarkup = mprintf("%.*s", sMarkup->lenType, sMarkup->zType );
870   char *zKey = mprintf("%.*s", sMarkup->lenKey, sMarkup->zKey );
871   char *zBlock = mprintf("%.*s", lenBlock, zInBlock );
872   const char *zRoot = db_config("cvsroot", "");
873 
874   if( strip_quotes ){
875     /* if we're dealing with a program markup, strip out
876     ** backslashes and quotes. This is why we can't just use
877     ** "subst".
878     */
879     sanitize_string(zMarkup);
880     sanitize_string(zKey);
881     sanitize_string(zArgs);
882     sanitize_string(zBlock);
883   }
884 
885   memset( anLens, 0, sizeof(anLens) );
886   memset( azStrings, 0, sizeof(azStrings) );
887 
888   azStrings['%'] = "%";
889   anLens['%'] = 1;
890 
891   /* markup name substitution */
892   azStrings['m'] = zMarkup;
893   anLens['m'] = sMarkup->lenType;
894 
895   /* key substitution */
896   azStrings['k'] = zKey;
897   anLens['k'] = sMarkup->lenKey;
898 
899   /* block substitution */
900   azStrings['b'] = zBlock;
901   anLens['b'] = lenBlock;
902 
903   /* argument substitution. args isn't necessarily the same as
904   ** sMarkup->zArgs. */
905   azStrings['a'] = zArgs;
906   anLens['a'] = strlen(zArgs);
907 
908   /* argument substitution. args isn't necessarily the same as
909   ** sMarkup->zArgs. */
910   azStrings['x'] = zArgs[0] ? zArgs : zKey;
911   anLens['x'] = zArgs[0] ? strlen(zArgs) : sMarkup->lenKey;
912 
913   /* cvsroot */
914   azStrings['r'] = (char*)zRoot;
915   anLens['r'] = strlen(zRoot);
916 
917   /* basename... from this someone can get the db name */
918   azStrings['n'] = (char*)g.zName;
919   anLens['n'] = strlen(g.zName);
920 
921   /* logged in user */
922   azStrings['u'] = (char*)g.zUser;
923   anLens['u'] = strlen(g.zUser);
924 
925   /* capabilities */
926   azStrings['c'] = db_short_query(
927       "SELECT capabilities FROM user WHERE id='%q'",g.zUser);
928   anLens['c'] = azStrings['c'] ? strlen(azStrings['c']) : 0;
929 
930   /* Calculate the space needed for the % subs.
931   */
932   for(k=j=0; zFormat[j]; j++){
933     if( zFormat[j] == '%' && anLens[zFormat[j+1]] ){
934       j ++;
935       k += anLens[zFormat[j]];
936       continue;
937     }
938     k ++;
939   }
940 
941   /* (over)allocate an output buffer. By "over", I mean we get
942   ** the length of the original plus the length we think we need
943   ** for a fully substituted buffer.
944   */
945   zOutput = malloc(j + k + 1);
946   if( zOutput == NULL ){
947     free(zKey);
948     free(zArgs);
949     free(zMarkup);
950     free(zBlock);
951     if(azStrings['c']) free(azStrings['c']);
952     return NULL;
953   }
954 
955   /* actually perform the substitutions */
956   for(k=j=0; zFormat[j]; j++){
957     if( zFormat[j] == '%' && azStrings[zFormat[j+1]]!=0 ){
958       j ++;
959       memcpy(&zOutput[k],azStrings[zFormat[j]],anLens[zFormat[j]]);
960       k += anLens[zFormat[j]];
961       continue;
962     }
963     zOutput[k++] = zFormat[j];
964   }
965   zOutput[k] = 0;
966 
967   free(zKey);
968   free(zArgs);
969   free(zMarkup);
970   free(zBlock);
971   if(azStrings['c']) free(azStrings['c']);
972   return zOutput;
973 }
974 
975 /*
976 ** Run the block (if any) out through the standard input of the pipeline
977 ** and feed the output of the pipeline into the CGI output.
978 **
979 ** It's assumed that zPipeline has been sanitized and stuff.
980 */
pipe_block(const char * zPipeline,const char * zBlock,int lenBlock)981 static void pipe_block(
982   const char *zPipeline,
983   const char *zBlock,
984   int lenBlock
985 ){
986   char zFile[PATH_MAX];
987   char *zB = mprintf("%.*s",lenBlock,zBlock);
988   FILE *fin = NULL;
989   char *zP;
990 
991   /* Doing this without a temporary file is a bit nasty because of
992   ** potential deadlocks. It _can_ be done if you want to fight with
993   ** pipe(2) and stuff, but CVSTrac already has a write_to_temp() function
994   ** so we might as well be lazy and use it. Note that if lenBlock==0
995   ** we can just skip out using /dev/null.
996   */
997 
998   zFile[0] = 0;
999   if( lenBlock==0 ) {
1000     /* In case the program takes arguments from the command line, we
1001     ** don't want to just treat it as a no-op. So pipe in /dev/null.
1002     */
1003     zP = mprintf( "%s </dev/null", zPipeline );
1004   }else if( !write_to_temp( zB, zFile, sizeof(zFile) ) ){
1005     zP = mprintf( "%s <%s", zPipeline, zFile );
1006   }else{
1007     if( zB ) free(zB);
1008     return;
1009   }
1010 
1011   /* Block has been written, free so we don't forget later
1012   */
1013   if( zB ) free(zB);
1014 
1015   fin = popen(zP,"r");
1016   free(zP);
1017 
1018   /* HTML scrubbing doesn't work effectively on just individual lines. We
1019   ** really need to feed in the entire buffer or we're vulnerable to all
1020   ** sorts of whitespace stupidity.
1021   */
1022   zP = common_readfp(fin);
1023   if( zP ){
1024     output_restricted_html(zP, strlen(zP));
1025     free( zP );
1026   }
1027 
1028   if( fin ){
1029     pclose(fin);
1030   }
1031 
1032   if( zFile[0] ){
1033     unlink(zFile);
1034   }
1035 }
1036 
1037 /*
1038 ** Output Wiki text while inserting the proper HTML control codes.
1039 ** The following formatting conventions are implemented:
1040 **
1041 **    *    Characters with special meaning to HTML are escaped.
1042 **
**    *    Blank lines result in a paragraph break.
1044 **
1045 **    *    Paragraphs where the first line is indented by two or more
1046 **         spaces are shown verbatim.  None of the following rules apply
1047 **         to verbatim text.
1048 **
1049 **    *    Lines beginning with "*: " begin a bullet in a bullet list.
1050 **
1051 **    *    Lines beginning with "1: " begin an item in an enumerated list.
1052 **
1053 **    *    Paragraphs beginning with "_: " are indented.
1054 **
1055 **    *    Multiple colons can be used in *:, 1:, and _: for multiple
1056 **         levels of indentation.
1057 **
1058 **    *    Text within _..._ is italic and text in *...* is bold.
1059 **         Text within **...** or ***...*** bold with a larger font.
1060 **         Text within =...= is fixed (code) font.
1061 **
**    *    Wiki page names (words in initial caps) are enclosed in an
1063 **         appropriate hyperlink.
1064 **
1065 **    *    Words that begin with "http:", "https:", "ftp:", or "mailto:"
1066 **         are enclosed in an appropriate hyperlink.
1067 **
1068 **    *    Text of the form "#NNN" where NNN is a valid ticket number
1069 **         is converted into a hyperlink to the corresponding ticket.
1070 **
1071 **    *    Text of the form "[NNN]" where NNN is a valid check-in number
1072 **         becomes a hyperlink to the checkin.
1073 **
1074 **    *    {quote: XYZ} renders XYZ with all special meanings for XYZ escaped.
1075 **
1076 **    *    {link: URL TEXT} renders TEXT with a link to URL.  URL can be
1077 **         relative.
1078 **
1079 **    *    {linebreak} renders a linebreak.
1080 **
1081 **    *    {image: URL ALT} renders an in-line image from URL.  URL can be
1082 **         relative or it can be the name of an attachment to zPageId.
1083 **         {leftimage: URL ALT} and {rightimage: URL ALT} create wrap-around
1084 **         images at the left or right margin.
1085 **
1086 **    *    {clear} skips down the page far enough to clear any wrap-around
1087 **         images.
1088 **
1089 **    *    {report: RN CAPTION} inlines the specified report, RN.
1090 **         {leftreport: RN CAPTION} and {rightreport: RN CAPTION} are also
1091 **         usable.
1092 **
1093 **    *    Text between <html>...</html> is interpreted as HTML.  A restricted
1094 **         subset of tags are supported - things like forms and javascript are
1095 **         intentionally excluded.  The initial <html> must occur at the
1096 **         beginning of a paragraph.
1097 */
output_wiki(const char * zText,const char * zLinkSuffix,const char * zPageId)1098 void output_wiki(
1099   const char *zText,          /* The text to be formatted */
1100   const char *zLinkSuffix,    /* Suffix added to hyperlinks to Wiki */
1101   const char *zPageId         /* Name of current page */
1102 ){
1103   int i, j, k;
1104   int aList[20];         /* See adjust_list_nesting for details */
1105   int inPRE = 0;
1106   int inB = 0;
1107   int inI = 0;
1108   int inT = 0;
1109   int inTab = 0;
1110   int v;
1111   int wordStart = 1;     /* At the start of a word */
1112   int lineStart = 1;     /* At the start of a line */
1113   int paraStart = 1;     /* At the start of a paragraph */
1114   const char *zEndB;     /* Text used to end a run of bold */
1115   char **azAttach;       /* Attachments to zPageId */
1116   const char *zExtLink = "<font color=\"#a0a0a0\">&curren;</font>";
1117   static int once = 1;
1118   static int nTicket, nCommit;
1119   if( once ){
1120     nTicket = atoi(db_short_query("SELECT max(tn) FROM ticket"));
1121     nCommit = atoi(db_short_query("SELECT max(cn) FROM chng"));
1122     once = 0;
1123   }
1124 
1125   i = 0;
1126   aList[0] = 0;
1127   azAttach = 0;
1128   zEndB = "";
1129   while( zText[i] ){
1130     char *z;
1131     int n;
1132     Markup sMarkup;
1133     int c = zText[i];
1134 
1135     /* Text between <html>...</html> is interpreted as HTML.
1136     */
1137     if( c=='<' && (n = is_html(&zText[i]))>0 ){
1138       put_htmlized_text(&zText, i);
1139       zText += 6;
1140       cgi_printf("<div>");
1141       output_restricted_html(zText, n-13);
1142       cgi_printf("</div>");
1143       zText += n - 6;
1144       i = 0;
1145       continue;
1146     }
1147 
1148     /* Markup may consist of special strings contained in curly braces.
1149     ** Examples:  "{linebreak}"  or "{quote: *:}"
1150     */
1151     if( c=='{' && is_markup(&zText[i], &sMarkup) ){
1152       /*
1153       ** Markup of the form "{linebreak}" forces a line break.
1154       */
1155       if( sMarkup.lenType==9 && strncmp(sMarkup.zType,"linebreak",9)==0 ){
1156         put_htmlized_text(&zText, i);
1157         zText += sMarkup.lenTotal;
1158         i = 0;
1159         cgi_printf("<br>\n");
1160         wordStart = lineStart = paraStart = 0;
1161         continue;
1162       }
1163 
1164       /*
1165       ** Markup of the form "{clear}" moves down past any left or right
1166       ** aligned images.
1167       */
1168       if( sMarkup.lenType==5 && strncmp(sMarkup.zType,"clear",5)==0 ){
1169         put_htmlized_text(&zText, i);
1170         zText += sMarkup.lenTotal;
1171         i = 0;
1172         cgi_printf("<br clear=\"both\">\n");
1173         wordStart = lineStart = paraStart = 0;
1174         continue;
1175       }
1176 
1177       /*
1178       ** Markup of the form "{quote: ABC}" writes out the text ABC exactly
1179       ** as it appears.  This can be used to escape special meanings
1180       ** associated with ABC.
1181       */
1182       if( sMarkup.lenType==5 && strncmp(sMarkup.zType,"quote",5)==0 ){
1183         put_htmlized_text(&zText, i);
1184         if( sMarkup.zKey==sMarkup.zArgs ){
1185           n = sMarkup.lenKey;
1186         }else{
1187           n = &sMarkup.zArgs[sMarkup.lenArgs] - sMarkup.zKey;
1188         }
1189         put_htmlized_text(&sMarkup.zKey, n);
1190         zText += sMarkup.lenTotal;
1191         i = 0;
1192         wordStart = lineStart = paraStart = 0;
1193         continue;
1194       }
1195 
1196       /*
1197       ** Markup of the form "{wiki: NAME TEXT}" creates a hyperlink
1198       ** to wiki page. The hyperlink appears on the screen as TEXT.
1199       */
1200       if( sMarkup.lenType==4 && strncmp(sMarkup.zType,"wiki",4)==0 ){
1201         int exists = 1;
1202         put_htmlized_text(&zText, i);
1203         if( g.okRdWiki ) {
1204           char *zPage = mprintf("%.*s", sMarkup.lenKey, sMarkup.zKey);
1205           exists = db_exists("SELECT 1 FROM wiki WHERE name='%q'", zPage);
1206           if( !exists ) cgi_printf("<em>");
1207           cgi_printf("<a href=\"wiki?p=%t%s\">", zPage, zLinkSuffix);
1208           free(zPage);
1209         }
1210         put_htmlized_text(&sMarkup.zArgs, sMarkup.lenArgs);
1211         if( g.okRdWiki ) {
1212           cgi_printf("</a>");
1213           if( !exists ) cgi_printf("</em>");
1214         }
1215         zText += sMarkup.lenTotal;
1216         i = 0;
1217         wordStart = lineStart = paraStart = 0;
1218         continue;
1219       }
1220 
1221       /*
1222       ** Markup of the form "{link: TO TEXT}" creates a hyperlink to TO.
1223       ** The hyperlink appears on the screen as TEXT.  TO can be a any URL,
1224       ** including a relative URL such as "chngview?cn=123".
1225       */
1226       if( sMarkup.lenType==4 && strncmp(sMarkup.zType,"link",4)==0 ){
1227         put_htmlized_text(&zText, i);
1228         if( is_url(sMarkup.zKey)>0 ){
1229           cgi_printf("%s<a href=\"%.*s\"%s>",
1230                      zExtLink, sMarkup.lenKey, sMarkup.zKey,
1231                      g.noFollow ? " rel=\"nofollow\"" : "");
1232         }else{
1233           char *zLink = format_link("%.*s", sMarkup.lenKey, sMarkup.zKey);
1234           cgi_printf("<a href=\"%z\">", zLink);
1235         }
1236         put_htmlized_text(&sMarkup.zArgs, sMarkup.lenArgs);
1237         cgi_printf("</a>");
1238         zText += sMarkup.lenTotal;
1239         i = 0;
1240         wordStart = lineStart = paraStart = 0;
1241         continue;
1242       }
1243 
1244       /*
1245       ** Markup of the form "{image: URL ALT}" creates an in-line image to
1246       ** URL with ALT as the alternate text.  URL can be relative (for example
1247       ** the URL of an attachment.
1248       **
1249       ** If the URL is the name of an attachment, then automatically
1250       ** convert it to the correct URL for that attachment.
1251       */
1252       if( (sMarkup.lenType==5 && strncmp(sMarkup.zType,"image",5)==0)
1253        || (sMarkup.lenType==9 && strncmp(sMarkup.zType,"leftimage",9)==0)
1254        || (sMarkup.lenType==10 && strncmp(sMarkup.zType,"rightimage",10)==0)
1255       ){
1256         char *zUrl = 0;
1257         const char *zAlign;
1258         char *zAlt = htmlize(sMarkup.zArgs, sMarkup.lenArgs);
1259         if( azAttach==0 && zPageId!=0 ){
1260           azAttach = (char **)
1261                      db_query("SELECT fname, atn FROM attachment "
1262                               "WHERE tn='%q'", zPageId);
1263         }
1264         if( azAttach ){
1265           int ix;
1266           for(ix=0; azAttach[ix]; ix+=2){
1267             if( strncmp(azAttach[ix],sMarkup.zKey,sMarkup.lenKey)==0 ){
1268               free(zUrl);
1269               zUrl = format_link("attach_get/%s/%h",
1270                                  azAttach[ix+1], azAttach[ix]);
1271               break;
1272             }
1273           }
1274         }
1275         if( zUrl==0 ){
1276           zUrl = htmlize(sMarkup.zKey, sMarkup.lenKey);
1277         }
1278         put_htmlized_text(&zText, i);
1279         switch( sMarkup.zType[0] ){
1280           case 'l': case 'L':   zAlign = " align=\"left\"";  break;
1281           case 'r': case 'R':   zAlign = " align=\"right\""; break;
1282           default:              zAlign = "";                 break;
1283         }
1284         cgi_printf("<img src=\"%s\" alt=\"%s\"%s>", zUrl, zAlt, zAlign);
1285         free(zUrl);
1286         free(zAlt);
1287         zText += sMarkup.lenTotal;
1288         i = 0;
1289         wordStart = lineStart = paraStart = 0;
1290         continue;
1291       }
1292 
1293       /*
1294       ** Markup of the form "{report: RN}" embeds a report into the output.
1295       */
1296       if( (sMarkup.lenType==6 && strncmp(sMarkup.zType,"report",6)==0)
1297           || (sMarkup.lenType==11 && strncmp(sMarkup.zType,"rightreport",11)==0)
1298           || (sMarkup.lenType==10 && strncmp(sMarkup.zType,"leftreport",10)==0)
1299       ){
1300         char *zCaption = mprintf("%.*s", sMarkup.lenArgs, sMarkup.zArgs);
1301         char *zAlign = 0;
1302         if( sMarkup.lenType==11 ){
1303           zAlign = "align=\"right\"";
1304         }else if( sMarkup.lenType==10 ){
1305           zAlign = "align=\"left\"";
1306         }
1307         put_htmlized_text(&zText, i);
1308         embed_view( atoi(sMarkup.zKey),
1309                     (sMarkup.zArgs==sMarkup.zKey) ? "" : zCaption,
1310                     zAlign );
1311         free(zCaption);
1312         zText += sMarkup.lenTotal;
1313         i = 0;
1314         wordStart = lineStart = paraStart = 0;
1315         continue;
1316       }
1317 
1318       /* Markup of the form "{markups}" outputs the list of custom markups
1319       ** formats with descriptions.
1320       */
1321       if( sMarkup.lenType==7 && strncmp(sMarkup.zType,"markups",7)==0 ){
1322         char **azMarkup;
1323         put_htmlized_text(&zText, i);
1324 
1325         azMarkup = db_query(
1326               "SELECT markup, description FROM markup ORDER BY markup;");
1327         if( azMarkup && azMarkup[0] ){
1328           @ <p><big><b>Custom Markup Rules</b></big></p>
1329           @ <p>The following are custom markup rules implemented
1330           @ on this server.</p>
1331           for(j=0; azMarkup[j]; j+=2){
1332             if( azMarkup[j+1] && azMarkup[j+1][0] ){
1333               /* this markup has a description, output it.
1334               */
1335               @ <p>
1336               output_formatted(azMarkup[j+1],NULL);
1337               @ </p>
1338             }else{
1339               @ <p>{%h(azMarkup[j])} (no description)</p>
1340             }
1341           }
1342         }
1343 
1344         zText += sMarkup.lenTotal;
1345         i = 0;
1346         wordStart = lineStart = paraStart = 0;
1347         continue;
1348       }
1349 
1350       /* It could be custom markup. There are two kinds of custom markups
1351       ** available. The first is a simple format string such
1352       ** "key=%k args=%a" where %k is replaced by the markup key and %a
1353       ** by any following arguments. More flexibility would probably be
1354       ** nice, but that's how the existing markup logic works. The second
1355       ** form of markup is an external executable which gets passed the
1356       ** key and args on the command line and any output is dumped right
1357       ** into the output stream.
1358       */
1359       if( sMarkup.zType && sMarkup.lenType ) {
1360         /* sMarkup.zType is a pointer into the text buffer, not a NUL
1361         ** terminated token. This is actually the case with everything
1362         ** in sMarkup. Note that the markup type is already checked to
1363         ** be only chars that pass isalpha() so we can avoid "%.*q".
1364         */
1365         char **azMarkup = db_query(
1366           "SELECT type,formatter FROM markup WHERE markup='%.*s';",
1367           sMarkup.lenType, sMarkup.zType);
1368 
1369         if( azMarkup && azMarkup[0] && azMarkup[1] ){
1370           /* We've found a custom formatter for this type */
1371 
1372           int bl = sMarkup.lenTotal;
1373           int cl = 0;
1374           int type = atoi(azMarkup[0]);
1375           char *zOutput;
1376 
1377           put_htmlized_text(&zText, i);
1378 
1379           /* handle blocks. This basically means we scan ahead to find
1380           ** "end<markup>. bl becomes the total length of the block
1381           ** and cl is everything up the the {end<markup>}. If we can't
1382           ** find a match, bl becomes zero and we end up just outputting
1383           ** the raw markup tag.
1384           */
1385           if( type==2 || type==3 ){
1386             char *zEnd = mprintf("{end%.*s}", sMarkup.lenType, sMarkup.zType);
1387             int el = strlen(zEnd);
1388             while( zText[bl] && strncmp(&zText[bl],zEnd,el)){ bl++; }
1389             if( zText[bl]!=0 ){
1390               /* found a matching end tag. Note that bl includes the
1391               ** length of the initial markup which is not part of the
1392               ** actual content. Fix that. bl doesn't include the length
1393               ** of the end markup tag. Fix that too.
1394               */
1395               cl = bl - sMarkup.lenTotal;
1396               bl += el;
1397             } else {
1398               /* that didn't work, restore to original value.
1399               */
1400               bl = sMarkup.lenTotal;
1401             }
1402             free(zEnd);
1403           }
1404 
1405           /* Substitutions are basically the same for all types of
1406           ** formatters, except that quotes are stripped from arguments
1407           ** to programs.
1408           */
1409           zOutput = markup_substitution( (type==1 || type==3),
1410             azMarkup[1], &sMarkup, &zText[sMarkup.lenTotal], cl );
1411           if( bl && zOutput ){
1412             if( type == 0 || type == 2 ){
1413               output_restricted_html(zOutput, strlen(zOutput));
1414             }else if( type==1 || (type==3 && cl==0) ){
1415               pipe_block(zOutput, "", 0);
1416             }else if( type==3 ){
1417               pipe_block(zOutput, &zText[sMarkup.lenTotal], cl);
1418             }
1419 
1420             free(zOutput);
1421           }
1422 
1423           zText += bl;
1424           i = 0;
1425           wordStart = lineStart = paraStart = 0;
1426           continue;
1427         }
1428       }
1429     }
1430 
1431     if( paraStart ){
1432       put_htmlized_text(&zText, i);
1433 
1434       /* Blank lines at the beginning of a paragraph are ignored.
1435       */
1436       if( isspace(c) && (j = is_blank_line(&zText[i]))>0 ){
1437         zText += j;
1438         continue;
1439       }
1440 
1441       /* If the first line of a paragraph begins with a tab or with two
1442       ** or more spaces, then that paragraph is printed verbatim.
1443       */
1444       if( c=='\t' || (c==' ' && (zText[i+1]==' ' || zText[i+1]=='\t')) ){
1445         if( !inPRE ){
1446           if( inB ){ cgi_printf(zEndB); inB=0; }
1447           if( inI ){ cgi_printf("</i>"); inI=0; }
1448           if( inT ){ cgi_printf("</tt>"); inT=0; }
1449           if( inTab ){ cgi_printf("</table>"); inTab=0; }
1450           adjust_list_nesting(aList, 0);
1451           cgi_printf("<pre>\n");
1452           inPRE = 1;
1453         }
1454       }
1455     } /* end if( paraStart ) */
1456 
1457     if( lineStart ){
1458       /* Blank lines in the middle of text cause a paragraph break
1459       */
1460       if( isspace(c) && (j = is_blank_line(&zText[i]))>0 ){
1461         put_htmlized_text(&zText, i);
1462         zText += j;
1463         if( inB ){ cgi_printf(zEndB); inB=0; }
1464         if( inI ){ cgi_printf("</i>"); inI=0; }
1465         if( inT ){ cgi_printf("</tt>"); inT=0; }
1466         if( inTab ){ cgi_printf("</table>"); inTab=0; }
1467         if( inPRE ){ cgi_printf("</pre>\n"); inPRE = 0; }
1468         is_list_elem(zText, &k);
1469         if( abs(k)<aList[0] ) adjust_list_nesting(aList, k);
1470         if( zText[0]!=0 ){ cgi_printf("\n<p>"); }
1471         wordStart = lineStart = paraStart = 1;
1472         i = 0;
1473         continue;
1474       }
1475     } /* end if( lineStart ) */
1476 
1477     if( lineStart && !inPRE ){
1478       /* If we are not in verbatim text and a line begins with "*:", then
1479       ** generate a bullet.  Or if the line begins with "NNN:" where NNN
1480       ** is a number, generate an enumeration item.
1481       */
1482       if( (j = is_list_elem(&zText[i], &k))>0 ){
1483         put_htmlized_text(&zText, i);
1484         adjust_list_nesting(aList, k);
1485         if( inTab ){ cgi_printf("</table>"); inTab=0; }
1486         if( zText[0]!='_' ) cgi_printf("<li>");
1487         zText += j;
1488         i = 0;
1489         wordStart = 1;
1490         lineStart = paraStart = 0;
1491         continue;
1492       }
1493 
1494       /* Four or more "-" characters on at the beginning of a line that
1495       ** contains no other text results in a horizontal rule.
1496       */
1497       if( (c=='-' || c=='=') && (j = is_horizontal_rule(&zText[i]))>0 ){
1498         put_htmlized_text(&zText, i);
1499         adjust_list_nesting(aList, 0);
1500         if( inTab ){ cgi_printf("</table>"); inTab=0; }
1501         cgi_printf("<hr>\n");
1502         zText += j;
1503         if( *zText ) zText++;
1504         i = 0;
1505         lineStart = wordStart = 1;
1506         paraStart = 1;
1507         continue;
1508       }
1509 
1510       /* '|' at the start of a line may be a table
1511       */
1512       if( c=='|' && (j = is_table_row(&zText[i]))>0 ){
1513         put_htmlized_text(&zText, i);
1514         adjust_list_nesting(aList, 0);
1515         if( !inTab ){
1516           cgi_printf("<table border=\"1\" cellspacing=\"0\">\n");
1517           inTab = 1;
1518         }
1519         output_table_row(zText,j);
1520         zText += j;
1521         i = 0;
1522         wordStart = 1;
1523         lineStart = paraStart = 0;
1524         continue;
1525       }
1526     } /* end if( lineStart && !inPre ) */
1527 
1528     if( wordStart && !inPRE ){
1529       /* A wiki name at the beginning of a word which is not in verbatim
1530       ** text generates a hyperlink to that wiki page.
1531       **
1532       ** Special case: If the name is in CamelCase but ends with a "_", then
1533       ** suppress the "_" and do not generate the hyperlink.  This allows
1534       ** CamelCase words that are not wiki page names to appear in text.
1535       */
1536       if( g.okRdWiki && isupper(c) && (j = is_wiki_name(&zText[i]))>0 ){
1537         int exists = db_exists("SELECT 1 FROM wiki WHERE name='%.*s'",
1538                                j, &zText[i]);
1539         put_htmlized_text(&zText, i);
1540         if( !exists ) cgi_printf("<em>");
1541         cgi_printf("<a href=\"%z\">%.*s</a>",
1542             format_link("wiki?p=%.*s%s", j, zText, zLinkSuffix),
1543             j, zText);
1544         if( !exists ) cgi_printf("</em>");
1545         zText += j;
1546         i = 0;
1547         wordStart = lineStart = paraStart = 0;
1548         continue;
1549       }
1550 
1551       if( g.okCheckout && c=='/' && (j = is_repository_file(&zText[i]))>0 ){
1552         char *zFile;
1553         put_htmlized_text(&zText, i);
1554         zFile = mprintf("%.*s", j-1, zText+1);
1555         cgi_printf("<a href=\"%z\">/%h</a>",
1556             format_link("rlog?f=%T", zFile), zFile);
1557         free(zFile);
1558         zText += j;
1559         i = 0;
1560         wordStart = lineStart = paraStart = 0;
1561         continue;
1562       }
1563 
1564       /* A "_" at the beginning of a word puts us into an italic font.
1565       */
1566       if( c=='_' && !inB && !inI && !inT && font_terminator(&zText[i+1],c,1) ){
1567         put_htmlized_text(&zText, i);
1568         i = 0;
1569         zText++;
1570         cgi_printf("<i>");
1571         inI = 1;
1572         continue;
1573       }
1574 
1575       /* A "=" at the beginning of a word puts us into an fixed font.
1576       */
1577       if( c=='=' && !inB && !inI && !inT && font_terminator(&zText[i+1],c,1) ){
1578         put_htmlized_text(&zText, i);
1579         i = 0;
1580         zText++;
1581         cgi_printf("<tt>");
1582         inT = 1;
1583         continue;
1584       }
1585 
1586       /* A "*" at the beginning of a word puts us into a bold font.
1587       */
1588       if( c=='*' && !inB && !inI && !inT && (j = count_stars(&zText[i]))>=1
1589               && j<=3 && font_terminator(&zText[i+j],c,j) ){
1590         const char *zBeginB = "";
1591         put_htmlized_text(&zText, i);
1592         i = 0;
1593         zText += j;
1594         switch( j ){
1595           case 1: zBeginB = "<b>";           zEndB = "</b>";             break;
1596           case 2: zBeginB = "<big><b>";      zEndB = "</b></big>";       break;
1597           case 3: zBeginB = "<big><big><b>"; zEndB = "</b></big></big>"; break;
1598         }
1599         cgi_printf(zBeginB);
1600         inB = j;
1601         continue;
1602       }
1603 
1604 
1605       /* Words that begin with "http:" or "https:" or "ftp:" or "mailto:"
1606       ** become hyperlinks.
1607       */
1608       if( (c=='h' || c=='f' || c=='m') && (j=is_url(&zText[i]))>0 ){
1609         put_htmlized_text(&zText, i);
1610         z = htmlize(zText, j);
1611         if( is_image(z, strlen(z)) ){
1612           cgi_printf("<img src=\"%s\" alt=\"%s\"%s>", z, z,
1613                      g.noFollow ? " rel=\"nofollow\"" : "");
1614         }else{
1615           cgi_printf("%s<a href=\"%s\"%s>%s</a>",
1616                      zExtLink, z,
1617                      g.noFollow ? " rel=\"nofollow\"" : "", z);
1618         }
1619         free(z);
1620         zText += j;
1621         i = 0;
1622         wordStart = lineStart = paraStart = 0;
1623         continue;
1624       }
1625 
1626       /* If the user has read permission on tickets and a word is of the
1627       ** form "#NNN" where NNN is a sequence of digits, then generate a
1628       ** hyperlink to ticket number NNN.
1629       */
1630       if( c=='#' && g.okRead && (j = ndigit(&zText[i+1]))>0
1631                  && is_eow(&zText[i+1+j],0)
1632                  && (v = atoi(&zText[i+1]))>0 && v<=nTicket ){
1633         put_htmlized_text(&zText, i);
1634         output_ticket(v,0);
1635         zText += j;
1636         if( *zText ) zText++;
1637         i = 0;
1638         wordStart = lineStart = paraStart = 0;
1639         continue;
1640       }
1641 
1642       /* If the user has checkout permissions and a word is of the form
1643       ** "[NNN]" where NNN is a checkin number, then generate a hyperlink
1644       ** to check-in NNN.
1645       */
1646       if( c=='[' && g.okRead && (j = ndigit(&zText[i+1]))>0
1647                  && is_eow(&zText[i+j+2],0)
1648                  && (v = atoi(&zText[i+1]))>0 && v<=nCommit
1649                  && zText[i+j+1]==']' ){
1650         put_htmlized_text(&zText, i);
1651         output_chng(v);
1652         zText += j+1;
1653         if( *zText ) zText++;
1654         i  = 0;
1655         wordStart = lineStart = paraStart = 0;
1656         continue;
1657       }
1658     } /* end if( wordStart && !inPre ) */
1659 
1660     /* A "*", "=", or a "_" at the end of a word takes us out of bold,
1661     ** fixed or italic mode.
1662     */
1663     if( inB && c=='*' && !isspace(zText[i-1]) && zText[i-1]!='*' &&
1664             (j = count_stars(&zText[i]))==inB && is_eow(&zText[i+j],0) ){
1665       inB = 0;
1666       put_htmlized_text(&zText, i);
1667       i = 0;
1668       zText += j;
1669       cgi_printf(zEndB);
1670       continue;
1671     }
1672     if( inT && c=='=' && !isspace(zText[i-1]) && is_eow(&zText[i+1],0) ){
1673       put_htmlized_text(&zText, i);
1674       i = 0;
1675       zText++;
1676       inT = 0;
1677       cgi_printf("</tt>");
1678       continue;
1679     }
1680     if( inI && c=='_' && !isspace(zText[i-1]) && is_eow(&zText[i+1],0) ){
1681       put_htmlized_text(&zText, i);
1682       i = 0;
1683       zText++;
1684       inI = 0;
1685       cgi_printf("</i>");
1686       continue;
1687     }
1688     if( wordStart ){
1689       wordStart = isspace(c) || c=='(' || c=='"';
1690     }else{
1691       wordStart = isspace(c);
1692     }
1693     lineStart = c=='\n';
1694     paraStart = 0;
1695     i++;
1696   }
1697   if( zText[0] ) cgi_printf("%h", zText);
1698   if( inB ) cgi_printf("%s\n",zEndB);
1699   if( inT ) cgi_printf("</tt>\n");
1700   if( inI ) cgi_printf("</i>\n");
1701   if( inTab ){ cgi_printf("</table>"); inTab=0; }
1702   adjust_list_nesting(aList, 0);
1703   if( inPRE ) cgi_printf("</pre>\n");
1704 }
1705 
1706 /*
1707 ** Output text while inserting hyperlinks to ticket and checkin reports.
** Within the text, an occurrence of "#NNN" (where N is a digit) results
** in a hyperlink to the page that shows that ticket.  Any occurrence of
1710 ** [NNN] gives a hyperlink to check-in number NNN.
1711 **
1712 ** (Added later:)  Also format the text as HTML.  Insert <p> in place
1713 ** of blank lines.  Insert <pre>..</pre> around paragraphs that are
1714 ** indented by two or more spaces.  Make lines that begin with "*:"
1715 ** or "1:" into <ul> or <ol> list elements.
1716 **
1717 ** (Later:)  The formatting is now extended to include all of the
1718 ** Wiki formatting options.
1719 */
void output_formatted(const char *zText, const char *zPageId){
  /* Formatted text is rendered as wiki text with an empty suffix on
  ** wiki hyperlinks.
  */
  static const char zNoSuffix[] = "";
  output_wiki(zText, zNoSuffix, zPageId);
}
1723 
1724 /*
1725 ** This routine alters a check-in message to make it more readable
1726 ** in a timeline.  The following changes are made:
1727 **
1728 ** *:  Remove all leading whitespace.  This prevents the text from
**     being displayed verbatim.
1730 **
1731 ** *:  If the message begins with "*:" or "N:" (where N is a number)
1732 **     then strip it out.
1733 **
1734 ** *:  Change all newlines to spaces.  This will disable paragraph
1735 **     breaks, verbatim paragraphs, enumerations, and bullet lists.
1736 **
1737 ** *:  Replace all internal list markups with '+' followed by spaces.
1738 **     (Otherwise, bullet lists turn into boldface).
1739 **
1740 ** *:  Collapse contiguous whitespace into a single space character
1741 **
1742 ** *:  Truncate the string at the first whitespace character that
1743 **     is more than mxChar characters from the beginning of the string.
**     Or if the string is longer than mxChar characters but there
1745 **     was a paragraph break after mnChar characters, truncate at the
1746 **     paragraph break.
1747 **
1748 ** This routine changes the message in place.  It returns non-zero if
1749 ** the message was truncated and zero if the original text is still
1750 ** all there (though perhaps altered.)
1751 */
int output_trim_message(char *zMsg, int mnChar, int mxChar){
  /* Rewrites zMsg in place: i is the read position and j the write
  ** position.  Returns 1 if the text was cut short, otherwise 0.  A NULL
  ** zMsg is accepted and returns 0.
  **
  ** Bytes are cast to unsigned char before being handed to isspace();
  ** passing a negative plain char (any byte >= 0x80 where char is signed)
  ** is undefined behavior.
  */
  int i, j, k, n;
  int brkpt = 0;    /* First paragraph break after zMsg[mnChar] */

  if( zMsg==0 ) return 0;

  /* Skip leading whitespace and then one leading list marker, if any. */
  for(i=0; isspace((unsigned char)zMsg[i]); i++){}
  i += is_list_elem(&zMsg[i], &k);

  for(j=0; zMsg[i]; i++){
    int c = zMsg[i];
    if( c=='\n' ){
      /* Remember the output position of the first blank-line break that
      ** comes after more than mnChar characters have been kept.
      */
      if( j>mnChar && is_blank_line(&zMsg[i+1]) && brkpt==0 ){
        brkpt = j;
      }
      c = ' ';
      /* A list marker at the start of the next line is overwritten with
      ** '+' followed by spaces so it is not read as bold markup later.
      */
      if( (n = is_list_elem(&zMsg[i+1],&k))>0 ) {
        zMsg[i+1] = '+';
        memset(&zMsg[i+2],' ',n-1);
      }
    }
    if( isspace((unsigned char)c) ){
      if( j>=mxChar ){
        /* Long enough: cut here, or at the recorded paragraph break if
        ** there was one.
        */
        zMsg[j] = 0;
        if( brkpt>0 ) zMsg[brkpt] = 0;
        return 1;
      }
      /* Collapse runs of whitespace into one space and never start the
      ** output with a space.
      */
      if( j>0 && !isspace((unsigned char)zMsg[j-1]) ){
        zMsg[j++] = ' ';
      }
    }else{
      zMsg[j++] = c;
    }
  }
  zMsg[j] = 0;
  return 0;
}
1787 
1788 /*
1789 ** Append HTML text to the output that describes the formatting
1790 ** conventions implemented by the output_formatted() function
1791 ** above.
1792 */
append_formatting_hints(void)1793 void append_formatting_hints(void){
1794   char **az;
1795   int j;
1796   @ <ul>
1797   @ <li><p>
1798   @ Blank lines divide paragraphs.
1799   @ </p></li>
1800   @
1801   @ <li><p>
1802   @ If a paragraph is indented by a tab or by two or more spaces,
1803   @ it is displayed verbatim -- in a constant-width font with all
1804   @ spacing and line breaks preserved.
1805   @ </p></li>
1806   @
1807   @ <li><p>
1808   @ Surround phrases by underscores, asterisks or equals
1809   @ for italic, bold or fixed text.
1810   @ (Ex: "<tt>_italic text_, *bold text*, =fixed text=</tt>")
1811   @ Use two or three asterisks for bold text in a larger font.
1812   @ </p></li>
1813   @
1814   @ <li><p>
1815   if( g.okRead ){
1816     @ Text like "<tt>#123</tt>" becomes a hyperlink to ticket #123.
1817   }
1818   if( g.okCheckout ){
1819     @ Text like "<tt>[456]</tt>" becomes a hyperlink to
1820     @ check-in [456].
1821   }
1822   if( g.okRdWiki ){
1823     @ An absolute URL, a wiki page name becomes a hyperlink.
1824     @ Also markup of the form "<tt>{wiki: <i>title text</i>}</tt>"
1825     @ becomes a hyperlink to the wiki document of <i>title</i>.
1826   } else {
1827     @ An absolute URL becomes a hyperlink.
1828   }
1829   @ Or use markup of the form:
1830   @ "<tt>{link: <i>url text</i>}</tt>".
1831   @ </p></li>
1832   @
1833   @ <li><p>
1834   @ A path to a file in the repository becomes a link to its rlog page:
1835   @ "<tt>/path/to/format.c</tt>".
1836   @ </p></li>
1837   @
1838   @ <li><p>
1839   @ The characters "<tt>*:</tt>" or "<tt>1:</tt>" at the beginning of a line
1840   @ produce a bullet or enumeration list.
1841   @ Use additional colons for nested lists.
1842   @ </p></li>
1843   @
1844   @ <li><p>
1845   @ Create a table by wrapping cells with "<tt>|</tt>", starting at the
1846   @ beginning of a line. Each cell is separated with a "<tt>|</tt>" and
1847   @ each line should end with a "<tt>|</tt>".
1848   @ </p></li>
1849   @
1850   @ <li><p>
1851   @ Use "<tt>_:</tt>" at the beginning of a paragraph to indent that
1852   @ paragraph.  Multiple colons indent more.
1853   @ </p></li>
1854   @
1855   @ <li><p>
1856   @ Four or more "-" or "=" characters on a line by themselves generate a
1857   @ horizontal rule (the &lt;hr&gt; markup of HTML).
1858   @ </p></li>
1859   @
1860   @ <li><p>
1861   @ Create a line-break using "<tt>{linebreak}</tt>".
1862   @ </p></li>
1863   @
1864   @ <li><p>
1865   @ Use "<tt>{quote: <i>text</i>}</tt>" to display <i>text</i>.
1866   @ </p></li>
1867   @
1868   @ <li><p>
1869   @ Insert in-line images using "<tt>{image: <i>url</i>}</tt>".
1870   @ The <i>url</i> can be the filename of an attachment.
1871   @ </p></li>
1872   @
1873   @ <li><p>
1874   @ Insert in-line reports using "<tt>{report: <i>rn</i>}</tt>". The <i>rn</i>
1875   @ is the report number (which isn't necessarily the same as the numbers on
1876   @ the <a href="reportlist">report list</a>).
1877   @ </p></li>
1878   @
1879   @ <li><p>
1880   @ Text between "<tt>&lt;html&gt;...&lt;/html&gt;</tt>" is interpreted as HTML.
1881   @ </p></li>
1882   @
1883 
1884   /* output custom markups.
1885   */
1886   az = db_query("SELECT markup, description FROM markup;");
1887   if( az && az[0] ){
1888     for(j=0; az[j]; j+=2){
1889       if( az[j+1] && az[j+1][0] ){
1890         /* this markup has a description, output it.
1891         */
1892         @ <li><p>
1893         output_formatted(az[j+1],NULL);
1894         @ </p></li>
1895       }else{
1896         @ <li><p>{%h(az[j])} (no description)</p></li>
1897       }
1898     }
1899   }
1900   @ </ul>
1901 }
1902