1 /*
2 ** Copyright (c) 2002 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2 of the License, or (at your option) any later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public
15 ** License along with this library; if not, write to the
16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ** Boston, MA 02111-1307, USA.
18 **
19 ** Author contact information:
20 ** drh@hwaci.com
21 ** http://www.hwaci.com/drh/
22 **
23 *******************************************************************************
24 **
25 ** This file contains code used to convert wiki text into HTML.
26 */
27 #include "config.h"
28 #include "format.h"
29 #include <time.h>
30 #include <limits.h> /* for PATH_MAX */
31
32 /*
33 ** Format a relative link for output. The idea here is to determine from context
34 ** whether the link needs to be relative or absolute (i.e. for RSS output, e-mail
35 ** notifications, etc). Returns the formatted string.
36 */
format_link(const char * zFormat,...)37 char *format_link(const char* zFormat,...){
38 char *zLink;
39 va_list ap;
40 va_start(ap,zFormat);
41 zLink = vmprintf(zFormat,ap);
42 va_end(ap);
43 if( g.zLinkURL && g.zLinkURL[0] ){
44 zLink = mprintf("%s/%z",g.zLinkURL,zLink);
45 }
46 return zLink;
47 }
48
/*
** Return the number of decimal digits at the beginning of the string z.
** The result is 0 when z does not begin with a digit (which includes the
** empty string).
*/
int ndigit(const char *z){
  int n = 0;
  /* Cast to unsigned char: handing a negative char value to isdigit()
  ** is undefined behavior. */
  while( isdigit((unsigned char)z[n]) ){ n++; }
  return n;
}
57
/*
** Check whether z[] holds nothing but whitespace up to and including a
** newline.  If so, return the number of characters through that newline.
** If not, return 0.
**
** The scan keeps going across any further whitespace, so when several
** blank lines occur in a row the returned count covers all of them (it
** ends just after the last newline seen before the first non-space
** character).
*/
static int is_blank_line(const char *z){
  int i;
  int nCover = 0;   /* Characters through the most recent newline */

  /* Cast to unsigned char: isspace() on a negative char is undefined. */
  for(i=0; isspace((unsigned char)z[i]); i++){
    if( z[i]=='\n' ) nCover = i+1;
  }
  return nCover;
}
75
/*
** Return TRUE if *z points to the terminator of a word.  A word is
** terminated by whitespace, by end of input, or by a run of characters
** from zEnd that is itself followed by whitespace or end of input.
**
** zEnd characters _inside_ a word do not count: as soon as a character
** that is neither whitespace nor in zEnd is seen, the answer is FALSE.
** If zEnd is NULL a default punctuation set is used.
*/
int is_eow(const char *z, const char *zEnd){
  if( zEnd==0 ) zEnd = ".,:;?!)\"'";
  /* Cast to unsigned char: isspace() on a negative char is undefined. */
  for(; *z!=0 && !isspace((unsigned char)*z); z++){
    const char *p = zEnd;
    while( *p && *p!=*z ){ p++; }
    if( *p==0 ) return 0;   /* *z is an ordinary word character */
  }
  return 1;
}
93
/*
** Check to see if *z points to the beginning of a Wiki page name.
** If it does, return the number of characters in that name.  If not,
** return 0.
**
** A Wiki page name contains only alphabetic characters.  The first
** letter must be a capital and there must be at least one other capital
** letter in the word.  Every capital letter must be followed by one or
** more lower-case letters.  The name must end at a word boundary as
** defined by is_eow() with its default terminator set.
*/
int is_wiki_name(const char *z){
  int i = 0;
  int nCap = 0;   /* Number of capital letters seen */

  /* All ctype arguments are cast to unsigned char; passing a negative
  ** char value to these functions is undefined behavior. */
  if( !isupper((unsigned char)z[0]) ) return 0;
  while( z[i] ){
    if( isupper((unsigned char)z[i]) ){
      if( !islower((unsigned char)z[i+1]) ) return 0;
      nCap++;
    }else if( !islower((unsigned char)z[i]) ){
      break;
    }
    i++;
  }
  if( nCap<2 || !is_eow(&z[i],0) ) return 0;
  return i;
}
118
/*
** Check to see if *z points to the beginning of the name of a file in
** the repository.  If it does, return the number of characters in that
** name (leading slash included).  If not, return 0.
**
** The name must start with a slash and must contain at least one more
** slash.  It extends up to the first word terminator reported by
** is_eow(), so names containing spaces are not supported.
*/
int is_repository_file(const char *z){
  int n;
  int hasInnerSlash = 0;
  int found;
  char *zName;

  if( z[0]!='/' ) return 0;
  n = 1;
  while( z[n] && !is_eow(&z[n],0) ){
    if( z[n]=='/' ) hasInnerSlash = 1;
    n++;
  }
  if( !hasInnerSlash ) return 0;

  /* Look the name up in the filechng table.  The leading '/' is not part
  ** of the stored filename, so it is stripped from the query. */
  zName = mprintf("%.*s", n-1, &z[1]);
  found = db_exists("SELECT filename FROM filechng WHERE filename='%q'", zName);
  free(zName);
  return found ? n : 0;
}
145
/*
** Check to see if z[] begins with a bullet or enumeration list element
** marker.  A marker is "*" or "_" (bullet) or a decimal number
** (enumeration), followed by 1 to 10 colons and then some text on the
** same line.
**
** If z[] is not a list element marker, set *pLevel to 0 and return 0.
** Otherwise set *pLevel so that its magnitude is the list depth (the
** number of colons) and its sign is the type: negative for enumerations,
** positive for bullets.  Then return the number of characters in the
** marker, including any blanks that follow the colons (so the result is
** always at least 2).
*/
static int is_list_elem(const char *z, int *pLevel){
  const char *zStart = z;
  int isEnum;
  int depth = 0;

  /* ctype arguments are cast to unsigned char; a negative char value is
  ** undefined behavior for isdigit()/isspace(). */
  if( isdigit((unsigned char)*z) ){
    do{ z++; }while( isdigit((unsigned char)*z) );
    isEnum = 1;
  }else if( *z=='*' || *z=='_' ){
    z++;
    isEnum = 0;
  }else{
    *pLevel = 0;
    return 0;
  }

  while( *z==':' ){ z++; depth++; }
  while( *z!='\n' && isspace((unsigned char)*z) ){ z++; }

  /* Need 1..10 colons and some text left on the line */
  if( depth==0 || depth>10 || *z==0 || *z=='\n' ){
    *pLevel = 0;
    return 0;
  }
  *pLevel = isEnum ? -depth : depth;
  return z - zStart;
}
188
/*
** If *z points to horizontal rule markup, return the number of
** characters in that markup (not counting the newline that ends the
** line).  Otherwise return 0.
**
** Horizontal rule markup consists of four or more identical '-' or '='
** characters followed by nothing but whitespace to the end of the line
** or the end of the input.
*/
static int is_horizontal_rule(const char *z){
  const char c = z[0];
  int i = 0;

  if( c!='-' && c!='=' ) return 0;
  while( z[i]==c ){ i++; }
  if( i<4 ) return 0;
  /* Cast to unsigned char: isspace() on a negative char is undefined. */
  while( z[i]!='\n' && isspace((unsigned char)z[i]) ){ i++; }
  return (z[i]=='\n' || z[i]==0) ? i : 0;
}
206
/*
** Return the number of characters in the URL that begins at *z.
** Return 0 if *z is not the beginning of a URL.
**
** Recognized prefixes are "http:", "https:", "ftp://" and "mailto:".
**
** Algorithm: Advance to the first whitespace character or to the end
** of the string.  Then back up over any trailing characters from the
** set  .)]}?!"':;,   What remains must be at least a scheme-specific
** minimum length or the text is not considered a URL.
*/
int is_url(const char *z){
  int i;
  int minlen;

  if( strncmp(z,"http:",5)==0 ){
    minlen = 7;
  }else if( strncmp(z,"https:",6)==0 ){
    minlen = 8;
  }else if( strncmp(z,"ftp://",6)==0 ){
    minlen = 7;
  }else if( strncmp(z,"mailto:",7)==0 ){
    minlen = 10;
  }else{
    return 0;
  }

  /* Cast to unsigned char: isspace() on a negative char is undefined. */
  for(i=0; z[i] && !isspace((unsigned char)z[i]); i++){}

  /* Trailing punctuation belongs to the sentence, not to the URL.
  ** z[i-1] is never the NUL terminator here, so strchr() cannot match
  ** the end of the set string. */
  while( i>0 && strchr(".)]}?!\"':;,", z[i-1])!=0 ){ i--; }

  return i>=minlen ? i : 0;
}
257
/*
** Return true if the first N characters of zUrl name an image.  An image
** URL is any URL that ends (case-insensitively) with ".gif", ".png",
** ".jpg", ".jpe" or ".jpeg".  At least one character must precede a
** four-character extension, so URLs shorter than 5 characters never
** match.
*/
static int is_image(const char *zUrl, int N){
  char zTail[6];   /* Lower-cased copy of the last 5 characters */
  int i;

  if( N<5 ) return 0;
  for(i=0; i<5; i++){
    /* Cast to unsigned char: tolower() on a negative char is undefined. */
    zTail[i] = (char)tolower((unsigned char)zUrl[N-5+i]);
  }
  zTail[5] = 0;

  if( strcmp(zTail,".jpeg")==0 ) return 1;
  return strcmp(&zTail[1],".gif")==0
      || strcmp(&zTail[1],".png")==0
      || strcmp(&zTail[1],".jpg")==0
      || strcmp(&zTail[1],".jpe")==0;
}
276
/*
** Output the first N characters of *pzText after passing them through
** htmlize(), then advance *pzText past them.  Nothing is written and
** *pzText is left alone when N is zero or negative.
*/
static void put_htmlized_text(const char **pzText, int N){
  char *zEsc;

  if( N<=0 ) return;
  zEsc = htmlize(*pzText, N);
  cgi_printf("%s", zEsc);
  free(zEsc);
  *pzText += N;
}
288
/*
** Search ahead in text z[] looking for a font terminator consisting
** of "n" consecutive instances of character "c".  The terminator must
** not follow whitespace, must be at the end of a word (see is_eow()) and
** must occur before a paragraph break (two newlines separated only by
** whitespace).  Also, z[] must begin a new word: it may not start with
** whitespace, end of input, or "c" itself.  If any of these conditions
** is false, return false.  If all conditions are met, return true.
**
** TODO: Ignore terminators that occur inside of special markup such
** as "{quote: not-a-terminator_}"
*/
static int font_terminator(const char *z, int c, int n){
  int seenNL = 0;   /* True if only whitespace seen since the last newline */
  int cnt = 0;      /* Length of the current run of "c" characters */

  /* ctype arguments are cast to unsigned char; a negative char value is
  ** undefined behavior for isspace(). */
  if( *z==0 || *z==c || isspace((unsigned char)*z) ) return 0;
  for(z++; *z; z++){
    if( *z==c && !isspace((unsigned char)z[-1]) ){
      cnt++;
      if( cnt==n && is_eow(&z[1],0) ) return 1;
      continue;
    }
    cnt = 0;
    if( *z=='\n' ){
      if( seenNL ) return 0;   /* Paragraph break before any terminator */
      seenNL = 1;
    }else if( !isspace((unsigned char)*z) ){
      seenNL = 0;
    }
  }
  return 0;
}
323
/*
** Return the length of the run of '*' characters that starts at z[0].
*/
static int count_stars(const char *z){
  const char *p = z;
  while( *p=='*' ) p++;
  return (int)(p - z);
}
332
/*
** A Markup records where the pieces of one instance of brace markup sit
** inside the text being formatted.  Markup has one of these forms:
**
**      {type: key args}
**      {type: key}
**      {type}
**
** Missing pieces default to the piece before them: without args, the
** args fields repeat the key; without a key, both key and args repeat
** the type.  The pointers refer into the scanned text and the pieces
** are not NUL-terminated; use the matching length fields.
*/
typedef struct Markup {
  int lenTotal;        /* Length of the whole markup, braces included */
  int lenType;         /* Number of characters in "type" */
  int lenKey;          /* Number of characters in "key" */
  int lenArgs;         /* Number of characters in "args" */
  const char *zType;   /* First character of "type" */
  const char *zKey;    /* First character of "key" */
  const char *zArgs;   /* First character of "args" */
} Markup;
356
357 /*
358 ** z[] is a string of text beginning with "{". Check to see if it is
359 ** valid markup. If it is, fill in the pMarkup structure and return true.
360 ** If it is not valid markup, return false.
361 */
is_markup(const char * z,Markup * pMarkup)362 static int is_markup(const char *z, Markup *pMarkup){
363 int i, j;
364 int nest = 1;
365 if( *z!='{' ) return 0;
366 for(i=1; isalpha(z[i]); i++){}
367 if( z[i]=='}' ){
368 pMarkup->lenTotal = i+1;
369 pMarkup->lenType = i-1;
370 pMarkup->lenKey = i-1;
371 pMarkup->lenArgs = i-1;
372 pMarkup->zType = &z[1];
373 pMarkup->zKey = &z[1];
374 pMarkup->zArgs = &z[1];
375 return 1;
376 }
377 if( z[i]!=':' ) return 0;
378 pMarkup->lenType = i-1;
379 pMarkup->zType = &z[1];
380 i++;
381 while( isspace(z[i]) && z[i]!='\n' ){ i++; }
382 if( z[i]==0 || z[i]=='\n' ) return 0;
383 j = i;
384 pMarkup->zKey = &z[i];
385 while( z[i] && !isspace(z[i]) ){
386 if( z[i]=='}' ) nest--;
387 if( z[i]=='{' ) nest++;
388 if( nest==0 ) break;
389 i++;
390 }
391 if( z[i]==0 || z[i]=='\n' ) return 0;
392 pMarkup->lenKey = i - j;
393 if( nest==0 ){
394 pMarkup->lenArgs = i - j;
395 pMarkup->lenTotal = i+1;
396 pMarkup->zArgs = pMarkup->zKey;
397 return 1;
398 }
399 while( isspace(z[i]) && z[i]!='\n' ){ i++; }
400 if( z[i]=='\n' || z[i]==0 ) return 0;
401 j = i;
402 while( z[i] && z[i]!='\n' ){
403 if( z[i]=='}' ) nest--;
404 if( z[i]=='{' ) nest++;
405 if( nest==0 ) break;
406 i++;
407 }
408 if( z[i]!='}' || nest>0 ) return 0;
409 pMarkup->zArgs = &z[j];
410 pMarkup->lenArgs = i - j;
411 pMarkup->lenTotal = i+1;
412 return 1;
413 }
414
415 /*
416 ** Calculate the length of the table cell starting just after a | and
417 ** extending to the next non-quoted (i.e. not in {} markup) |
418 ** or end-of-line. Returns zero if there's
419 ** no complete (i.e. |-terminated) cell. Cell length does _not_ include
420 ** the ending |.
421 */
table_cell_length(const char * z)422 static int table_cell_length(const char *z){
423 Markup markup;
424 int i = 0;
425
426 while( z[i] && z[i]!='|' && z[i]!='\n' ){
427 if( z[i]=='{' && is_markup(&z[i],&markup) ){
428 i += markup.lenTotal;
429 }else{
430 i++;
431 }
432 }
433 return (z[i]=='|') ? i : 0;
434 }
/*
** If *z points to a row of table markup, return the number of
** characters in that markup (not counting the newline that ends the
** line).  Otherwise return 0.
**
** Table markup consists of a line starting with '|' with each cell
** closed by another '|'.  After the last '|' there may be nothing but
** whitespace up to the end of the line or the end of the input.
*/
static int is_table_row(const char *z){
  int i = 0;

  if( z[0]!='|' ) return 0;

  /* Step over "|cell" pairs until a '|' is not followed by a complete
  ** cell.  That last '|' is consumed as the row terminator. */
  while( z[i]=='|' ){
    int nCell;
    i++;
    nCell = table_cell_length(&z[i]);
    if( nCell==0 ) break;
    i += nCell;
  }

  /* Cast to unsigned char: isspace() on a negative char is undefined. */
  while( z[i]!='\n' && isspace((unsigned char)z[i]) ){ i++; }

  return (z[i]=='\n' || z[i]==0) ? i : 0;
}
454
455 /*
456 ** Output the table row defined by z. Individual cells can be wiki formatted
457 ** (within reason), so knowing cell boundaries depends on checking for
458 ** wiki markup and such.
459 */
output_table_row(const char * z,int nLen)460 static void output_table_row(const char *z, int nLen){
461 int i = 0, j;
462 char *zCell;
463
464 @ <tr>
465 while( i<nLen && z[i]=='|' && (j=table_cell_length(&z[++i]))!=0 ){
466 zCell = mprintf("%.*s",j,&z[i]);
467 @ <td>
468 output_formatted(zCell,0);
469 free(zCell);
470 @ </td>
471 i += j;
472 }
473 @ </tr>
474 }
475
/*
** aList[] is a stack describing the <ul>/<ol> elements currently open.
** aList[0] is the stack depth.  aList[1] is +1 if the outermost list is
** a <ul> and -1 if it is an <ol>; aList[2] describes the next list
** inward, and so on.
**
** iTarget says what the stack should look like on return: its absolute
** value is the wanted depth, and it is negative if the innermost list
** should be an <ol> and positive if it should be a <ul>.
**
** This routine writes whatever closing and opening tags are needed to
** get from the current nesting to the wanted one and updates aList[] to
** match.  Intermediate levels that have to be created are opened as
** <ul>.
*/
static void adjust_list_nesting(int *aList, int iTarget){
  int nWant;   /* Wanted depth, i.e. |iTarget| */
  int nKeep;   /* Number of currently open levels that may stay open */

  nWant = iTarget;
  if( nWant<0 ) nWant = 0x7fffffff & -nWant;

  /* If the depth is already right but the innermost list is of the wrong
  ** kind, that list has to be closed and reopened. */
  nKeep = nWant;
  if( aList[0]==nKeep && nKeep>0 && aList[nKeep]*iTarget<0 ){
    nKeep--;
  }

  /* Close lists that are too deep */
  while( aList[0]>nKeep ){
    int top = aList[0];
    aList[0] = top - 1;
    if( aList[top]>0 ){
      cgi_printf("</ul>\n");
    }else{
      cgi_printf("</ol>\n");
    }
  }

  /* Open any missing intermediate levels */
  while( aList[0]<nKeep-1 ){
    cgi_printf("<ul>\n");
    aList[0]++;
    aList[aList[0]] = +1;
  }

  /* Open the innermost list with the requested type */
  if( aList[0]==nWant-1 ){
    if( iTarget<0 ){
      cgi_printf("<ol>\n");
      aList[nWant] = -1;
    }else{
      cgi_printf("<ul>\n");
      aList[nWant] = +1;
    }
    aList[0]++;
  }
}
521
/*
** Return non-zero if the first nCh characters of z, folded to lower
** case, exactly match one of the nList entries of azList[].
**
** azList[] must be sorted in strcmp() order because a binary search is
** used, and its entries must be lower case for a match to be possible.
** Candidates that are empty or longer than 31 characters never match.
*/
static int inSortedList(const char *z, int nCh, const char* azList[], int nList){
  char zBuf[32];
  int i;
  int lwr, upr;

  if( nCh<=0 || nCh>=(int)sizeof(zBuf) ) return 0;
  for(i=0; i<nCh; i++){
    /* Cast to unsigned char: tolower() on a negative char is undefined. */
    zBuf[i] = (char)tolower((unsigned char)z[i]);
  }
  zBuf[nCh] = 0;

  lwr = 0;
  upr = nList - 1;
  while( lwr<=upr ){
    int mid = lwr + (upr - lwr)/2;
    int c = strcmp(azList[mid], zBuf);
    if( c==0 ) return 1;
    if( c<0 ){
      lwr = mid+1;
    }else{
      upr = mid-1;
    }
  }
  return 0;
}
546
/*
** HTML elements that the restricted HTML output routine lets through.
** An element that is not in this table gets escaped instead.
**
** The table is searched with a binary search (see inSortedList()), so
** the entries must stay in strcmp() order and must be lower case.
*/
static const char *azAllowedHtml[] = {
  "a",      "address", "b",      "big",    "blockquote", "br",
  "center", "cite",    "code",
  "dd",     "dfn",     "dir",    "dl",     "dt",
  "em",     "font",
  "h1",     "h2",      "h3",     "h4",     "h5",         "h6",   "hr",
  "i",      "img",     "kbd",    "li",     "menu",       "nobr", "ol",
  "p",      "pre",
  "s",      "samp",    "small",  "strike", "strong",     "sub",  "sup",
  "table",  "td",      "th",     "tr",     "tt",
  "u",      "ul",      "var",    "wbr",
};
604
/*
** The following table is the list of accepted HTML element attributes.
** Markup carrying an attribute that is not on this list is rejected by
** isAllowedAttr().
**
** A binary search is done on this list (see inSortedList()), so it must
** be kept in strcmp() order and the entries must be lower case.
**
** Every entry needs its own trailing comma: two adjacent string
** literals are silently concatenated into a single entry by the
** compiler, which removes both names from the list.
*/
static const char *azAllowedAttr[] = {
  "abbr",
  "accesskey",
  "align",
  "alt",
  "axis",
  "bgcolor",
  "border",
  "cellpadding",
  "cellspacing",
  "char",
  "charoff",
  "charset",
  "cite",        /* URI */
  "class",
  "clear",
  "color",
  "colspan",
  "compact",
  "dir",
  "face",
  "frame",
  "headers",
  "height",
  "href",        /* URI */
  "hreflang",
  "hspace",
  "id",
  "lang",
  "longdesc",
  "name",
  "noshade",
  "nowrap",
  "rel",
  "rev",
  "rowspan",
  "rules",
  "scope",
  "size",
  "span",
  "src",         /* URI */
  "start",
  "summary",
  "title",
  "valign",
  "value",
  "width",
};
659
660 /*
661 ** Return TRUE if all HTML attributes up to the next '>' in the input string
662 ** are on the allowed list (and pass any other checks we might want to add
663 ** down the road...)
664 */
isAllowedAttr(const char * zAttr,int nAttr)665 static int isAllowedAttr(const char *zAttr,int nAttr){
666 int i,j;
667 int inquote = 0;
668 int inbody = 0;
669
670 for(i=0; i<nAttr && zAttr[i]!='>'; i++){
671 if( !inbody && !inquote && isalpha(zAttr[i]) ){
672 for(j=1; i+j<nAttr && isalnum(zAttr[i+j]); j++){}
673
674 if( !inSortedList(&zAttr[i], j, azAllowedAttr,
675 sizeof(azAllowedAttr)/sizeof(azAllowedAttr[0]))){
676 return 0;
677 }
678 i += j-1;
679 inbody = 0;
680 }else if( inquote && zAttr[i]=='"' ){
681 inquote=0;
682 }else if( !inquote && zAttr[i]=='"' ){
683 inquote=1;
684 }else if( isspace(zAttr[i]) ){
685 inbody = 0;
686 }
687 }
688 return 1;
689 }
690
691 /*
692 ** Return TRUE if the HTML element given in the argument is on the allowed
693 ** element list.
694 */
isAllowed(const char * zElem,int nElem)695 static int isAllowed(const char *zElem, int nElem){
696 return inSortedList(zElem, nElem, azAllowedHtml,
697 sizeof(azAllowedHtml)/sizeof(azAllowedHtml[0]));
698 }
699
/*
** Return TRUE if the HTML element name held in the first nElem
** characters of zElem is one that forms an external reference: A, IMG
** or CITE, compared without regard to case.
*/
static int isLinkTag(const char *zElem, int nElem){
  const char *zTag;

  switch( nElem ){
    case 1:  zTag = "A";    break;
    case 3:  zTag = "IMG";  break;
    case 4:  zTag = "CITE"; break;
    default: return 0;
  }
  return sqlite3_strnicmp(zElem, zTag, nElem)==0;
}
709
/*
** If z[] begins with "<html>" and a "</html>" follows somewhere later in
** the string, return the number of characters from the start of z[]
** through the end of the first "</html>".  Both tags are matched without
** regard to case.  Return 0 if either tag is missing.
*/
static int is_html(const char *z){
  const char *p;

  if( sqlite3_strnicmp(z, "<html>", 6)!=0 ) return 0;
  for(p=&z[6]; *p; p++){
    if( *p=='<' && sqlite3_strnicmp(p, "</html>", 7)==0 ){
      return (int)(p - z) + 7;
    }
  }
  return 0;
}
723
724 /*
725 ** Output nText characters zText as HTML. Do not allow markup other
726 ** than the markup for which isAllowed() returns true.
727 **
728 ** In the case of tags with external links, ensure they have a rel="nofollow"
729 ** attribute when g.noFollow is set.
730 **
731 ** FIXME: would be nice to translate relative URL targets if g.zLinkURL!=0
732 */
output_restricted_html(const char * zText,int nText)733 static void output_restricted_html(const char *zText, int nText){
734 int i, j, k;
735 for(i=0; i<nText; i++){
736 if( zText[i]!='<' ) continue;
737 if( i+1<nText ){
738 k = 1 + (zText[i+1]=='/');
739 for(j=k; i+j<nText && isalnum(zText[i+j]); j++){}
740 if( isAllowed(&zText[i+k], j-k)
741 && isAllowedAttr(&zText[i+j],nText-(i+j)) ){
742 if( g.noFollow && zText[i+j]!='>' && isLinkTag(&zText[i+k],j-k) ){
743 /* link tags are special. We want to allow them
744 ** but in order to discourage wiki spam we want to insert
745 ** something in the attributes... Note that we don't bother
746 ** when the tag doesn't have attributes.
747 */
748 cgi_append_content(zText,i + j);
749 zText += i+j;
750 nText -= i+j;
751 cgi_printf(" rel=\"nofollow\" ");
752 i = -1;
753 }
754 continue;
755 }
756 }
757 cgi_append_content(zText,i);
758 cgi_printf("<");
759 zText += i+1;
760 nText -= i+1;
761 i = -1;
762 }
763 cgi_append_content(zText,i);
764 }
765
766 /*
767 ** Output a formatted ticket link
768 */
output_ticket(int tn,int rn)769 void output_ticket(int tn, int rn){
770 if( g.okRead ){
771 char *zLink = (rn>0) ? format_link("tktview?tn=%d,%d",tn,rn)
772 : format_link("tktview?tn=%d",tn);
773 if( g.okTicketLink ) {
774 char **az = db_query(
775 "SELECT title,status FROM ticket WHERE tn=%d", tn);
776 if( az && az[0] && az[1] ){
777 cgi_printf("<a href=\"%z\" title=\"%h\">",zLink,az[0]);
778 /* FIXME: should use a <span> with the ticket status as a class */
779 if( az[1][0] == 'n' || az[1][0] == 'a' ){
780 cgi_printf("#%d",tn);
781 }else{
782 cgi_printf("<strike>#%d</strike>",tn);
783 }
784 cgi_printf("</a>");
785 return;
786 }
787 }
788 cgi_printf("<a href=\"%z\">#%d</a>",zLink,tn);
789 }else{
790 cgi_printf("#%d",tn);
791 }
792 }
793
794 /*
795 ** Output a formatted checkin link
796 */
output_chng(int cn)797 void output_chng(int cn){
798 if( g.okRead ){
799 char *zLink = format_link("chngview?cn=%d",cn);
800 if( g.okCheckinLink ){
801 char **az = db_query(
802 "SELECT milestone,user,message,branch FROM chng WHERE cn=%d", cn);
803 if( az && az[0] && az[1] && az[2] ){
804 if( az[0][0] && az[0][0] != '0' ){
805 cgi_printf("<a href=\"%z\"",zLink);
806 cgi_printf("title=\"Milestone [%d] %h (By %h)\">",
807 cn, az[2], az[1]);
808 cgi_printf("[%d]</a>", cn);
809 }else{
810 char *z = az[2];
811 int trimmed;
812
813 /* Mozilla and Firefox are quite sensitive to newlines
814 ** in link titles so we can't use '@' formatting here.
815 */
816 cgi_printf("<a href=\"%z\" title=\"Check-in [%d]", zLink,cn);
817 if( az[3] && az[3][0] ){
818 cgi_printf("on branch %h", az[3]);
819 }
820 cgi_printf(":"); /* want this tight to last text */
821 trimmed = output_trim_message(z, MN_CKIN_MSG, MX_CKIN_MSG);
822 cgi_printf("%h%s (By %h)\">[%d]</a>", z, trimmed?"...":"",
823 az[1],cn);
824 }
825 return;
826 }
827 }
828 cgi_printf("<a href=\"%z\">[%d]</a>", zLink, cn);
829 }else{
830 cgi_printf("[%d]", cn);
831 }
832 }
833
/*
** Overwrite every single quote and every backslash in z[] with a space.
** The string is modified in place.  A NULL pointer is accepted and
** ignored.
*/
static void sanitize_string( char *z ){
  if( z==0 ) return;
  for(; *z; z++){
    switch( *z ){
      case '\'':
      case '\\':
        *z = ' ';
        break;
      default:
        break;
    }
  }
}
845
markup_substitution(int strip_quotes,const char * zF,const Markup * sMarkup,const char * zInBlock,int lenBlock)846 static char *markup_substitution(
847 int strip_quotes,
848 const char *zF,
849 const Markup* sMarkup,
850 const char *zInBlock,
851 int lenBlock
852 ){
853 char *zOutput = NULL;
854 unsigned const char *zFormat = (unsigned const char*)zF;
855 char *azStrings[256];
856 int anLens[256];
857 int j, k;
858
859 /* If we don't treat args as blank where there aren't any,
860 ** we can't create rules like <b>%k %a</b> that work
861 ** with both {markup: this} and {markup: this is} formats. This
862 ** is a fairly common convention with most of the existing markups.
863 ** We strdup() the blank string because we _will_ free it when
864 ** we leave this subroutine.
865 */
866 char *zArgs = (sMarkup->zArgs==sMarkup->zKey)
867 ? strdup("")
868 : mprintf("%.*s", sMarkup->lenArgs, sMarkup->zArgs );
869 char *zMarkup = mprintf("%.*s", sMarkup->lenType, sMarkup->zType );
870 char *zKey = mprintf("%.*s", sMarkup->lenKey, sMarkup->zKey );
871 char *zBlock = mprintf("%.*s", lenBlock, zInBlock );
872 const char *zRoot = db_config("cvsroot", "");
873
874 if( strip_quotes ){
875 /* if we're dealing with a program markup, strip out
876 ** backslashes and quotes. This is why we can't just use
877 ** "subst".
878 */
879 sanitize_string(zMarkup);
880 sanitize_string(zKey);
881 sanitize_string(zArgs);
882 sanitize_string(zBlock);
883 }
884
885 memset( anLens, 0, sizeof(anLens) );
886 memset( azStrings, 0, sizeof(azStrings) );
887
888 azStrings['%'] = "%";
889 anLens['%'] = 1;
890
891 /* markup name substitution */
892 azStrings['m'] = zMarkup;
893 anLens['m'] = sMarkup->lenType;
894
895 /* key substitution */
896 azStrings['k'] = zKey;
897 anLens['k'] = sMarkup->lenKey;
898
899 /* block substitution */
900 azStrings['b'] = zBlock;
901 anLens['b'] = lenBlock;
902
903 /* argument substitution. args isn't necessarily the same as
904 ** sMarkup->zArgs. */
905 azStrings['a'] = zArgs;
906 anLens['a'] = strlen(zArgs);
907
908 /* argument substitution. args isn't necessarily the same as
909 ** sMarkup->zArgs. */
910 azStrings['x'] = zArgs[0] ? zArgs : zKey;
911 anLens['x'] = zArgs[0] ? strlen(zArgs) : sMarkup->lenKey;
912
913 /* cvsroot */
914 azStrings['r'] = (char*)zRoot;
915 anLens['r'] = strlen(zRoot);
916
917 /* basename... from this someone can get the db name */
918 azStrings['n'] = (char*)g.zName;
919 anLens['n'] = strlen(g.zName);
920
921 /* logged in user */
922 azStrings['u'] = (char*)g.zUser;
923 anLens['u'] = strlen(g.zUser);
924
925 /* capabilities */
926 azStrings['c'] = db_short_query(
927 "SELECT capabilities FROM user WHERE id='%q'",g.zUser);
928 anLens['c'] = azStrings['c'] ? strlen(azStrings['c']) : 0;
929
930 /* Calculate the space needed for the % subs.
931 */
932 for(k=j=0; zFormat[j]; j++){
933 if( zFormat[j] == '%' && anLens[zFormat[j+1]] ){
934 j ++;
935 k += anLens[zFormat[j]];
936 continue;
937 }
938 k ++;
939 }
940
941 /* (over)allocate an output buffer. By "over", I mean we get
942 ** the length of the original plus the length we think we need
943 ** for a fully substituted buffer.
944 */
945 zOutput = malloc(j + k + 1);
946 if( zOutput == NULL ){
947 free(zKey);
948 free(zArgs);
949 free(zMarkup);
950 free(zBlock);
951 if(azStrings['c']) free(azStrings['c']);
952 return NULL;
953 }
954
955 /* actually perform the substitutions */
956 for(k=j=0; zFormat[j]; j++){
957 if( zFormat[j] == '%' && azStrings[zFormat[j+1]]!=0 ){
958 j ++;
959 memcpy(&zOutput[k],azStrings[zFormat[j]],anLens[zFormat[j]]);
960 k += anLens[zFormat[j]];
961 continue;
962 }
963 zOutput[k++] = zFormat[j];
964 }
965 zOutput[k] = 0;
966
967 free(zKey);
968 free(zArgs);
969 free(zMarkup);
970 free(zBlock);
971 if(azStrings['c']) free(azStrings['c']);
972 return zOutput;
973 }
974
975 /*
976 ** Run the block (if any) out through the standard input of the pipeline
977 ** and feed the output of the pipeline into the CGI output.
978 **
979 ** It's assumed that zPipeline has been sanitized and stuff.
980 */
pipe_block(const char * zPipeline,const char * zBlock,int lenBlock)981 static void pipe_block(
982 const char *zPipeline,
983 const char *zBlock,
984 int lenBlock
985 ){
986 char zFile[PATH_MAX];
987 char *zB = mprintf("%.*s",lenBlock,zBlock);
988 FILE *fin = NULL;
989 char *zP;
990
991 /* Doing this without a temporary file is a bit nasty because of
992 ** potential deadlocks. It _can_ be done if you want to fight with
993 ** pipe(2) and stuff, but CVSTrac already has a write_to_temp() function
994 ** so we might as well be lazy and use it. Note that if lenBlock==0
995 ** we can just skip out using /dev/null.
996 */
997
998 zFile[0] = 0;
999 if( lenBlock==0 ) {
1000 /* In case the program takes arguments from the command line, we
1001 ** don't want to just treat it as a no-op. So pipe in /dev/null.
1002 */
1003 zP = mprintf( "%s </dev/null", zPipeline );
1004 }else if( !write_to_temp( zB, zFile, sizeof(zFile) ) ){
1005 zP = mprintf( "%s <%s", zPipeline, zFile );
1006 }else{
1007 if( zB ) free(zB);
1008 return;
1009 }
1010
1011 /* Block has been written, free so we don't forget later
1012 */
1013 if( zB ) free(zB);
1014
1015 fin = popen(zP,"r");
1016 free(zP);
1017
1018 /* HTML scrubbing doesn't work effectively on just individual lines. We
1019 ** really need to feed in the entire buffer or we're vulnerable to all
1020 ** sorts of whitespace stupidity.
1021 */
1022 zP = common_readfp(fin);
1023 if( zP ){
1024 output_restricted_html(zP, strlen(zP));
1025 free( zP );
1026 }
1027
1028 if( fin ){
1029 pclose(fin);
1030 }
1031
1032 if( zFile[0] ){
1033 unlink(zFile);
1034 }
1035 }
1036
1037 /*
1038 ** Output Wiki text while inserting the proper HTML control codes.
1039 ** The following formatting conventions are implemented:
1040 **
1041 ** * Characters with special meaning to HTML are escaped.
1042 **
1043 ** * Blank lines result in a paragraph break.
1044 **
1045 ** * Paragraphs where the first line is indented by two or more
1046 ** spaces are shown verbatim. None of the following rules apply
1047 ** to verbatim text.
1048 **
1049 ** * Lines beginning with "*: " begin a bullet in a bullet list.
1050 **
1051 ** * Lines beginning with "1: " begin an item in an enumerated list.
1052 **
1053 ** * Paragraphs beginning with "_: " are indented.
1054 **
1055 ** * Multiple colons can be used in *:, 1:, and _: for multiple
1056 ** levels of indentation.
1057 **
1058 ** * Text within _..._ is italic and text in *...* is bold.
1059 ** Text within **...** or ***...*** bold with a larger font.
1060 ** Text within =...= is fixed (code) font.
1061 **
1062 ** * Wiki page names (Words in initial caps) are enclosed in an
1063 ** appropriate hyperlink.
1064 **
1065 ** * Words that begin with "http:", "https:", "ftp:", or "mailto:"
1066 ** are enclosed in an appropriate hyperlink.
1067 **
1068 ** * Text of the form "#NNN" where NNN is a valid ticket number
1069 ** is converted into a hyperlink to the corresponding ticket.
1070 **
1071 ** * Text of the form "[NNN]" where NNN is a valid check-in number
1072 ** becomes a hyperlink to the checkin.
1073 **
1074 ** * {quote: XYZ} renders XYZ with all special meanings for XYZ escaped.
1075 **
1076 ** * {link: URL TEXT} renders TEXT with a link to URL. URL can be
1077 ** relative.
1078 **
1079 ** * {linebreak} renders a linebreak.
1080 **
1081 ** * {image: URL ALT} renders an in-line image from URL. URL can be
1082 ** relative or it can be the name of an attachment to zPageId.
1083 ** {leftimage: URL ALT} and {rightimage: URL ALT} create wrap-around
1084 ** images at the left or right margin.
1085 **
1086 ** * {clear} skips down the page far enough to clear any wrap-around
1087 ** images.
1088 **
1089 ** * {report: RN CAPTION} inlines the specified report, RN.
1090 ** {leftreport: RN CAPTION} and {rightreport: RN CAPTION} are also
1091 ** usable.
1092 **
1093 ** * Text between <html>...</html> is interpreted as HTML. A restricted
1094 ** subset of tags are supported - things like forms and javascript are
1095 ** intentionally excluded. The initial <html> must occur at the
1096 ** beginning of a paragraph.
1097 */
output_wiki(const char * zText,const char * zLinkSuffix,const char * zPageId)1098 void output_wiki(
1099 const char *zText, /* The text to be formatted */
1100 const char *zLinkSuffix, /* Suffix added to hyperlinks to Wiki */
1101 const char *zPageId /* Name of current page */
1102 ){
1103 int i, j, k;
1104 int aList[20]; /* See adjust_list_nesting for details */
1105 int inPRE = 0;
1106 int inB = 0;
1107 int inI = 0;
1108 int inT = 0;
1109 int inTab = 0;
1110 int v;
1111 int wordStart = 1; /* At the start of a word */
1112 int lineStart = 1; /* At the start of a line */
1113 int paraStart = 1; /* At the start of a paragraph */
1114 const char *zEndB; /* Text used to end a run of bold */
1115 char **azAttach; /* Attachments to zPageId */
1116 const char *zExtLink = "<font color=\"#a0a0a0\">¤</font>";
1117 static int once = 1;
1118 static int nTicket, nCommit;
1119 if( once ){
1120 nTicket = atoi(db_short_query("SELECT max(tn) FROM ticket"));
1121 nCommit = atoi(db_short_query("SELECT max(cn) FROM chng"));
1122 once = 0;
1123 }
1124
1125 i = 0;
1126 aList[0] = 0;
1127 azAttach = 0;
1128 zEndB = "";
1129 while( zText[i] ){
1130 char *z;
1131 int n;
1132 Markup sMarkup;
1133 int c = zText[i];
1134
1135 /* Text between <html>...</html> is interpreted as HTML.
1136 */
1137 if( c=='<' && (n = is_html(&zText[i]))>0 ){
1138 put_htmlized_text(&zText, i);
1139 zText += 6;
1140 cgi_printf("<div>");
1141 output_restricted_html(zText, n-13);
1142 cgi_printf("</div>");
1143 zText += n - 6;
1144 i = 0;
1145 continue;
1146 }
1147
1148 /* Markup may consist of special strings contained in curly braces.
1149 ** Examples: "{linebreak}" or "{quote: *:}"
1150 */
1151 if( c=='{' && is_markup(&zText[i], &sMarkup) ){
1152 /*
1153 ** Markup of the form "{linebreak}" forces a line break.
1154 */
1155 if( sMarkup.lenType==9 && strncmp(sMarkup.zType,"linebreak",9)==0 ){
1156 put_htmlized_text(&zText, i);
1157 zText += sMarkup.lenTotal;
1158 i = 0;
1159 cgi_printf("<br>\n");
1160 wordStart = lineStart = paraStart = 0;
1161 continue;
1162 }
1163
1164 /*
1165 ** Markup of the form "{clear}" moves down past any left or right
1166 ** aligned images.
1167 */
1168 if( sMarkup.lenType==5 && strncmp(sMarkup.zType,"clear",5)==0 ){
1169 put_htmlized_text(&zText, i);
1170 zText += sMarkup.lenTotal;
1171 i = 0;
1172 cgi_printf("<br clear=\"both\">\n");
1173 wordStart = lineStart = paraStart = 0;
1174 continue;
1175 }
1176
1177 /*
1178 ** Markup of the form "{quote: ABC}" writes out the text ABC exactly
1179 ** as it appears. This can be used to escape special meanings
1180 ** associated with ABC.
1181 */
1182 if( sMarkup.lenType==5 && strncmp(sMarkup.zType,"quote",5)==0 ){
1183 put_htmlized_text(&zText, i);
1184 if( sMarkup.zKey==sMarkup.zArgs ){
1185 n = sMarkup.lenKey;
1186 }else{
1187 n = &sMarkup.zArgs[sMarkup.lenArgs] - sMarkup.zKey;
1188 }
1189 put_htmlized_text(&sMarkup.zKey, n);
1190 zText += sMarkup.lenTotal;
1191 i = 0;
1192 wordStart = lineStart = paraStart = 0;
1193 continue;
1194 }
1195
1196 /*
1197 ** Markup of the form "{wiki: NAME TEXT}" creates a hyperlink
1198 ** to wiki page. The hyperlink appears on the screen as TEXT.
1199 */
1200 if( sMarkup.lenType==4 && strncmp(sMarkup.zType,"wiki",4)==0 ){
1201 int exists = 1;
1202 put_htmlized_text(&zText, i);
1203 if( g.okRdWiki ) {
1204 char *zPage = mprintf("%.*s", sMarkup.lenKey, sMarkup.zKey);
1205 exists = db_exists("SELECT 1 FROM wiki WHERE name='%q'", zPage);
1206 if( !exists ) cgi_printf("<em>");
1207 cgi_printf("<a href=\"wiki?p=%t%s\">", zPage, zLinkSuffix);
1208 free(zPage);
1209 }
1210 put_htmlized_text(&sMarkup.zArgs, sMarkup.lenArgs);
1211 if( g.okRdWiki ) {
1212 cgi_printf("</a>");
1213 if( !exists ) cgi_printf("</em>");
1214 }
1215 zText += sMarkup.lenTotal;
1216 i = 0;
1217 wordStart = lineStart = paraStart = 0;
1218 continue;
1219 }
1220
1221 /*
1222 ** Markup of the form "{link: TO TEXT}" creates a hyperlink to TO.
1223 ** The hyperlink appears on the screen as TEXT. TO can be a any URL,
1224 ** including a relative URL such as "chngview?cn=123".
1225 */
1226 if( sMarkup.lenType==4 && strncmp(sMarkup.zType,"link",4)==0 ){
1227 put_htmlized_text(&zText, i);
1228 if( is_url(sMarkup.zKey)>0 ){
1229 cgi_printf("%s<a href=\"%.*s\"%s>",
1230 zExtLink, sMarkup.lenKey, sMarkup.zKey,
1231 g.noFollow ? " rel=\"nofollow\"" : "");
1232 }else{
1233 char *zLink = format_link("%.*s", sMarkup.lenKey, sMarkup.zKey);
1234 cgi_printf("<a href=\"%z\">", zLink);
1235 }
1236 put_htmlized_text(&sMarkup.zArgs, sMarkup.lenArgs);
1237 cgi_printf("</a>");
1238 zText += sMarkup.lenTotal;
1239 i = 0;
1240 wordStart = lineStart = paraStart = 0;
1241 continue;
1242 }
1243
1244 /*
1245 ** Markup of the form "{image: URL ALT}" creates an in-line image to
1246 ** URL with ALT as the alternate text. URL can be relative (for example
1247 ** the URL of an attachment.
1248 **
1249 ** If the URL is the name of an attachment, then automatically
1250 ** convert it to the correct URL for that attachment.
1251 */
1252 if( (sMarkup.lenType==5 && strncmp(sMarkup.zType,"image",5)==0)
1253 || (sMarkup.lenType==9 && strncmp(sMarkup.zType,"leftimage",9)==0)
1254 || (sMarkup.lenType==10 && strncmp(sMarkup.zType,"rightimage",10)==0)
1255 ){
1256 char *zUrl = 0;
1257 const char *zAlign;
1258 char *zAlt = htmlize(sMarkup.zArgs, sMarkup.lenArgs);
1259 if( azAttach==0 && zPageId!=0 ){
1260 azAttach = (char **)
1261 db_query("SELECT fname, atn FROM attachment "
1262 "WHERE tn='%q'", zPageId);
1263 }
1264 if( azAttach ){
1265 int ix;
1266 for(ix=0; azAttach[ix]; ix+=2){
1267 if( strncmp(azAttach[ix],sMarkup.zKey,sMarkup.lenKey)==0 ){
1268 free(zUrl);
1269 zUrl = format_link("attach_get/%s/%h",
1270 azAttach[ix+1], azAttach[ix]);
1271 break;
1272 }
1273 }
1274 }
1275 if( zUrl==0 ){
1276 zUrl = htmlize(sMarkup.zKey, sMarkup.lenKey);
1277 }
1278 put_htmlized_text(&zText, i);
1279 switch( sMarkup.zType[0] ){
1280 case 'l': case 'L': zAlign = " align=\"left\""; break;
1281 case 'r': case 'R': zAlign = " align=\"right\""; break;
1282 default: zAlign = ""; break;
1283 }
1284 cgi_printf("<img src=\"%s\" alt=\"%s\"%s>", zUrl, zAlt, zAlign);
1285 free(zUrl);
1286 free(zAlt);
1287 zText += sMarkup.lenTotal;
1288 i = 0;
1289 wordStart = lineStart = paraStart = 0;
1290 continue;
1291 }
1292
1293 /*
1294 ** Markup of the form "{report: RN}" embeds a report into the output.
1295 */
1296 if( (sMarkup.lenType==6 && strncmp(sMarkup.zType,"report",6)==0)
1297 || (sMarkup.lenType==11 && strncmp(sMarkup.zType,"rightreport",11)==0)
1298 || (sMarkup.lenType==10 && strncmp(sMarkup.zType,"leftreport",10)==0)
1299 ){
1300 char *zCaption = mprintf("%.*s", sMarkup.lenArgs, sMarkup.zArgs);
1301 char *zAlign = 0;
1302 if( sMarkup.lenType==11 ){
1303 zAlign = "align=\"right\"";
1304 }else if( sMarkup.lenType==10 ){
1305 zAlign = "align=\"left\"";
1306 }
1307 put_htmlized_text(&zText, i);
1308 embed_view( atoi(sMarkup.zKey),
1309 (sMarkup.zArgs==sMarkup.zKey) ? "" : zCaption,
1310 zAlign );
1311 free(zCaption);
1312 zText += sMarkup.lenTotal;
1313 i = 0;
1314 wordStart = lineStart = paraStart = 0;
1315 continue;
1316 }
1317
1318 /* Markup of the form "{markups}" outputs the list of custom markups
1319 ** formats with descriptions.
1320 */
1321 if( sMarkup.lenType==7 && strncmp(sMarkup.zType,"markups",7)==0 ){
1322 char **azMarkup;
1323 put_htmlized_text(&zText, i);
1324
1325 azMarkup = db_query(
1326 "SELECT markup, description FROM markup ORDER BY markup;");
1327 if( azMarkup && azMarkup[0] ){
1328 @ <p><big><b>Custom Markup Rules</b></big></p>
1329 @ <p>The following are custom markup rules implemented
1330 @ on this server.</p>
1331 for(j=0; azMarkup[j]; j+=2){
1332 if( azMarkup[j+1] && azMarkup[j+1][0] ){
1333 /* this markup has a description, output it.
1334 */
1335 @ <p>
1336 output_formatted(azMarkup[j+1],NULL);
1337 @ </p>
1338 }else{
1339 @ <p>{%h(azMarkup[j])} (no description)</p>
1340 }
1341 }
1342 }
1343
1344 zText += sMarkup.lenTotal;
1345 i = 0;
1346 wordStart = lineStart = paraStart = 0;
1347 continue;
1348 }
1349
1350 /* It could be custom markup. There are two kinds of custom markups
1351 ** available. The first is a simple format string such
1352 ** "key=%k args=%a" where %k is replaced by the markup key and %a
1353 ** by any following arguments. More flexibility would probably be
1354 ** nice, but that's how the existing markup logic works. The second
1355 ** form of markup is an external executable which gets passed the
1356 ** key and args on the command line and any output is dumped right
1357 ** into the output stream.
1358 */
1359 if( sMarkup.zType && sMarkup.lenType ) {
1360 /* sMarkup.zType is a pointer into the text buffer, not a NUL
1361 ** terminated token. This is actually the case with everything
1362 ** in sMarkup. Note that the markup type is already checked to
1363 ** be only chars that pass isalpha() so we can avoid "%.*q".
1364 */
1365 char **azMarkup = db_query(
1366 "SELECT type,formatter FROM markup WHERE markup='%.*s';",
1367 sMarkup.lenType, sMarkup.zType);
1368
1369 if( azMarkup && azMarkup[0] && azMarkup[1] ){
1370 /* We've found a custom formatter for this type */
1371
1372 int bl = sMarkup.lenTotal;
1373 int cl = 0;
1374 int type = atoi(azMarkup[0]);
1375 char *zOutput;
1376
1377 put_htmlized_text(&zText, i);
1378
1379 /* handle blocks. This basically means we scan ahead to find
1380 ** "end<markup>. bl becomes the total length of the block
1381 ** and cl is everything up the the {end<markup>}. If we can't
1382 ** find a match, bl becomes zero and we end up just outputting
1383 ** the raw markup tag.
1384 */
1385 if( type==2 || type==3 ){
1386 char *zEnd = mprintf("{end%.*s}", sMarkup.lenType, sMarkup.zType);
1387 int el = strlen(zEnd);
1388 while( zText[bl] && strncmp(&zText[bl],zEnd,el)){ bl++; }
1389 if( zText[bl]!=0 ){
1390 /* found a matching end tag. Note that bl includes the
1391 ** length of the initial markup which is not part of the
1392 ** actual content. Fix that. bl doesn't include the length
1393 ** of the end markup tag. Fix that too.
1394 */
1395 cl = bl - sMarkup.lenTotal;
1396 bl += el;
1397 } else {
1398 /* that didn't work, restore to original value.
1399 */
1400 bl = sMarkup.lenTotal;
1401 }
1402 free(zEnd);
1403 }
1404
1405 /* Substitutions are basically the same for all types of
1406 ** formatters, except that quotes are stripped from arguments
1407 ** to programs.
1408 */
1409 zOutput = markup_substitution( (type==1 || type==3),
1410 azMarkup[1], &sMarkup, &zText[sMarkup.lenTotal], cl );
1411 if( bl && zOutput ){
1412 if( type == 0 || type == 2 ){
1413 output_restricted_html(zOutput, strlen(zOutput));
1414 }else if( type==1 || (type==3 && cl==0) ){
1415 pipe_block(zOutput, "", 0);
1416 }else if( type==3 ){
1417 pipe_block(zOutput, &zText[sMarkup.lenTotal], cl);
1418 }
1419
1420 free(zOutput);
1421 }
1422
1423 zText += bl;
1424 i = 0;
1425 wordStart = lineStart = paraStart = 0;
1426 continue;
1427 }
1428 }
1429 }
1430
1431 if( paraStart ){
1432 put_htmlized_text(&zText, i);
1433
1434 /* Blank lines at the beginning of a paragraph are ignored.
1435 */
1436 if( isspace(c) && (j = is_blank_line(&zText[i]))>0 ){
1437 zText += j;
1438 continue;
1439 }
1440
1441 /* If the first line of a paragraph begins with a tab or with two
1442 ** or more spaces, then that paragraph is printed verbatim.
1443 */
1444 if( c=='\t' || (c==' ' && (zText[i+1]==' ' || zText[i+1]=='\t')) ){
1445 if( !inPRE ){
1446 if( inB ){ cgi_printf(zEndB); inB=0; }
1447 if( inI ){ cgi_printf("</i>"); inI=0; }
1448 if( inT ){ cgi_printf("</tt>"); inT=0; }
1449 if( inTab ){ cgi_printf("</table>"); inTab=0; }
1450 adjust_list_nesting(aList, 0);
1451 cgi_printf("<pre>\n");
1452 inPRE = 1;
1453 }
1454 }
1455 } /* end if( paraStart ) */
1456
1457 if( lineStart ){
1458 /* Blank lines in the middle of text cause a paragraph break
1459 */
1460 if( isspace(c) && (j = is_blank_line(&zText[i]))>0 ){
1461 put_htmlized_text(&zText, i);
1462 zText += j;
1463 if( inB ){ cgi_printf(zEndB); inB=0; }
1464 if( inI ){ cgi_printf("</i>"); inI=0; }
1465 if( inT ){ cgi_printf("</tt>"); inT=0; }
1466 if( inTab ){ cgi_printf("</table>"); inTab=0; }
1467 if( inPRE ){ cgi_printf("</pre>\n"); inPRE = 0; }
1468 is_list_elem(zText, &k);
1469 if( abs(k)<aList[0] ) adjust_list_nesting(aList, k);
1470 if( zText[0]!=0 ){ cgi_printf("\n<p>"); }
1471 wordStart = lineStart = paraStart = 1;
1472 i = 0;
1473 continue;
1474 }
1475 } /* end if( lineStart ) */
1476
1477 if( lineStart && !inPRE ){
1478 /* If we are not in verbatim text and a line begins with "*:", then
1479 ** generate a bullet. Or if the line begins with "NNN:" where NNN
1480 ** is a number, generate an enumeration item.
1481 */
1482 if( (j = is_list_elem(&zText[i], &k))>0 ){
1483 put_htmlized_text(&zText, i);
1484 adjust_list_nesting(aList, k);
1485 if( inTab ){ cgi_printf("</table>"); inTab=0; }
1486 if( zText[0]!='_' ) cgi_printf("<li>");
1487 zText += j;
1488 i = 0;
1489 wordStart = 1;
1490 lineStart = paraStart = 0;
1491 continue;
1492 }
1493
1494 /* Four or more "-" characters on at the beginning of a line that
1495 ** contains no other text results in a horizontal rule.
1496 */
1497 if( (c=='-' || c=='=') && (j = is_horizontal_rule(&zText[i]))>0 ){
1498 put_htmlized_text(&zText, i);
1499 adjust_list_nesting(aList, 0);
1500 if( inTab ){ cgi_printf("</table>"); inTab=0; }
1501 cgi_printf("<hr>\n");
1502 zText += j;
1503 if( *zText ) zText++;
1504 i = 0;
1505 lineStart = wordStart = 1;
1506 paraStart = 1;
1507 continue;
1508 }
1509
1510 /* '|' at the start of a line may be a table
1511 */
1512 if( c=='|' && (j = is_table_row(&zText[i]))>0 ){
1513 put_htmlized_text(&zText, i);
1514 adjust_list_nesting(aList, 0);
1515 if( !inTab ){
1516 cgi_printf("<table border=\"1\" cellspacing=\"0\">\n");
1517 inTab = 1;
1518 }
1519 output_table_row(zText,j);
1520 zText += j;
1521 i = 0;
1522 wordStart = 1;
1523 lineStart = paraStart = 0;
1524 continue;
1525 }
1526 } /* end if( lineStart && !inPre ) */
1527
1528 if( wordStart && !inPRE ){
1529 /* A wiki name at the beginning of a word which is not in verbatim
1530 ** text generates a hyperlink to that wiki page.
1531 **
1532 ** Special case: If the name is in CamelCase but ends with a "_", then
1533 ** suppress the "_" and do not generate the hyperlink. This allows
1534 ** CamelCase words that are not wiki page names to appear in text.
1535 */
1536 if( g.okRdWiki && isupper(c) && (j = is_wiki_name(&zText[i]))>0 ){
1537 int exists = db_exists("SELECT 1 FROM wiki WHERE name='%.*s'",
1538 j, &zText[i]);
1539 put_htmlized_text(&zText, i);
1540 if( !exists ) cgi_printf("<em>");
1541 cgi_printf("<a href=\"%z\">%.*s</a>",
1542 format_link("wiki?p=%.*s%s", j, zText, zLinkSuffix),
1543 j, zText);
1544 if( !exists ) cgi_printf("</em>");
1545 zText += j;
1546 i = 0;
1547 wordStart = lineStart = paraStart = 0;
1548 continue;
1549 }
1550
1551 if( g.okCheckout && c=='/' && (j = is_repository_file(&zText[i]))>0 ){
1552 char *zFile;
1553 put_htmlized_text(&zText, i);
1554 zFile = mprintf("%.*s", j-1, zText+1);
1555 cgi_printf("<a href=\"%z\">/%h</a>",
1556 format_link("rlog?f=%T", zFile), zFile);
1557 free(zFile);
1558 zText += j;
1559 i = 0;
1560 wordStart = lineStart = paraStart = 0;
1561 continue;
1562 }
1563
1564 /* A "_" at the beginning of a word puts us into an italic font.
1565 */
1566 if( c=='_' && !inB && !inI && !inT && font_terminator(&zText[i+1],c,1) ){
1567 put_htmlized_text(&zText, i);
1568 i = 0;
1569 zText++;
1570 cgi_printf("<i>");
1571 inI = 1;
1572 continue;
1573 }
1574
1575 /* A "=" at the beginning of a word puts us into an fixed font.
1576 */
1577 if( c=='=' && !inB && !inI && !inT && font_terminator(&zText[i+1],c,1) ){
1578 put_htmlized_text(&zText, i);
1579 i = 0;
1580 zText++;
1581 cgi_printf("<tt>");
1582 inT = 1;
1583 continue;
1584 }
1585
1586 /* A "*" at the beginning of a word puts us into a bold font.
1587 */
1588 if( c=='*' && !inB && !inI && !inT && (j = count_stars(&zText[i]))>=1
1589 && j<=3 && font_terminator(&zText[i+j],c,j) ){
1590 const char *zBeginB = "";
1591 put_htmlized_text(&zText, i);
1592 i = 0;
1593 zText += j;
1594 switch( j ){
1595 case 1: zBeginB = "<b>"; zEndB = "</b>"; break;
1596 case 2: zBeginB = "<big><b>"; zEndB = "</b></big>"; break;
1597 case 3: zBeginB = "<big><big><b>"; zEndB = "</b></big></big>"; break;
1598 }
1599 cgi_printf(zBeginB);
1600 inB = j;
1601 continue;
1602 }
1603
1604
1605 /* Words that begin with "http:" or "https:" or "ftp:" or "mailto:"
1606 ** become hyperlinks.
1607 */
1608 if( (c=='h' || c=='f' || c=='m') && (j=is_url(&zText[i]))>0 ){
1609 put_htmlized_text(&zText, i);
1610 z = htmlize(zText, j);
1611 if( is_image(z, strlen(z)) ){
1612 cgi_printf("<img src=\"%s\" alt=\"%s\"%s>", z, z,
1613 g.noFollow ? " rel=\"nofollow\"" : "");
1614 }else{
1615 cgi_printf("%s<a href=\"%s\"%s>%s</a>",
1616 zExtLink, z,
1617 g.noFollow ? " rel=\"nofollow\"" : "", z);
1618 }
1619 free(z);
1620 zText += j;
1621 i = 0;
1622 wordStart = lineStart = paraStart = 0;
1623 continue;
1624 }
1625
1626 /* If the user has read permission on tickets and a word is of the
1627 ** form "#NNN" where NNN is a sequence of digits, then generate a
1628 ** hyperlink to ticket number NNN.
1629 */
1630 if( c=='#' && g.okRead && (j = ndigit(&zText[i+1]))>0
1631 && is_eow(&zText[i+1+j],0)
1632 && (v = atoi(&zText[i+1]))>0 && v<=nTicket ){
1633 put_htmlized_text(&zText, i);
1634 output_ticket(v,0);
1635 zText += j;
1636 if( *zText ) zText++;
1637 i = 0;
1638 wordStart = lineStart = paraStart = 0;
1639 continue;
1640 }
1641
1642 /* If the user has checkout permissions and a word is of the form
1643 ** "[NNN]" where NNN is a checkin number, then generate a hyperlink
1644 ** to check-in NNN.
1645 */
1646 if( c=='[' && g.okRead && (j = ndigit(&zText[i+1]))>0
1647 && is_eow(&zText[i+j+2],0)
1648 && (v = atoi(&zText[i+1]))>0 && v<=nCommit
1649 && zText[i+j+1]==']' ){
1650 put_htmlized_text(&zText, i);
1651 output_chng(v);
1652 zText += j+1;
1653 if( *zText ) zText++;
1654 i = 0;
1655 wordStart = lineStart = paraStart = 0;
1656 continue;
1657 }
1658 } /* end if( wordStart && !inPre ) */
1659
1660 /* A "*", "=", or a "_" at the end of a word takes us out of bold,
1661 ** fixed or italic mode.
1662 */
1663 if( inB && c=='*' && !isspace(zText[i-1]) && zText[i-1]!='*' &&
1664 (j = count_stars(&zText[i]))==inB && is_eow(&zText[i+j],0) ){
1665 inB = 0;
1666 put_htmlized_text(&zText, i);
1667 i = 0;
1668 zText += j;
1669 cgi_printf(zEndB);
1670 continue;
1671 }
1672 if( inT && c=='=' && !isspace(zText[i-1]) && is_eow(&zText[i+1],0) ){
1673 put_htmlized_text(&zText, i);
1674 i = 0;
1675 zText++;
1676 inT = 0;
1677 cgi_printf("</tt>");
1678 continue;
1679 }
1680 if( inI && c=='_' && !isspace(zText[i-1]) && is_eow(&zText[i+1],0) ){
1681 put_htmlized_text(&zText, i);
1682 i = 0;
1683 zText++;
1684 inI = 0;
1685 cgi_printf("</i>");
1686 continue;
1687 }
1688 if( wordStart ){
1689 wordStart = isspace(c) || c=='(' || c=='"';
1690 }else{
1691 wordStart = isspace(c);
1692 }
1693 lineStart = c=='\n';
1694 paraStart = 0;
1695 i++;
1696 }
1697 if( zText[0] ) cgi_printf("%h", zText);
1698 if( inB ) cgi_printf("%s\n",zEndB);
1699 if( inT ) cgi_printf("</tt>\n");
1700 if( inI ) cgi_printf("</i>\n");
1701 if( inTab ){ cgi_printf("</table>"); inTab=0; }
1702 adjust_list_nesting(aList, 0);
1703 if( inPRE ) cgi_printf("</pre>\n");
1704 }
1705
/*
** Write zText to the output as HTML, applying every wiki formatting
** rule implemented by output_wiki().
**
** This is output_wiki() with an empty link suffix.  zPageId is handed
** through unchanged.
*/
void output_formatted(const char *zText, const char *zPageId){
  const char *zNoSuffix = "";   /* Nothing is appended to wiki links */
  output_wiki(zText, zNoSuffix, zPageId);
}
1723
/*
** This routine alters a check-in message to make it more readable
** in a timeline.  The following changes are made:
**
**    *:  Remove all leading whitespace.  This prevents the text from
**        being displayed verbatim.
**
**    *:  If the message begins with "*:" or "N:" (where N is a number)
**        then strip it out.
**
**    *:  Change all newlines to spaces.  This will disable paragraph
**        breaks, verbatim paragraphs, enumerations, and bullet lists.
**
**    *:  Replace all internal list markups with '+' followed by spaces.
**        (Otherwise, bullet lists turn into boldface).
**
**    *:  Collapse contiguous whitespace into a single space character.
**
**    *:  Truncate the string at the first whitespace character that
**        is at least mxChar characters from the beginning of the
**        rewritten string.  If a paragraph break was seen more than
**        mnChar characters in, truncate at that paragraph break instead.
**
** This routine changes the message in place.  It returns non-zero if
** the message was truncated and zero if the original text is still
** all there (though perhaps altered).  A NULL zMsg is left alone and
** yields zero.
*/
int output_trim_message(char *zMsg, int mnChar, int mxChar){
  int i, j, k, n;    /* i reads from zMsg, j writes to it; j never passes i */
  int brkpt = 0;     /* First paragraph break after zMsg[mnChar] */

  if( zMsg==0 ) return 0;

  /* Characters are cast to unsigned char before every isspace() call.
  ** Passing a negative char (any non-ASCII byte where char is signed)
  ** to a <ctype.h> function is undefined.
  */
  for(i=0; isspace((unsigned char)zMsg[i]); i++){}
  i += is_list_elem(&zMsg[i], &k);
  for(j=0; zMsg[i]; i++){
    int c = zMsg[i];
    if( c=='\n' ){
      /* Remember only the first paragraph break past mnChar */
      if( j>mnChar && is_blank_line(&zMsg[i+1]) && brkpt==0 ){
        brkpt = j;
      }
      c = ' ';
      /* Overwrite a list marker on the next line (n bytes) with "+"
      ** followed by spaces; it is then copied like ordinary text. */
      if( (n = is_list_elem(&zMsg[i+1],&k))>0 ) {
        zMsg[i+1] = '+';
        memset(&zMsg[i+2],' ',n-1);
      }
    }
    if( isspace((unsigned char)c) ){
      if( j>=mxChar ){
        zMsg[j] = 0;
        if( brkpt>0 ) zMsg[brkpt] = 0;
        return 1;
      }
      /* Emit a single space, and never at the start of the output */
      if( j>0 && !isspace((unsigned char)zMsg[j-1]) ){
        zMsg[j++] = ' ';
      }
    }else{
      zMsg[j++] = c;
    }
  }
  zMsg[j] = 0;
  return 0;
}
1787
1788 /*
1789 ** Append HTML text to the output that describes the formatting
1790 ** conventions implemented by the output_formatted() function
1791 ** above.
1792 */
append_formatting_hints(void)1793 void append_formatting_hints(void){
1794 char **az;
1795 int j;
1796 @ <ul>
1797 @ <li><p>
1798 @ Blank lines divide paragraphs.
1799 @ </p></li>
1800 @
1801 @ <li><p>
1802 @ If a paragraph is indented by a tab or by two or more spaces,
1803 @ it is displayed verbatim -- in a constant-width font with all
1804 @ spacing and line breaks preserved.
1805 @ </p></li>
1806 @
1807 @ <li><p>
1808 @ Surround phrases by underscores, asterisks or equals
1809 @ for italic, bold or fixed text.
1810 @ (Ex: "<tt>_italic text_, *bold text*, =fixed text=</tt>")
1811 @ Use two or three asterisks for bold text in a larger font.
1812 @ </p></li>
1813 @
1814 @ <li><p>
1815 if( g.okRead ){
1816 @ Text like "<tt>#123</tt>" becomes a hyperlink to ticket #123.
1817 }
1818 if( g.okCheckout ){
1819 @ Text like "<tt>[456]</tt>" becomes a hyperlink to
1820 @ check-in [456].
1821 }
1822 if( g.okRdWiki ){
1823 @ An absolute URL, a wiki page name becomes a hyperlink.
1824 @ Also markup of the form "<tt>{wiki: <i>title text</i>}</tt>"
1825 @ becomes a hyperlink to the wiki document of <i>title</i>.
1826 } else {
1827 @ An absolute URL becomes a hyperlink.
1828 }
1829 @ Or use markup of the form:
1830 @ "<tt>{link: <i>url text</i>}</tt>".
1831 @ </p></li>
1832 @
1833 @ <li><p>
1834 @ A path to a file in the repository becomes a link to its rlog page:
1835 @ "<tt>/path/to/format.c</tt>".
1836 @ </p></li>
1837 @
1838 @ <li><p>
1839 @ The characters "<tt>*:</tt>" or "<tt>1:</tt>" at the beginning of a line
1840 @ produce a bullet or enumeration list.
1841 @ Use additional colons for nested lists.
1842 @ </p></li>
1843 @
1844 @ <li><p>
1845 @ Create a table by wrapping cells with "<tt>|</tt>", starting at the
1846 @ beginning of a line. Each cell is separated with a "<tt>|</tt>" and
1847 @ each line should end with a "<tt>|</tt>".
1848 @ </p></li>
1849 @
1850 @ <li><p>
1851 @ Use "<tt>_:</tt>" at the beginning of a paragraph to indent that
1852 @ paragraph. Multiple colons indent more.
1853 @ </p></li>
1854 @
1855 @ <li><p>
1856 @ Four or more "-" or "=" characters on a line by themselves generate a
1857 @ horizontal rule (the <hr> markup of HTML).
1858 @ </p></li>
1859 @
1860 @ <li><p>
1861 @ Create a line-break using "<tt>{linebreak}</tt>".
1862 @ </p></li>
1863 @
1864 @ <li><p>
1865 @ Use "<tt>{quote: <i>text</i>}</tt>" to display <i>text</i>.
1866 @ </p></li>
1867 @
1868 @ <li><p>
1869 @ Insert in-line images using "<tt>{image: <i>url</i>}</tt>".
1870 @ The <i>url</i> can be the filename of an attachment.
1871 @ </p></li>
1872 @
1873 @ <li><p>
1874 @ Insert in-line reports using "<tt>{report: <i>rn</i>}</tt>". The <i>rn</i>
1875 @ is the report number (which isn't necessarily the same as the numbers on
1876 @ the <a href="reportlist">report list</a>).
1877 @ </p></li>
1878 @
1879 @ <li><p>
1880 @ Text between "<tt><html>...</html></tt>" is interpreted as HTML.
1881 @ </p></li>
1882 @
1883
1884 /* output custom markups.
1885 */
1886 az = db_query("SELECT markup, description FROM markup;");
1887 if( az && az[0] ){
1888 for(j=0; az[j]; j+=2){
1889 if( az[j+1] && az[j+1][0] ){
1890 /* this markup has a description, output it.
1891 */
1892 @ <li><p>
1893 output_formatted(az[j+1],NULL);
1894 @ </p></li>
1895 }else{
1896 @ <li><p>{%h(az[j])} (no description)</p></li>
1897 }
1898 }
1899 }
1900 @ </ul>
1901 }
1902