1 /*
2 ** Copyright (c) 2002 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2 of the License, or (at your option) any later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 ** General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public
15 ** License along with this library; if not, write to the
16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ** Boston, MA  02111-1307, USA.
18 **
19 ** Author contact information:
20 **   drh@hwaci.com
21 **   http://www.hwaci.com/drh/
22 **
23 *******************************************************************************
24 **
25 ** This file contains code used to do text searches of the database
26 */
27 #include "config.h"
28 #include "search.h"
29 
30 
/*
** Search for a keyword in text.  Return a matching score:
**
**     0     No sign of the word was found in the text
**     6     The word was found but not on a word boundary
**     8     The word was found on a word boundary, but with different
**           capitalization
**    10     The word was found on a word boundary exactly as given.
**
** Parameters:
**
**   zWord   The keyword.  Only the first n bytes are compared, so the
**           keyword does not need to be NUL-terminated at n.  The first
**           n bytes must not contain a NUL.
**   zText   NUL-terminated text to be scanned.
**   n       Number of bytes of zWord to use.  If n<=0 then strlen(zWord)
**           is used instead.
**   pIdx    If not NULL, *pIdx is set to the byte offset within zText of
**           the first occurrence that earned the returned score, or to -1
**           if the score is 0.
*/
static int score_word(const char *zWord, const char *zText, int n, int *pIdx){
  int c1, c2, i, best;
  int idx = -1;

  best = 0;
  /* The <ctype.h> routines require a value representable as unsigned char;
  ** passing a negative plain char is undefined behavior. */
  c1 = (unsigned char)zWord[0];
  if( isupper(c1) ){
    c2 = tolower(c1);
  }else{
    c2 = toupper(c1);
  }
  if( n<=0 ) n = strlen(zWord);
  for(i=0; zText[i]; i++){
    int c = (unsigned char)zText[i];
    /* Cheap first-character filter before the full comparison */
    if( (c==c1 || c==c2) && sqlite3_strnicmp(zWord,&zText[i],n)==0 ){
      int score = 6;
      if( (i==0 || !isalnum((unsigned char)zText[i-1]))
           && (zText[i+n]==0 || !isalnum((unsigned char)zText[i+n])) ){
        score = 8;
        if( strncmp(zWord,&zText[i],n)==0 ){
          /* Cannot do better than an exact match; stop scanning */
          idx = i;
          best = score = 10;
          break;
        }
      }
      if( score>best ){
        best = score;
        idx = i;
      }
    }
  }
  if( pIdx ) *pIdx = idx;
  return best;
}
72 
73 /*
74 **    search(PATTERN, STRING, ...)
75 **
76 ** This routine implements the SQLite "search()" function.  There are two
77 ** arguments: text to be searched and a keyword pattern.  The function
78 ** returns an integer score which is higher depending on how well the
79 ** search went.
80 */
srchFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)81 void srchFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
82   int i, j, k, score, total;
83   char *zPattern;
84 
85   if( argc<2 || argv[0]==0 ) return;
86   zPattern = (char *)sqlite3_value_text(argv[0]);
87   total = 0;
88   if( zPattern && zPattern[0] ){
89     for(i=0; zPattern[i]; i++){
90       if( isspace(zPattern[i]) ) continue;
91       for(j=1; zPattern[i+j] && !isspace(zPattern[i+j]); j++){}
92       score = 0;
93       for(k=1; k<argc; k++){
94         int one_score;
95         char *zWord = (char *)sqlite3_value_text(argv[k]);
96         if( zWord==0 || zWord[0]==0 ) continue;
97         one_score = score_word(&zPattern[i], zWord, j, 0);
98         if( one_score>score ) score = one_score;
99       }
100       total += score;
101       i += j-1;
102     }
103   }
104   sqlite3_result_int(context, total);
105 }
106 
107 /*
108 **    highlight(WORD-LIST, TEXT, ...)
109 **
110 ** Return HTML which contains TEXT but with every word in WORD-LIST
111 ** enclosed within <b>...</b>.  Long segments of TEXT that do not
112 ** contain any matching words are replace with elipses.
113 */
highlightFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)114 static void highlightFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
115   int i, j, k;
116   int size;
117   int wn, idx;
118   int nKey;                 /* Number of search terms in zPattern */
119   char *azKey[50];          /* Up to 50 search terms in zPattern */
120   int keySize[50];          /* Number of characters in each search term */
121   int begin[5], end[5];     /* Up to 5 100-character segments of text */
122   int sbegin[5], send[5];   /* The same 5 segments, sorted and coalesced */
123   char mask[256];           /* True if first character of a key */
124   char *zAll;
125   char *zPattern;
126   char *z;
127 
128   /* Output is written into zOut[].  There are at most 5 exemplars of
129   ** about 100 characters each - total 500 characters.  But then we have
130   ** to insert <b> and </b> around search terms and escape HTML characters
131   ** such as < and > and &.  The maximum expansion is "& " converted
132   ** into "<b>&amp;</b> " or less than 7-to-1.  So the maximum output
133   ** is 3500.  Add a little extra space for the ellipses between
134   ** exemplars.  Plus some margin in case the calculation above is
135   ** wrong in some way.
136   */
137   char zOut[8000];
138 
139   /*
140   ** We must have a pattern and at least one text block to function.
141   */
142   if( argc<2 || argv[0]==0 ) return;
143   zPattern = (char *)sqlite3_value_text(argv[0]);
144   if( zPattern[0]==0 ) return;
145 
146   /*
147   ** Make a copy of the pattern.
148   */
149   zPattern = strdup(zPattern);
150   if( zPattern==0 ) return;
151 
152   /*
153   ** Concatenate all text to be analyzed.
154   */
155   size = 0;
156   for(k=1; k<argc; k++){
157     if( argv[k]==0 ) continue;
158     z = (char*)sqlite3_value_text(argv[k]);
159     if( z==0 ) continue;
160     size += strlen(z)+1;
161   }
162   zAll = malloc( size );
163   if( zAll==0 ) return;
164   for(i=0, k=1; k<argc; k++){
165     if( argv[k]==0 ) continue;
166     z = (char*)sqlite3_value_text(argv[k]);
167     if( z==0 ) continue;
168     if( i>0 ){ zAll[i++] = '\n'; }
169     strcpy(&zAll[i], z);
170     i += strlen(&zAll[i]);
171   }
172 
173   /*
174   ** Find as many as 5 exemplar segments in the text with each segment
175   ** as long as 100 characters.
176   */
177   nKey = 0;
178   for(wn=i=0; wn<50 && zPattern[i];){
179     int score;
180     if( isspace(zPattern[i]) ){ i++; continue; }
181     for(j=1; zPattern[i+j] && !isspace(zPattern[i+j]); j++){}
182     azKey[nKey] = &zPattern[i];
183     keySize[nKey++] = j;
184     score = score_word(&zPattern[i], zAll, j, &idx);
185     i += j;
186     if( zPattern[i] ) zPattern[i++] = 0;
187     if( score==0 ) continue;
188     begin[wn] = idx-50 > 0 ? idx-50 : 0;
189     end[wn] = begin[wn]+100 > size ? size : begin[wn]+100;
190     wn++;
191   }
192 
193   /*
194   ** Sort an coalesce the exemplars
195   */
196   if( wn==0 ){
197     begin[0] = 0;
198     end[0] = size>100 ? 100 : size;
199     wn = 1;
200   }
201   if( g.useUTF8 ){
202     /*
203     ** Avoid splitting UTF-8 characters by detecting high bits in the
204     ** begin/end characters.
205     */
206     for(i=0; i<wn; i++){
207       while( begin[i]>0 && (zAll[begin[i]]&0xc0)==0x80 ){
208         begin[i]--;
209       }
210       while( (zAll[end[i]]&0xc0)==0x80 ){
211         end[i]++;
212       }
213     }
214   }
215   for(i=0; i<5 && wn>0; i++){
216     int min = begin[0];
217     k = 0;
218     for(j=1; j<wn; j++){
219       if( begin[j]<min ){
220         k = j;
221         min = begin[j];
222       }
223     }
224     sbegin[i] = begin[k];
225     send[i] = end[k];
226     wn--;
227     begin[k] = begin[wn];
228     end[k] = end[wn];
229   }
230   wn = i;
231   begin[0] = sbegin[0];
232   end[0] = send[0];
233   for(i=j=1; i<wn; i++){
234     if( sbegin[i]>end[j-1]+1 ){
235       begin[j] = sbegin[i];
236       end[j] = send[i];
237       j++;
238     }else if( send[i]>end[j-1] ){
239       end[j-1] = send[i];
240     }
241   }
242   wn = j;
243 
244   /*
245   ** Initialize the mask[] array so that mask[x] has value 1 if x is the
246   ** first character of any key pattern.
247   */
248   memset(mask, 0, sizeof(mask));
249   for(i=0; i<nKey; i++){
250     mask[toupper(azKey[i][0])] = 1;
251     mask[tolower(azKey[i][0])] = 1;
252   }
253 
254   /*
255   ** Generate the output stream
256   */
257   z = zOut;
258   for(i=0; i<wn; i++){
259     if( begin[i]>0 ){
260       if( i>0 ){ *(z++) = ' '; }
261       strcpy(z, "<b>...</b> ");
262       z += strlen(z);
263     }
264     for(j=begin[i]; j<end[i]; j++){
265       int c = zAll[j];
266       if( c=='&' ){
267         strcpy(z, "&amp;");
268         z += 5;
269       }else if( c=='<' ){
270         strcpy(z, "&lt;");
271         z += 4;
272       }else if( c=='>' ){
273         strcpy(z, "&gt;");
274         z += 4;
275       }else if( mask[c] ){
276         for(k=0; k<nKey; k++){
277           int n;
278           if( tolower(c)!=tolower(azKey[k][0]) ) continue;
279           n = keySize[k];
280           if( sqlite3_strnicmp(&zAll[j],azKey[k],n)==0 ){
281             strcpy(z,"<b>");
282             z += 3;
283             while( n ){
284               c = zAll[j++];
285               if( c=='&' ){
286                 strcpy(z, "&amp;");
287                 z += 5;
288               }else if( c=='<' ){
289                 strcpy(z, "&lt;");
290                 z += 4;
291               }else if( c=='>' ){
292                 strcpy(z, "&gt;");
293                 z += 4;
294               }else{
295                 *(z++) = c;
296               }
297               n--;
298             }
299             j--;
300             strcpy(z,"</b>");
301             z += 4;
302             break;
303           }
304         }
305         if( k>=nKey ){
306           *(z++) = c;
307         }
308       }else{
309         *(z++) = c;
310       }
311     }
312   }
313   if( end[wn-1]<size ){
314     strcpy(z, " <b>...</b>");
315   }else{
316     *z = 0;
317   }
318 
319   /*
320   ** Report back the results
321   */
322   sqlite3_result_text(context, zOut, -1, SQLITE_TRANSIENT);
323   free(zAll);
324   free(zPattern);
325 }
326 
327 /*
328 ** WEBPAGE: /search
329 */
search_page(void)330 void search_page(void){
331   int srchTkt = PD("t","0")[0]!='0';
332   int srchCkin = PD("c","0")[0]!='0';
333   int srchWiki = PD("w","0")[0]!='0';
334   int srchFile = PD("f","0")[0]!='0';
335   int nPage = atoi(PD("p","0"));
336   const char *zPattern;
337   char **azResult = 0;
338   char *zPage;
339   sqlite3 *db;
340   char zSql[10000];
341 
342   login_check_credentials();
343   zPattern = P("s");
344   if( zPattern && zPattern[0] ){
345     zPattern = sqlite3_mprintf("%q",zPattern);
346   }
347   if( srchTkt+srchCkin+srchWiki+srchFile==0 ){
348     srchTkt = srchCkin = srchWiki = srchFile = 1;
349   }
350   if( !g.okRead ) srchTkt = 0;
351   if( !g.okCheckout ) srchFile = srchCkin = 0;
352   if( !g.okRdWiki ) srchWiki = 0;
353   if( srchTkt==0 && srchCkin==0 && srchWiki==0 && srchFile==0 ){
354     login_needed();
355     return;
356   }
357   if( zPattern && zPattern[0] && strlen(zPattern)<100 ){
358     int i = 0;
359     char *zConnect = " (";
360 
361     db = db_open();
362     db_add_functions();
363     sqlite3_create_function(db, "highlight", -1, SQLITE_ANY, 0,
364                             highlightFunc, 0, 0);
365 
366     appendf(zSql,&i,sizeof(zSql),"SELECT type, score, obj, title, body FROM");
367     if( srchTkt ){
368       appendf(zSql,&i,sizeof(zSql), "%s SELECT "
369          "1 AS type, "
370          "search('%s',title,description,remarks) as score, "
371          "tn AS obj, "
372          "highlight('%s',title) as title, "
373          "highlight('%s',description,remarks) as body FROM ticket "
374          "WHERE score>0",
375          zConnect, zPattern, zPattern, zPattern);
376       zConnect = " UNION ALL";
377     }
378     if( srchCkin ){
379       appendf(zSql,&i,sizeof(zSql),"%s SELECT "
380          "2 AS type, "
381          "search('%s',message) as score, "
382          "cn AS obj, "
383          "NULL as title, " /* ignored */
384          "highlight('%s',message) as body FROM chng "
385          "WHERE score>0",
386          zConnect, zPattern, zPattern);
387       zConnect = " UNION ALL";
388     }
389     if( srchWiki ){
390       appendf(zSql,&i,sizeof(zSql),"%s SELECT "
391          "3 AS type, "
392          "search('%s',name, text) as score, "
393          "name AS obj, "
394          "highlight('%s',name) as title, "
395          "highlight('%s',text) as body "
396          "FROM (select name as nm, min(invtime) as tm FROM wiki GROUP BY name) "
397          "    as x, wiki "
398          "WHERE score>0 AND x.nm=wiki.name AND x.tm=wiki.invtime",
399          zConnect, zPattern, zPattern, zPattern);
400       zConnect = " UNION ALL";
401     }
402     if( srchFile ){
403       appendf(zSql,&i,sizeof(zSql),"%s SELECT "
404          "CASE WHEN isdir THEN 6 ELSE 4 END as type, "
405          "search('%s', dir || '/' || base) as score, "
406          "dir || '/' || base AS obj, "
407          "dir || '/' || base as title, "
408          "highlight('%s',dir || '/' || base) as body FROM file "
409          "WHERE score>0",
410          zConnect, zPattern, zPattern);
411       zConnect = " UNION ALL";
412 
413       appendf(zSql,&i,sizeof(zSql),"%s SELECT "
414          "5 AS type, "
415          "search('%s', fname, description) as score, "
416          "atn AS obj, "
417          "fname as title, "
418          "highlight('%s',fname, description) as body FROM attachment "
419          "WHERE score>0 AND tn!=0",
420          zConnect, zPattern, zPattern);
421       zConnect = " UNION ALL";
422     }
423     appendf(zSql,&i,sizeof(zSql), ") ORDER BY score DESC LIMIT 30 OFFSET %d;",
424             nPage*30);
425     azResult = db_query("%s",zSql);
426   }
427   common_standard_menu("search", 0);
428   common_add_help_item("CvstracSearch");
429   common_header("Search");
430   @ <form action="search" method="GET">
431   @ Search:
432   @ <input type="text" size=40 name="s" value="%h(PD("s",""))">
433   @ <input type="submit" value="Go">
434   if( g.okRead + g.okCheckout + g.okRdWiki>1 ){
435     char *z;
436     @ <br>Look in:
437     if( g.okRead ){
438       z = srchTkt ? " checked " : "";
439       @ <label for="t"><input type="checkbox" name="t" id="t" value="1"%s(z)>
440       @    Tickets</label>
441     }
442     if( g.okCheckout ){
443       z = srchCkin ? " checked " : "";
444       @ <label for="c"><input type="checkbox" name="c" id="c" value="1"%s(z)>
445       @    Checkins</label>
446     }
447     if( g.okRdWiki ){
448       z = srchWiki ? " checked " : "";
449       @ <label for="w"><input type="checkbox" name="w" id="w" value="1"%s(z)>
450       @    Wiki Pages</label>
451     }
452     if( g.okCheckout ){
453       z = srchFile ? " checked " : "";
454       @ <label for="f"><input type="checkbox" name="f" id="f" value="1"%s(z)>
455       @    Filenames</label>
456     }
457   }
458   @ </form>
459   @ <div>
460   if( azResult && azResult[0] ){
461     int i, n;
462     @ <hr>
463     for(i=n=0; azResult[i]; i+=5, n++){
464       @ <p>
465       switch( atoi(azResult[i]) ){
466         case 1: /* ticket */
467           @ Ticket
468           output_ticket(atoi(azResult[i+2]),0);
469           @ : %s(azResult[i+3])
470           break;
471         case 2: /* check-in */
472           @ Check-in
473           output_chng(atoi(azResult[i+2]));
474           break;
475         case 3: /* wiki */
476           @ Wiki Page <a href="wiki?p=%s(azResult[i+2])">%s(azResult[i+3])</a>
477           break;
478         case 4: /* file */
479           @ File <a href="rlog?f=%t(azResult[i+2])">%h(azResult[i+3])</a>
480           break;
481         case 5: /* attachment */
482           zPage = db_short_query("SELECT tn FROM attachment WHERE atn=%d",
483                                  atoi(azResult[i+2]));
484           @ Attachment
485           @   <a href="attach_get/%s(azResult[i+2])/%t(azResult[i+3])">\
486           @   %s(azResult[i+3])</a> to
487           if( is_integer(zPage) ){
488             output_ticket(atoi(zPage),0);
489           }else{
490             @ wiki page <a href="wiki?p=%s(zPage)">%s(zPage)</a>
491           }
492           break;
493         case 6:{ /* directory */
494           const char* zDir = azResult[i+2];
495           if( zDir[0]=='/' ) zDir ++;
496           @ Directory <a href="dir?d=%t(zDir)">%h(zDir)</a>
497           break;
498         }
499       }
500       @ <br>%s(azResult[i+4])</p>
501     }
502     if( nPage || n>=30 ){
503       @ <hr><p align="center">
504       /* provide a history */
505       cgi_printf("Page: ");
506       for( i=0; i<nPage; i++ ){
507         cgi_printf("<a href=\"search?s=%T&t=%d&c=%d&w=%d&f=%d&p=%d\">%d</a> ",
508             zPattern, srchTkt, srchCkin, srchWiki, srchFile, i, i+1);
509       }
510       cgi_printf("%d&nbsp;&nbsp;", nPage+1);
511       if( n>=30 ){
512         cgi_printf("<a href=\"search?s=%T&t=%d&c=%d&w=%d&f=%d&p=%d\">More</a>",
513             zPattern, srchTkt, srchCkin, srchWiki, srchFile, nPage+1);
514       }
515       @ </p>
516     }
517   }else if( zPattern && zPattern[0] ){
518     @ No results for <strong>%h(zPattern)</strong>
519   }
520   @ </div>
521   common_footer();
522 }
523