1 /*
2 ** Copyright (c) 2002 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2 of the License, or (at your option) any later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public
15 ** License along with this library; if not, write to the
16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ** Boston, MA 02111-1307, USA.
18 **
19 ** Author contact information:
20 ** drh@hwaci.com
21 ** http://www.hwaci.com/drh/
22 **
23 *******************************************************************************
24 **
25 ** This file contains code used to do text searches of the database
26 */
27 #include "config.h"
28 #include "search.h"
29
30
/*
** Search zText for the keyword zWord and report how well it matches.
**
** Parameters:
**
**    zWord    The keyword.  Only the first n bytes are significant; the
**             caller must guarantee that zWord really holds at least n
**             non-NUL bytes.
**    zText    NUL-terminated text to be searched.
**    n        Length of the keyword in bytes.  If n<=0 then strlen(zWord)
**             is used instead.
**    pIdx     If not NULL, *pIdx receives the byte offset within zText of
**             the best match, or -1 if there was no match at all.
**
** Return a matching score:
**
**     0     No sign of the word was found in the text
**     6     The word was found (ignoring case) but not on a word boundary
**     8     The word was found on a word boundary, but only when
**           differences in capitalization are ignored
**    10     The word was found on a word boundary exactly as given.
**
** The first occurrence with the highest score wins.  Scanning stops as
** soon as a score of 10 is seen because nothing can beat it.
*/
static int score_word(const char *zWord, const char *zText, int n, int *pIdx){
  int c1, c2, i, best;
  int idx = -1;

  best = 0;

  /* c1 and c2 are the two capitalizations of the first letter of the
  ** keyword.  They are used as a cheap pre-filter before the full
  ** comparison.  The <ctype.h> routines are only defined for values
  ** representable as unsigned char (or EOF), so convert explicitly:
  ** a plain char holding a byte >= 0x80 may be negative.
  */
  c1 = (unsigned char)zWord[0];
  if( isupper(c1) ){
    c2 = tolower(c1);
  }else{
    c2 = toupper(c1);
  }
  if( n<=0 ) n = (int)strlen(zWord);

  for(i=0; zText[i]; i++){
    int c = (unsigned char)zText[i];
    int score;

    if( c!=c1 && c!=c2 ) continue;
    if( sqlite3_strnicmp(zWord, &zText[i], n)!=0 ) continue;

    /* A case-insensitive match of n bytes begins at offset i.  Since all
    ** n bytes matched non-NUL keyword bytes, zText[i+n] is at worst the
    ** terminating NUL and is safe to inspect.
    */
    score = 6;
    if( (i==0 || !isalnum((unsigned char)zText[i-1]))
     && (zText[i+n]==0 || !isalnum((unsigned char)zText[i+n])) ){
      score = 8;
      if( strncmp(zWord, &zText[i], n)==0 ){
        idx = i;
        best = 10;
        break;
      }
    }
    if( score>best ){
      best = score;
      idx = i;
    }
  }
  if( pIdx ) *pIdx = idx;
  return best;
}
72
73 /*
74 ** search(PATTERN, STRING, ...)
75 **
76 ** This routine implements the SQLite "search()" function. There are two
77 ** arguments: text to be searched and a keyword pattern. The function
78 ** returns an integer score which is higher depending on how well the
79 ** search went.
80 */
srchFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)81 void srchFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
82 int i, j, k, score, total;
83 char *zPattern;
84
85 if( argc<2 || argv[0]==0 ) return;
86 zPattern = (char *)sqlite3_value_text(argv[0]);
87 total = 0;
88 if( zPattern && zPattern[0] ){
89 for(i=0; zPattern[i]; i++){
90 if( isspace(zPattern[i]) ) continue;
91 for(j=1; zPattern[i+j] && !isspace(zPattern[i+j]); j++){}
92 score = 0;
93 for(k=1; k<argc; k++){
94 int one_score;
95 char *zWord = (char *)sqlite3_value_text(argv[k]);
96 if( zWord==0 || zWord[0]==0 ) continue;
97 one_score = score_word(&zPattern[i], zWord, j, 0);
98 if( one_score>score ) score = one_score;
99 }
100 total += score;
101 i += j-1;
102 }
103 }
104 sqlite3_result_int(context, total);
105 }
106
107 /*
108 ** highlight(WORD-LIST, TEXT, ...)
109 **
110 ** Return HTML which contains TEXT but with every word in WORD-LIST
111 ** enclosed within <b>...</b>. Long segments of TEXT that do not
112 ** contain any matching words are replace with elipses.
113 */
highlightFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)114 static void highlightFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
115 int i, j, k;
116 int size;
117 int wn, idx;
118 int nKey; /* Number of search terms in zPattern */
119 char *azKey[50]; /* Up to 50 search terms in zPattern */
120 int keySize[50]; /* Number of characters in each search term */
121 int begin[5], end[5]; /* Up to 5 100-character segments of text */
122 int sbegin[5], send[5]; /* The same 5 segments, sorted and coalesced */
123 char mask[256]; /* True if first character of a key */
124 char *zAll;
125 char *zPattern;
126 char *z;
127
128 /* Output is written into zOut[]. There are at most 5 exemplars of
129 ** about 100 characters each - total 500 characters. But then we have
130 ** to insert <b> and </b> around search terms and escape HTML characters
131 ** such as < and > and &. The maximum expansion is "& " converted
132 ** into "<b>&</b> " or less than 7-to-1. So the maximum output
133 ** is 3500. Add a little extra space for the ellipses between
134 ** exemplars. Plus some margin in case the calculation above is
135 ** wrong in some way.
136 */
137 char zOut[8000];
138
139 /*
140 ** We must have a pattern and at least one text block to function.
141 */
142 if( argc<2 || argv[0]==0 ) return;
143 zPattern = (char *)sqlite3_value_text(argv[0]);
144 if( zPattern[0]==0 ) return;
145
146 /*
147 ** Make a copy of the pattern.
148 */
149 zPattern = strdup(zPattern);
150 if( zPattern==0 ) return;
151
152 /*
153 ** Concatenate all text to be analyzed.
154 */
155 size = 0;
156 for(k=1; k<argc; k++){
157 if( argv[k]==0 ) continue;
158 z = (char*)sqlite3_value_text(argv[k]);
159 if( z==0 ) continue;
160 size += strlen(z)+1;
161 }
162 zAll = malloc( size );
163 if( zAll==0 ) return;
164 for(i=0, k=1; k<argc; k++){
165 if( argv[k]==0 ) continue;
166 z = (char*)sqlite3_value_text(argv[k]);
167 if( z==0 ) continue;
168 if( i>0 ){ zAll[i++] = '\n'; }
169 strcpy(&zAll[i], z);
170 i += strlen(&zAll[i]);
171 }
172
173 /*
174 ** Find as many as 5 exemplar segments in the text with each segment
175 ** as long as 100 characters.
176 */
177 nKey = 0;
178 for(wn=i=0; wn<50 && zPattern[i];){
179 int score;
180 if( isspace(zPattern[i]) ){ i++; continue; }
181 for(j=1; zPattern[i+j] && !isspace(zPattern[i+j]); j++){}
182 azKey[nKey] = &zPattern[i];
183 keySize[nKey++] = j;
184 score = score_word(&zPattern[i], zAll, j, &idx);
185 i += j;
186 if( zPattern[i] ) zPattern[i++] = 0;
187 if( score==0 ) continue;
188 begin[wn] = idx-50 > 0 ? idx-50 : 0;
189 end[wn] = begin[wn]+100 > size ? size : begin[wn]+100;
190 wn++;
191 }
192
193 /*
194 ** Sort an coalesce the exemplars
195 */
196 if( wn==0 ){
197 begin[0] = 0;
198 end[0] = size>100 ? 100 : size;
199 wn = 1;
200 }
201 if( g.useUTF8 ){
202 /*
203 ** Avoid splitting UTF-8 characters by detecting high bits in the
204 ** begin/end characters.
205 */
206 for(i=0; i<wn; i++){
207 while( begin[i]>0 && (zAll[begin[i]]&0xc0)==0x80 ){
208 begin[i]--;
209 }
210 while( (zAll[end[i]]&0xc0)==0x80 ){
211 end[i]++;
212 }
213 }
214 }
215 for(i=0; i<5 && wn>0; i++){
216 int min = begin[0];
217 k = 0;
218 for(j=1; j<wn; j++){
219 if( begin[j]<min ){
220 k = j;
221 min = begin[j];
222 }
223 }
224 sbegin[i] = begin[k];
225 send[i] = end[k];
226 wn--;
227 begin[k] = begin[wn];
228 end[k] = end[wn];
229 }
230 wn = i;
231 begin[0] = sbegin[0];
232 end[0] = send[0];
233 for(i=j=1; i<wn; i++){
234 if( sbegin[i]>end[j-1]+1 ){
235 begin[j] = sbegin[i];
236 end[j] = send[i];
237 j++;
238 }else if( send[i]>end[j-1] ){
239 end[j-1] = send[i];
240 }
241 }
242 wn = j;
243
244 /*
245 ** Initialize the mask[] array so that mask[x] has value 1 if x is the
246 ** first character of any key pattern.
247 */
248 memset(mask, 0, sizeof(mask));
249 for(i=0; i<nKey; i++){
250 mask[toupper(azKey[i][0])] = 1;
251 mask[tolower(azKey[i][0])] = 1;
252 }
253
254 /*
255 ** Generate the output stream
256 */
257 z = zOut;
258 for(i=0; i<wn; i++){
259 if( begin[i]>0 ){
260 if( i>0 ){ *(z++) = ' '; }
261 strcpy(z, "<b>...</b> ");
262 z += strlen(z);
263 }
264 for(j=begin[i]; j<end[i]; j++){
265 int c = zAll[j];
266 if( c=='&' ){
267 strcpy(z, "&");
268 z += 5;
269 }else if( c=='<' ){
270 strcpy(z, "<");
271 z += 4;
272 }else if( c=='>' ){
273 strcpy(z, ">");
274 z += 4;
275 }else if( mask[c] ){
276 for(k=0; k<nKey; k++){
277 int n;
278 if( tolower(c)!=tolower(azKey[k][0]) ) continue;
279 n = keySize[k];
280 if( sqlite3_strnicmp(&zAll[j],azKey[k],n)==0 ){
281 strcpy(z,"<b>");
282 z += 3;
283 while( n ){
284 c = zAll[j++];
285 if( c=='&' ){
286 strcpy(z, "&");
287 z += 5;
288 }else if( c=='<' ){
289 strcpy(z, "<");
290 z += 4;
291 }else if( c=='>' ){
292 strcpy(z, ">");
293 z += 4;
294 }else{
295 *(z++) = c;
296 }
297 n--;
298 }
299 j--;
300 strcpy(z,"</b>");
301 z += 4;
302 break;
303 }
304 }
305 if( k>=nKey ){
306 *(z++) = c;
307 }
308 }else{
309 *(z++) = c;
310 }
311 }
312 }
313 if( end[wn-1]<size ){
314 strcpy(z, " <b>...</b>");
315 }else{
316 *z = 0;
317 }
318
319 /*
320 ** Report back the results
321 */
322 sqlite3_result_text(context, zOut, -1, SQLITE_TRANSIENT);
323 free(zAll);
324 free(zPattern);
325 }
326
327 /*
328 ** WEBPAGE: /search
329 */
search_page(void)330 void search_page(void){
331 int srchTkt = PD("t","0")[0]!='0';
332 int srchCkin = PD("c","0")[0]!='0';
333 int srchWiki = PD("w","0")[0]!='0';
334 int srchFile = PD("f","0")[0]!='0';
335 int nPage = atoi(PD("p","0"));
336 const char *zPattern;
337 char **azResult = 0;
338 char *zPage;
339 sqlite3 *db;
340 char zSql[10000];
341
342 login_check_credentials();
343 zPattern = P("s");
344 if( zPattern && zPattern[0] ){
345 zPattern = sqlite3_mprintf("%q",zPattern);
346 }
347 if( srchTkt+srchCkin+srchWiki+srchFile==0 ){
348 srchTkt = srchCkin = srchWiki = srchFile = 1;
349 }
350 if( !g.okRead ) srchTkt = 0;
351 if( !g.okCheckout ) srchFile = srchCkin = 0;
352 if( !g.okRdWiki ) srchWiki = 0;
353 if( srchTkt==0 && srchCkin==0 && srchWiki==0 && srchFile==0 ){
354 login_needed();
355 return;
356 }
357 if( zPattern && zPattern[0] && strlen(zPattern)<100 ){
358 int i = 0;
359 char *zConnect = " (";
360
361 db = db_open();
362 db_add_functions();
363 sqlite3_create_function(db, "highlight", -1, SQLITE_ANY, 0,
364 highlightFunc, 0, 0);
365
366 appendf(zSql,&i,sizeof(zSql),"SELECT type, score, obj, title, body FROM");
367 if( srchTkt ){
368 appendf(zSql,&i,sizeof(zSql), "%s SELECT "
369 "1 AS type, "
370 "search('%s',title,description,remarks) as score, "
371 "tn AS obj, "
372 "highlight('%s',title) as title, "
373 "highlight('%s',description,remarks) as body FROM ticket "
374 "WHERE score>0",
375 zConnect, zPattern, zPattern, zPattern);
376 zConnect = " UNION ALL";
377 }
378 if( srchCkin ){
379 appendf(zSql,&i,sizeof(zSql),"%s SELECT "
380 "2 AS type, "
381 "search('%s',message) as score, "
382 "cn AS obj, "
383 "NULL as title, " /* ignored */
384 "highlight('%s',message) as body FROM chng "
385 "WHERE score>0",
386 zConnect, zPattern, zPattern);
387 zConnect = " UNION ALL";
388 }
389 if( srchWiki ){
390 appendf(zSql,&i,sizeof(zSql),"%s SELECT "
391 "3 AS type, "
392 "search('%s',name, text) as score, "
393 "name AS obj, "
394 "highlight('%s',name) as title, "
395 "highlight('%s',text) as body "
396 "FROM (select name as nm, min(invtime) as tm FROM wiki GROUP BY name) "
397 " as x, wiki "
398 "WHERE score>0 AND x.nm=wiki.name AND x.tm=wiki.invtime",
399 zConnect, zPattern, zPattern, zPattern);
400 zConnect = " UNION ALL";
401 }
402 if( srchFile ){
403 appendf(zSql,&i,sizeof(zSql),"%s SELECT "
404 "CASE WHEN isdir THEN 6 ELSE 4 END as type, "
405 "search('%s', dir || '/' || base) as score, "
406 "dir || '/' || base AS obj, "
407 "dir || '/' || base as title, "
408 "highlight('%s',dir || '/' || base) as body FROM file "
409 "WHERE score>0",
410 zConnect, zPattern, zPattern);
411 zConnect = " UNION ALL";
412
413 appendf(zSql,&i,sizeof(zSql),"%s SELECT "
414 "5 AS type, "
415 "search('%s', fname, description) as score, "
416 "atn AS obj, "
417 "fname as title, "
418 "highlight('%s',fname, description) as body FROM attachment "
419 "WHERE score>0 AND tn!=0",
420 zConnect, zPattern, zPattern);
421 zConnect = " UNION ALL";
422 }
423 appendf(zSql,&i,sizeof(zSql), ") ORDER BY score DESC LIMIT 30 OFFSET %d;",
424 nPage*30);
425 azResult = db_query("%s",zSql);
426 }
427 common_standard_menu("search", 0);
428 common_add_help_item("CvstracSearch");
429 common_header("Search");
430 @ <form action="search" method="GET">
431 @ Search:
432 @ <input type="text" size=40 name="s" value="%h(PD("s",""))">
433 @ <input type="submit" value="Go">
434 if( g.okRead + g.okCheckout + g.okRdWiki>1 ){
435 char *z;
436 @ <br>Look in:
437 if( g.okRead ){
438 z = srchTkt ? " checked " : "";
439 @ <label for="t"><input type="checkbox" name="t" id="t" value="1"%s(z)>
440 @ Tickets</label>
441 }
442 if( g.okCheckout ){
443 z = srchCkin ? " checked " : "";
444 @ <label for="c"><input type="checkbox" name="c" id="c" value="1"%s(z)>
445 @ Checkins</label>
446 }
447 if( g.okRdWiki ){
448 z = srchWiki ? " checked " : "";
449 @ <label for="w"><input type="checkbox" name="w" id="w" value="1"%s(z)>
450 @ Wiki Pages</label>
451 }
452 if( g.okCheckout ){
453 z = srchFile ? " checked " : "";
454 @ <label for="f"><input type="checkbox" name="f" id="f" value="1"%s(z)>
455 @ Filenames</label>
456 }
457 }
458 @ </form>
459 @ <div>
460 if( azResult && azResult[0] ){
461 int i, n;
462 @ <hr>
463 for(i=n=0; azResult[i]; i+=5, n++){
464 @ <p>
465 switch( atoi(azResult[i]) ){
466 case 1: /* ticket */
467 @ Ticket
468 output_ticket(atoi(azResult[i+2]),0);
469 @ : %s(azResult[i+3])
470 break;
471 case 2: /* check-in */
472 @ Check-in
473 output_chng(atoi(azResult[i+2]));
474 break;
475 case 3: /* wiki */
476 @ Wiki Page <a href="wiki?p=%s(azResult[i+2])">%s(azResult[i+3])</a>
477 break;
478 case 4: /* file */
479 @ File <a href="rlog?f=%t(azResult[i+2])">%h(azResult[i+3])</a>
480 break;
481 case 5: /* attachment */
482 zPage = db_short_query("SELECT tn FROM attachment WHERE atn=%d",
483 atoi(azResult[i+2]));
484 @ Attachment
485 @ <a href="attach_get/%s(azResult[i+2])/%t(azResult[i+3])">\
486 @ %s(azResult[i+3])</a> to
487 if( is_integer(zPage) ){
488 output_ticket(atoi(zPage),0);
489 }else{
490 @ wiki page <a href="wiki?p=%s(zPage)">%s(zPage)</a>
491 }
492 break;
493 case 6:{ /* directory */
494 const char* zDir = azResult[i+2];
495 if( zDir[0]=='/' ) zDir ++;
496 @ Directory <a href="dir?d=%t(zDir)">%h(zDir)</a>
497 break;
498 }
499 }
500 @ <br>%s(azResult[i+4])</p>
501 }
502 if( nPage || n>=30 ){
503 @ <hr><p align="center">
504 /* provide a history */
505 cgi_printf("Page: ");
506 for( i=0; i<nPage; i++ ){
507 cgi_printf("<a href=\"search?s=%T&t=%d&c=%d&w=%d&f=%d&p=%d\">%d</a> ",
508 zPattern, srchTkt, srchCkin, srchWiki, srchFile, i, i+1);
509 }
510 cgi_printf("%d ", nPage+1);
511 if( n>=30 ){
512 cgi_printf("<a href=\"search?s=%T&t=%d&c=%d&w=%d&f=%d&p=%d\">More</a>",
513 zPattern, srchTkt, srchCkin, srchWiki, srchFile, nPage+1);
514 }
515 @ </p>
516 }
517 }else if( zPattern && zPattern[0] ){
518 @ No results for <strong>%h(zPattern)</strong>
519 }
520 @ </div>
521 common_footer();
522 }
523