1 #include "search.h"
2 #include "message.h"
3 #include "messageinfo.h"
4 #include "rematchstr.h"
5 #include "rematchmsg.h"
6 #include "varlist.h"
7 #include <string.h>
8 #include <ctype.h>
9 #include <stdlib.h>
10
// RCS/CVS "$Id$" keyword stamp recording this file's revision.  Nothing in
// the visible part of this file reads it.
static const char rcsid[]="$Id: search.C,v 1.2 2005/08/28 14:30:56 mrsam Exp $";
12
cleanup()13 void Search::cleanup()
14 {
15 if (pcre_regexp_extra)
16 {
17 pcre_free(pcre_regexp_extra);
18 pcre_regexp_extra=NULL;
19 }
20 if (pcre_regexp)
21 {
22 pcre_free(pcre_regexp);
23 pcre_regexp=NULL;
24 }
25
26 if (pcre_vectors)
27 {
28 free(pcre_vectors);
29 pcre_vectors=NULL;
30 }
31 }
32
init(const char * expr,const char * opts)33 int Search::init(const char *expr, const char *opts)
34 {
35 int dummy;
36
37 match_header=0;
38 match_body=0;
39 weight1=1;
40 weight2=1;
41 scoring_match=0;
42 score=0;
43
44 if (strchr(opts, 'h')) match_header=1;
45 if (strchr(opts, 'b')) match_body=1;
46 if (!match_header && !match_body)
47 {
48 match_header=1;
49 if (strchr(opts, 'w')) match_body=1;
50 }
51
52 Buffer b;
53
54 b="MAILDROP_OLD_REGEXP";
55
56 const char *p=GetVarStr(b);
57
58 if (atoi(p ? p:"0") == 0)
59 {
60 const char *errptr;
61
62 cleanup();
63
64 if (strchr(opts, 'w'))
65 {
66 b="Pattern option 'w' is valid only when MAILDROP_OLD_REGEXP is set\n";
67 b += '\0';
68 merr.write(b);
69 return -1;
70 }
71
72 int errindex;
73
74 pcre_regexp=pcre_compile(expr,
75 strchr(opts, 'D') ? 0:PCRE_CASELESS,
76 &errptr,
77 &errindex, 0);
78
79 if (!pcre_regexp)
80 {
81 b="Invalid regular expression, offset ";
82 b.append((unsigned long)errindex);
83 b += " of: ";
84 b += expr;
85 b += ": ";
86 b += errptr;
87 b += "\n";
88 b += '\0';
89 merr.write(b);
90 return -1;
91 }
92
93 pcre_regexp_extra=pcre_study(pcre_regexp, 0,
94 &errptr);
95
96 if (errptr)
97 {
98 b="Error parsing regular expression: ";
99 b += expr;
100 b += ": ";
101 b += errptr;
102 b += "\n";
103 b += '\0';
104 merr.write(b);
105 return -1;
106 }
107
108 int cnt=0;
109
110 pcre_fullinfo(pcre_regexp, pcre_regexp_extra,
111 PCRE_INFO_CAPTURECOUNT, &cnt);
112
113 pcre_vector_count=(cnt+1)*3;
114
115 pcre_vectors=(int *)malloc(pcre_vector_count*sizeof(int));
116
117 if (!pcre_vectors)
118 {
119 b=strerror(errno);
120 b += "\n";
121 b += '\0';
122 merr.write(b);
123 return -1;
124 }
125 }
126 else
127 {
128 if (regexp.Compile(expr, strchr(opts, 'D') ? 1:0, dummy))
129 return (-1);
130 }
131
132 while (*opts)
133 {
134 if (*opts == '.' || isdigit(*opts) || *opts == '-' ||
135 *opts == '+')
136 {
137 weight1=atof(opts);
138 while (*opts && *opts != ',') ++opts;
139 if (*opts == ',')
140 {
141 ++opts;
142 if (*opts == '.' || isdigit(*opts) ||
143 *opts == '-' || *opts == '+')
144 weight2=atof(opts);
145 }
146 scoring_match=1;
147 break;
148 }
149 ++opts;
150 }
151 return (0);
152 }
153
find(Message & msg,MessageInfo &,const char * expr,const char * opts,Buffer * foreachp)154 int Search::find(Message &msg, MessageInfo &,
155 const char *expr, const char *opts, Buffer *foreachp)
156 {
157 if (init(expr, opts)) return (-1);
158
159 msg.Rewind();
160 return (strchr(opts, 'w') ? findinsection(msg, expr, foreachp):
161 findinline(msg, expr, foreachp));
162 }
163
find(const char * str,const char * expr,const char * opts,Buffer * foreachp)164 int Search::find(const char *str, const char *expr, const char *opts,
165 Buffer *foreachp)
166 {
167 if (init(expr, opts)) return (-1);
168
169 if (VerboseLevel() > 2)
170 {
171 Buffer msg;
172
173 msg="Matching /";
174 msg.append(expr);
175 msg.append("/ against ");
176 msg += str;
177 msg += '\n';
178 msg += '\0';
179 merr.write(msg);
180 }
181
182 int startoffset=0;
183 const char *orig_str=str;
184 int match_count=0;
185
186 for (;;)
187 {
188 if (pcre_regexp)
189 {
190 match_count=pcre_exec(pcre_regexp, pcre_regexp_extra,
191 orig_str, strlen(orig_str),
192 startoffset,
193 0,
194 pcre_vectors,
195 pcre_vector_count);
196 if (match_count <= 0)
197 break;
198 startoffset=pcre_vectors[1];
199
200 score += weight1;
201 weight1 *= weight2;
202
203 if (!scoring_match || foreachp)
204 {
205 init_match_vars(orig_str, match_count,
206 pcre_vectors, foreachp);
207 if (!foreachp)
208 break;
209 }
210 continue;
211 }
212
213 ReMatchStr match(str);
214
215 if ( regexp.Match(match)) break;
216
217 score += weight1;
218 weight1 *= weight2;
219
220 if (!scoring_match || foreachp)
221 {
222 match.SetCurrentPos(0);
223 init_match_vars(match, foreachp);
224 if (!foreachp)
225 break; // No need for more.
226 }
227
228 Re *p;
229 off_t c=0;
230
231 for (p= ®exp; p; )
232 c += p->MatchCount( &p );
233 if (c == 0)
234 {
235 if (!*str) break;
236 ++c;
237 }
238 str += c;
239 }
240 return (0);
241 }
242
243 //////////////////////////////////////////////////////////////////////////////
244 //
// Search individual lines for the pattern (transparently concatenating
// continued headers).
247 //
248 //////////////////////////////////////////////////////////////////////////////
249
findinline(Message & msg,const char * expr,Buffer * foreachp)250 int Search::findinline(Message &msg, const char *expr, Buffer *foreachp)
251 {
252 current_line.reset();
253 if (msg.appendline(current_line)) return (0); // Empty msg
254
255 int eof;
256
257 for (;;)
258 {
259 int c='\n';
260
261 next_line.reset();
262 if ((eof=msg.appendline(next_line)) == 0)
263 {
264 c=(unsigned char)*(const char *)next_line;
265
266 if ( isspace( c ) && c != '\n')
267 // Continued header
268 {
269 current_line.pop();
270 current_line += next_line;
271 continue;
272 }
273 }
274 current_line.pop();
275
276 current_line += '\0';
277
278 if (match_header)
279 {
280 if (VerboseLevel() > 2)
281 {
282 Buffer msg;
283
284 msg="Matching /";
285 msg.append(expr);
286 msg.append("/ against ");
287 msg += current_line;
288 msg.pop(); // Trailing null byte.
289 msg += '\n';
290 msg += '\0';
291 merr.write(msg);
292 }
293
294 if (pcre_regexp)
295 {
296 const char *orig_str=current_line;
297 int match_count;
298
299 match_count=pcre_exec(pcre_regexp,
300 pcre_regexp_extra,
301 orig_str,
302 strlen(orig_str),
303 0,
304 0,
305 pcre_vectors,
306 pcre_vector_count);
307
308 if (match_count > 0)
309 {
310 score += weight1;
311 weight1 *= weight2;
312
313 if (!scoring_match || foreachp)
314 {
315 init_match_vars(orig_str,
316 match_count,
317 pcre_vectors,
318 foreachp);
319 if (!foreachp)
320 return (0);
321 }
322 }
323 else if (VerboseLevel() > 2)
324 merr.write("Not matched.\n");
325 }
326 else
327 {
328 ReMatchStr match(current_line);
329
330 if (regexp.Match(match) == 0)
331 {
332 score += weight1;
333 weight1 *= weight2;
334 if (!scoring_match || foreachp)
335 {
336 match.SetCurrentPos(0);
337 init_match_vars(match,
338 foreachp);
339 if (!foreachp)
340 return (0);
341 }
342 }
343 else if (VerboseLevel() > 2)
344 merr.write("Not matched.\n");
345 }
346 }
347 if ( c == '\n') break;
348 current_line=next_line;
349 }
350 if (!match_body || eof) return (0);
351
352 while (current_line.reset(), msg.appendline(current_line) == 0)
353 {
354 current_line.pop();
355 current_line += '\0';
356
357 if (VerboseLevel() > 2)
358 {
359 Buffer msg;
360
361 msg="Matching /";
362 msg.append(expr);
363 msg.append("/ against ");
364 msg += current_line;
365 msg.pop(); // Trailing null byte.
366 msg += '\n';
367 msg += '\0';
368 merr.write(msg);
369 }
370
371 if (pcre_regexp)
372 {
373 const char *orig_str=current_line;
374 int match_count;
375
376 match_count=pcre_exec(pcre_regexp,
377 pcre_regexp_extra,
378 orig_str,
379 strlen(orig_str),
380 0,
381 0,
382 pcre_vectors,
383 pcre_vector_count);
384
385 if (match_count > 0)
386 {
387 score += weight1;
388 weight1 *= weight2;
389
390 if (!scoring_match || foreachp)
391 {
392 init_match_vars(orig_str,
393 match_count,
394 pcre_vectors,
395 foreachp);
396 if (!foreachp)
397 return (0);
398 }
399 }
400 else if (VerboseLevel() > 2)
401 merr.write("Not matched.\n");
402
403 continue;
404 }
405
406 ReMatchStr match(current_line);
407
408 if (regexp.Match(match) == 0)
409 {
410 score += weight1;
411 weight1 *= weight2;
412 if (!scoring_match || foreachp)
413 {
414 match.SetCurrentPos(0);
415 init_match_vars(match, foreachp);
416 if (!foreachp)
417 return (0);
418 }
419 }
420 else if (VerboseLevel() > 2)
421 merr.write("Not matched.\n");
422 }
423 return (0);
424 }
425
426 ///////////////////////////////////////////////////////////////////////////
427 //
428 // Search anchored in the entire message.
429 //
430 ///////////////////////////////////////////////////////////////////////////
431
findinsection(Message & msg,const char * expr,Buffer * foreachp)432 int Search::findinsection(Message &msg, const char *expr, Buffer *foreachp)
433 {
434 if (!match_header && !match_body) return (0); // Huh?
435
436 if (VerboseLevel() > 2)
437 {
438 Buffer m;
439
440 m="Matching /";
441 m.append(expr);
442 m.append("/ against");
443 if (match_header)
444 m.append(" header");
445 if (match_body)
446 m.append(" body");
447 m += '\n';
448 m += '\0';
449 merr.write(m);
450 }
451
452 if (!match_header)
453 {
454 Buffer dummy;
455
456 do
457 {
458 dummy.reset();
459 if (msg.appendline(dummy) < 0) return (0);
460 // No message body, give up.
461 } while (dummy.Length() != 1 ||
462 *(const char *)dummy != '\n');
463 }
464
465 off_t start_pos=msg.tell();
466 ReMatchMsg match_msg(&msg, !match_body, match_header);
467
468 while ( match_msg.CurrentChar() >= 0 && regexp.Match(match_msg) == 0)
469 {
470 score += weight1;
471 weight1 *= weight2;
472
473 if (!scoring_match || foreachp)
474 {
475 match_msg.SetCurrentPos(start_pos);
476 init_match_vars(match_msg, foreachp);
477 if (!foreachp)
478 break; // No need for more.
479 }
480
481 Re *p;
482 off_t c=0;
483
484 for (p= ®exp; p; )
485 c += p->MatchCount( &p );
486 if (c == 0) ++c;
487 start_pos += c;
488 match_msg.SetCurrentPos(start_pos);
489 }
490 return (0);
491 }
492
init_match_vars(const char * str,int nranges,int * offsets,Buffer * foreachp)493 void Search::init_match_vars(const char *str, int nranges, int *offsets,
494 Buffer *foreachp)
495 {
496 Buffer varname;
497 int cnt;
498
499 for (cnt=0; cnt<nranges; cnt++)
500 {
501 varname="MATCH";
502 if (cnt)
503 varname.append((unsigned long)cnt);
504
505
506 Buffer v;
507
508 int i, j;
509
510 i=offsets[cnt*2];
511 j=offsets[cnt*2+1];
512
513 if (i < j)
514 {
515 size_t s=j-i;
516
517 char *ptr=(char *)malloc(s+1);
518 if (ptr)
519 {
520 memcpy(ptr, str + i, s);
521 ptr[s]=0;
522 v=ptr;
523 free(ptr);
524 }
525 }
526
527 if (foreachp)
528 {
529 *foreachp += v;
530 *foreachp += '\0';
531 }
532
533 SetVar(varname, v);
534 }
535 }
536
init_match_vars(ReMatch & m,Buffer * foreachp)537 void Search::init_match_vars(ReMatch &m, Buffer *foreachp)
538 {
539 Re *p;
540 Buffer buf;
541 Buffer varname;
542 unsigned long varnamecount=1;
543
544 varname="MATCH";
545 for (p= ®exp; p; )
546 {
547 Re *q=p;
548 unsigned count=p->MatchCount(&p);
549
550 buf.reset();
551 while (count)
552 {
553 buf.push( m.NextChar() );
554 count--;
555 }
556
557 if ( !q->IsDummy())
558 {
559 if (foreachp)
560 {
561 *foreachp += buf;
562 *foreachp += '\0';
563 }
564 else
565 {
566 SetVar(varname, buf);
567 ++varnamecount;
568 varname="MATCH";
569 varname.append(varnamecount);
570 }
571 }
572 }
573 }
574