1 /*** rewrite.c ****************************************************************
2 **
3 ** This file is part of BibTool.
4 ** It is distributed under the GNU General Public License.
5 ** See the file COPYING for details.
6 **
7 ** (c) 1996-2020 Gerd Neugebauer
8 **
9 ** Net: gene@gerd-neugebauer.de
10 **
11 ** This program is free software; you can redistribute it and/or modify
12 ** it under the terms of the GNU General Public License as published by
13 ** the Free Software Foundation; either version 2, or (at your option)
14 ** any later version.
15 **
16 ** This program is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ** GNU General Public License for more details.
20 **
21 ** You should have received a copy of the GNU General Public License
22 ** along with this program; if not, write to the Free Software
23 ** Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 **
25 ******************************************************************************/
26 
27 #include <bibtool/general.h>
28 #include <bibtool/symbols.h>
29 #include <bibtool/entry.h>
30 #include <bibtool/error.h>
31 #include <bibtool/macros.h>
32 #include <bibtool/rsc.h>
33 #include <bibtool/key.h>
34 #include <bibtool/s_parse.h>
35 #include <bibtool/sbuffer.h>
36 #include <bibtool/rewrite.h>
37 #include <bibtool/symbols.h>
38 
39 #ifdef REGEX
40 #include <bibtool/regex.h>
41 #endif
42 
43  typedef struct rULE
44  { Symbol	rr_field;
45    Symbol	rr_goal;
46    Symbol	rr_value;
47    Symbol	rr_frame;
48    int		rr_flag;
49    struct rULE	*rr_next;
50 #ifdef REGEX
51    struct re_pattern_buffer rr_pat_buff;
52 #endif
53  } SRule, *Rule;
54 
55 #define RuleNULL	(Rule)0
56 
57 #define RuleField(X)	((X)->rr_field)
58 #define RuleGoal(X)	((X)->rr_goal)
59 #define RuleValue(X)	((X)->rr_value)
60 #define RulePattern(X)	((X)->rr_pat_buff)
61 #define RuleFrame(X)	((X)->rr_frame)
62 #define NextRule(X)	((X)->rr_next)
63 #define RuleFlag(X)	((X)->rr_flag)
64 
65 /*****************************************************************************/
66 /* Internal Programs							     */
67 /*===========================================================================*/
68 
69 #ifdef __STDC__
70 #define _ARG(A) A
71 #else
72 #define _ARG(A) ()
73 #endif
74  bool is_selected _ARG((DB db,Record rec));	   /*                        */
75  int set_regex_syntax _ARG((char* name));	   /*                        */
76  static Rule new_rule _ARG((Symbol field,Symbol value,Symbol pattern,Symbol frame,int flags,int casep));
77  static String  check_regex _ARG((Symbol field,Symbol value,Rule rule,DB db,Record rec));
78  static String  repl_regex _ARG((Symbol field,Symbol value,Rule rule,DB db,Record rec));
79  static bool s_match _ARG((String  p,String  s));  /*                        */
80  static bool s_search _ARG((String  pattern,String  s));/*                   */
81  static void add_rule _ARG((String s,Rule *rp,Rule *rp_end,int flags,int casep));
82 #ifdef UNUSED
83  static void free_rule _ARG((Rule rule));	   /*                        */
84 #endif
85  static void init_s_search _ARG((String  ignored));/*                        */
86  static void rewrite_1 _ARG((String frame,StringBuffer *sb,String match,DB db,Record rec));/**/
87  void add_check_rule _ARG((String s,int flags));   /*                        */
88  void add_extract _ARG((Symbol s,int regexp,int notp));/*                    */
89  void add_field _ARG((String spec));		   /*                        */
90  void add_rewrite_rule _ARG((String s));	   /*                        */
91  void clear_addlist _ARG((void));		   /*                        */
92  void remove_field _ARG((Symbol field,Record rec));/*                        */
93  void rename_field _ARG((Symbol spec));		   /*                        */
94  void rewrite_record _ARG((DB db,Record rec));	   /*                        */
95  void save_regex _ARG((String s));		   /*                        */
96 
97 /*****************************************************************************/
98 /* External Programs							     */
99 /*===========================================================================*/
100 
101 
102 /*---------------------------------------------------------------------------*/
103 
104  static String s_if = (String)"if";
105 
106  static Rule match = RuleNULL;
107 
108 
109 /*****************************************************************************/
110 /***			  Field Add/Delete Section			   ***/
111 /*****************************************************************************/
112 
113  static Macro addlist = MacroNULL;
114 
115 /*-----------------------------------------------------------------------------
116 ** Function:	clear_addlist()
117 ** Purpose:	Reset the addlist to the empty list.
118 ** Arguments:	none
119 ** Returns:	nothing
120 **___________________________________________________			     */
clear_addlist()121 void clear_addlist()				   /*                        */
122 {						   /*                        */
123   free_macro(addlist);				   /*                        */
124   addlist = MacroNULL;				   /*                        */
125 }						   /*------------------------*/
126 
127 /*-----------------------------------------------------------------------------
128 ** Function:	foreach_addlist()
129 ** Type:	bool
130 ** Purpose:	Apply a function for every entry in the |addlist|.
131 **		If the function returns |false| then the iteration is
132 **		terminated immediately and |false| returned. Otherwise
133 **		|true| is returned after all entries have been visited.
134 ** Arguments:
135 **	fct	the function to be applied
136 ** Returns:	the termination indicator
137 **___________________________________________________			     */
138 bool foreach_addlist(fct)			   /*                        */
139   bool (*fct) _ARG((Symbol key, Symbol val));	   /*                        */
140 { return each_macro(addlist, fct);		   /*                        */
141 }						   /*------------------------*/
142 
143 /*-----------------------------------------------------------------------------
144 ** Function:	add_field()
145 ** Purpose:	Save a token and value for addition.
146 ** Arguments:
147 **	spec	A string of the form
148 **		  | token=value|
149 ** Returns:	nothing
150 **___________________________________________________			     */
add_field(spec)151 void add_field(spec)				   /*			     */
152   String spec;					   /*			     */
153 { register Symbol field, value;		   	   /*			     */
154 						   /*			     */
155   sp_open(spec);				   /*			     */
156   if ((field = SParseSymbol(&spec)) == NO_SYMBOL)  /*		             */
157     return;					   /*			     */
158   sp_skip(&spec);			   	   /*			     */
159   if ((value=SParseValue(&spec)) == NO_SYMBOL)     /*			     */
160     return;					   /*			     */
161   sp_eos(&spec);			   	   /*			     */
162 						   /*			     */
163   addlist = new_macro(field, value, addlist, 0);   /*			     */
164 }						   /*------------------------*/
165 
166 /*-----------------------------------------------------------------------------
167 ** Function:	remove_field()
168 ** Purpose:	Remove the given field from record.
169 ** Arguments:
170 **	field	This is a symbol containing the name of the field to remove.
171 **	rec	Record in which the field should be removed.
172 ** Returns:	nothing
173 **___________________________________________________			     */
remove_field(field,rec)174 void remove_field(field, rec)			   /*			     */
175   register Symbol field;			   /*			     */
176   Record	 rec;				   /*			     */
177 { register int	 i;				   /*			     */
178 						   /*			     */
179   for (i = 0; i < RecordFree(rec); i += 2 )	   /*			     */
180   { if ( field == RecordHeap(rec)[i] )		   /* compare symbols	     */
181     { RecordHeap(rec)[i] = NO_SYMBOL; }	   	   /*			     */
182   }						   /*			     */
183 						   /*			     */
184   while ( RecordFree(rec) > 0 &&		   /* Adjust Heap Length     */
185 	  RecordHeap(rec)[RecordFree(rec) - 2] == NULL )/*		     */
186   { RecordFree(rec) -= 2;			   /*			     */
187   }						   /*			     */
188 }						   /*------------------------*/
189 
190 
191 /*****************************************************************************/
192 /***				 Rule Section				   ***/
193 /*****************************************************************************/
194 
195 #ifdef REGEX
196  static struct re_registers reg;		   /*			     */
197 #endif
198 
199 /*-----------------------------------------------------------------------------
200 ** Function*:	new_rule()
201 ** Purpose:	Allocate a new Rule and fill some slots.
202 ** Arguments:
203 **	field	The field to apply this rule to, or |NULL| for each field.
204 **	value	the value
205 **	pattern	the rule goal
206 **	frame	the rule frame
207 **	flags	The ored flag values
208 **	casep	Boolean; indicating case sensitive comparison.
209 ** Returns:	A pointer to the allocated structure or |NULL| upon failure.
210 **___________________________________________________			     */
new_rule(field,value,pattern,frame,flags,casep)211 static Rule new_rule(field, value, pattern, frame, flags, casep)/*           */
212   Symbol	field;				   /*			     */
213   Symbol	value;			   	   /*			     */
214   Symbol	pattern;			   /*			     */
215   Symbol	frame;				   /*			     */
216   int		flags;				   /*			     */
217   int		casep;				   /*			     */
218 { register Rule rule;				   /*			     */
219   static int    init = 1;			   /*                        */
220  						   /*                        */
221 #ifdef REGEX
222   if ( init )					   /*                        */
223   { init = 0;					   /*                        */
224     reg.num_regs = 32;				   /*                        */
225     reg.start = (regoff_t*)calloc(32,sizeof(regoff_t));/*                    */
226     reg.end   = (regoff_t*)calloc(32,sizeof(regoff_t));/*                    */
227     if ( reg.start == NULL || reg.end == NULL )	   /*                        */
228     { OUT_OF_MEMORY("rewrite rule"); } 		   /*			     */
229   }						   /*                        */
230 #endif
231 						   /*			     */
232   if ( (rule=(Rule)malloc(sizeof(SRule))) == RuleNULL )/*		     */
233   { OUT_OF_MEMORY("rewrite rule"); } 		   /*			     */
234 						   /*			     */
235   RuleField(rule) = field;			   /*			     */
236   if (field) { LinkSymbol(field); }		   /*                        */
237   RuleValue(rule) = value;			   /*			     */
238   if (value) { LinkSymbol(value); }		   /*                        */
239   RuleFrame(rule) = frame;			   /*			     */
240   if (frame) { LinkSymbol(frame); }		   /*                        */
241   RuleFlag(rule)  = flags;			   /*			     */
242   NextRule(rule)  = RuleNULL;			   /*			     */
243   RuleGoal(rule)  = pattern;			   /*                        */
244   if (pattern) { LinkSymbol(pattern); }		   /*                        */
245 					       	   /*                        */
246 #ifdef REGEX
247   if ( pattern &&				   /*                        */
248        *SymbolValue(pattern) &&			   /*                        */
249        (flags&RULE_REGEXP) )			   /*                        */
250   { char *msg;					   /*                        */
251     if ( (RulePattern(rule).buffer = (String)malloc(16)) == NULL )/*         */
252     { OUT_OF_MEMORY("pattern"); }		   /*			     */
253     RulePattern(rule).allocated = 16;		   /*			     */
254     RulePattern(rule).syntax    = RE_SYNTAX_EMACS; /*			     */
255     RulePattern(rule).fastmap   = NULL;		   /*			     */
256     RulePattern(rule).regs_allocated = REGS_FIXED; /*			     */
257     RulePattern(rule).translate = (casep	   /*                        */
258 				  ? (char*)trans_lower/*                     */
259 				  : NULL);	   /*	                     */
260 						   /*			     */
261     msg = (char*)re_compile_pattern((char*)SymbolValue(pattern),/*	     */
262 				    symlen(pattern),/*		             */
263 				    &RulePattern(rule) );/*	             */
264     if (msg) {				   	   /*                        */
265       Err(msg);					   /*                        */
266       free(rule);				   /*                        */
267       return NULL;				   /*                        */
268     }	   					   /*			     */
269   }						   /*                        */
270   else						   /*                        */
271   { RuleFlag(rule) = (flags & ~RULE_REGEXP);	   /*                        */
272   }						   /*                        */
273 #endif
274   DebugPrint2("pattern = ", SymbolValue(pattern)); /*			     */
275   DebugPrint2("frame   = ", SymbolValue(frame));   /*			     */
276   DebugPrintF1("+++ BibTool: flags   =");	   /*			     */
277   DebugPrintF1(flags & RULE_NOT ? " NOT" : "");	   /*                        */
278   DebugPrintF1(flags & RULE_ADD ? " ADD" : "");	   /*                        */
279   DebugPrintF1(flags & RULE_RENAME ? " RENAME" : "");/*                      */
280   DebugPrintF1(flags & RULE_KEEP ? " KEEP" : "");  /*                        */
281   DebugPrintF1(flags & RULE_REGEXP ? " REGEXP" : "");/*                      */
282   DebugPrintF1("\n");		   		   /*			     */
283   DebugPrintF2("+++ BibTool: New rule = %lx\n",	   /*                        */
284 	       (long)rule);			   /*                        */
285  						   /*                        */
286   return rule;					   /*			     */
287 }						   /*------------------------*/
288 
289 #ifdef UNUSED
290 /*-----------------------------------------------------------------------------
291 ** Function*:	free_rule()
292 ** Purpose:	Free a list of rules.
293 ** Arguments:
294 **	rule	First rule in the list.
295 ** Returns:	nothing
296 **___________________________________________________			     */
free_rule(rule)297 static void free_rule(rule)			   /*                        */
298   Rule rule;					   /*                        */
299 { Rule next;					   /*                        */
300  						   /*                        */
301   while (rule)				   	   /*                        */
302   { next = NextRule(rule);			   /*                        */
303 #ifdef REGEX
304     free(RulePattern(rule).buffer);		   /*                        */
305 #endif
306     free(rule);					   /*                        */
307     rule = next;				   /*                        */
308   }						   /*                        */
309 }						   /*------------------------*/
310 #endif
311 
312 /*-----------------------------------------------------------------------------
313 ** Function*:	add_rule()
314 ** Purpose:	Generic addition of a rule to a list of rules.
315 ** Arguments:
316 **	s	the specification string
317 **	rp	the pointer to the first rule
318 **	rp_end	the pointer to the last rule
319 **	flags	the flags
320 **	casep	the indicator for cased matching
321 ** Returns:	nothing
322 **___________________________________________________			     */
add_rule(s,rp,rp_end,flags,casep)323 static void add_rule(s,rp,rp_end,flags,casep)	   /*			     */
324   String	s;				   /*			     */
325   Rule		*rp;				   /*			     */
326   Rule		*rp_end;			   /*			     */
327   int		flags;				   /*                        */
328   int		casep;				   /*			     */
329 { Symbol	field;				   /*			     */
330   Symbol	pattern;			   /*			     */
331   Symbol	frame;				   /*			     */
332   Rule		rule;				   /*			     */
333   int		sp;				   /*                        */
334   int		stackp;				   /* stack pointer for the  */
335   static Symbol	*stack;			   	   /* local stack of fields  */
336   static int    stacksize = 0;			   /*                        */
337  						   /*                        */
338   if ( stacksize == 0 )				   /*                        */
339   { stacksize++;				   /*                        */
340     if ((stack=(Symbol*)malloc(sizeof(Symbol)))==(Symbol*)NULL)/*            */
341     { OUT_OF_MEMORY("rule stack"); }		   /*                        */
342   }						   /*                        */
343   stackp = 0;					   /*                        */
344 						   /*			     */
345   DebugPrint2("Adding rule: Parsing from: ", s);   /*			     */
346   sp_open(s);				   	   /*			     */
347   sp_skip(&s);				   	   /*			     */
348 						   /*			     */
349   while (*s && *s != '"')			   /*                        */
350   {						   /*                        */
351     DebugPrint2("\tlooking for symbol in: ", s);   /*			     */
352     field = SParseSymbol(&s);			   /*                        */
353     if (field == NO_SYMBOL)			   /*                        */
354     { DebugPrint2("\tno symbol found in: ", s);	   /*			     */
355       return;					   /*                        */
356     }					   	   /*                        */
357     DebugPrint2("\tok ",s);			   /*                        */
358     DebugPrint2("field   = ", SymbolValue(field)); /*			     */
359     sp_skip(&s);			   	   /*                        */
360 						   /*			     */
361     if (stackp >= stacksize)			   /*                        */
362     { stacksize += 8;				   /*                        */
363       if ( (stack=(Symbol*)realloc((void*)stack,   /*                        */
364 				   stacksize*sizeof(Symbol)))==NULL)/*       */
365       { OUT_OF_MEMORY("rule stack"); }		   /*                        */
366     }						   /*                        */
367     stack[stackp++] = field;			   /*                        */
368   }						   /*                        */
369 						   /*			     */
370   if ( *s == '\0' )				   /*			     */
371   { pattern = symbol((String)"."); }		   /*			     */
372   else if ( (pattern=SParseUnquotedString(&s)) == NO_SYMBOL )/*		     */
373   { DebugPrintF1("No pattern found");		   /*                        */
374     return;					   /*			     */
375   }						   /*                        */
376 						   /*			     */
377   sp_skip(&s);				   	   /*			     */
378 						   /*			     */
379   if (*s == '\0')				   /*			     */
380   { frame = NO_SYMBOL; }			   /*			     */
381   else if ((frame=SParseUnquotedString(&s)) == NO_SYMBOL)/*		     */
382   { return; }					   /*			     */
383   else						   /*			     */
384   { sp_eos(&s); }			   	   /*			     */
385 						   /*			     */
386   if (stackp == 0)				   /* No field specified.    */
387   { rule = new_rule(NO_SYMBOL,			   /*                        */
388 		    NO_SYMBOL,			   /*                        */
389 		    pattern,			   /*                        */
390 		    frame,			   /*                        */
391 		    flags,			   /*                        */
392 		    casep);			   /*                        */
393     if ( *rp == RuleNULL )			   /*                        */
394     { *rp = *rp_end = rule; }			   /*			     */
395     else					   /*                        */
396     { NextRule(*rp_end) = rule; *rp_end = rule;}   /*			     */
397     return;					   /*                        */
398   }						   /*                        */
399  						   /*                        */
400   for (sp = 0; sp < stackp; sp++)		   /*                        */
401   { rule = new_rule(stack[sp],			   /*                        */
402 		    NO_SYMBOL,			   /*                        */
403 		    pattern,			   /*                        */
404 		    frame,			   /*                        */
405 		    flags,			   /*                        */
406 		    casep);			   /*		             */
407     if ( *rp == RuleNULL )			   /*                        */
408     { *rp = *rp_end = rule; }			   /*			     */
409     else					   /*                        */
410     { NextRule(*rp_end) = rule; *rp_end = rule;}   /*			     */
411   }						   /*                        */
412 }						   /*------------------------*/
413 
414 /*-----------------------------------------------------------------------------
415 ** Function*:	rewrite_1()
416 ** Purpose:
417 **
418 **
419 ** Arguments:
420 **	frame
421 **	sb
422 **	match
423 **	rec
424 ** Returns:	nothing
425 **___________________________________________________			     */
rewrite_1(frame,sb,match,db,rec)426 static void rewrite_1(frame,sb,match,db,rec)	   /*			     */
427   String	frame;			   	   /*			     */
428   StringBuffer	*sb;			   	   /*			     */
429   String	match;			   	   /*			     */
430   DB		db;			   	   /*                        */
431   Record	rec;			   	   /*			     */
432 {						   /*                        */
433   for (; *frame; frame++)			   /*			     */
434   { if (*frame == '%')			   	   /*	                     */
435     { frame = fmt_expand(sb, frame, db, rec); }	   /*	                     */
436     else if (*frame != '\\')			   /* Transfer normal	     */
437     { (void)sbputchar(*frame,sb); }		   /*	characters.	     */
438     else					   /*			     */
439     {						   /*			     */
440       switch (*++frame)			   	   /*			     */
441       { case '1': case '2': case '3':		   /*			     */
442 	case '4': case '5': case '6':		   /*			     */
443 	case '7': case '8': case '9':		   /*			     */
444 #ifdef REGEX
445 	  { int i = *frame - '0';		   /* Look for register no   */
446 	    int e = reg.end[i];			   /* get end of match	     */
447 						   /*			     */
448 	    for (i = reg.start[i]; i < e; ++i)	   /* transfer from start    */
449 	    { (void)sbputchar(match[i],sb); }	   /*	to end of match.     */
450 	  }					   /*			     */
451 #endif
452 	  break;				   /*			     */
453 	case '$':				   /*			     */
454 	  (void)sbputs((char*)SymbolValue(*RecordHeap(rec)),/*               */
455 		       sb);			   /*			     */
456 	  break;				   /*			     */
457 	case '@':				   /*			     */
458 	  (void)sbputs((char*)SymbolValue(EntryName(RecordType(rec))),/*     */
459 		       sb);			   /*		             */
460 	  break;				   /*			     */
461 	case 'n':				   /*                        */
462 	  (void)sbputchar('\n', sb);		   /*                        */
463 	  break; 				   /*			     */
464 	case 't':				   /*                        */
465 	  (void)sbputchar('\t', sb);		   /*                        */
466 	  break; 				   /*			     */
467 	default:				   /*			     */
468 	  if (*frame) (void)sbputchar(*frame,sb);  /* Use '\\' as quote	     */
469 	  else --frame;				   /* or ignore at end of str*/
470       }						   /*			     */
471     }						   /*			     */
472   }						   /*			     */
473 }						   /*------------------------*/
474 
475 #ifdef REGEX
476 
477 /*-----------------------------------------------------------------------------
478 ** Function*:	selector_hits()
479 ** Type:	bool
480 ** Purpose:	Check whether a rule matches a given record.
481 **
482 ** Arguments:
483 **	rule	the rule
484 **	 db	the database
485 **	 rec	the record
486 ** Returns:	|true| iff the rule applies
487 **___________________________________________________			     */
selector_hits(rule,db,rec)488 static bool selector_hits(rule, db, rec)	   /*                        */
489   Rule rule;					   /*                        */
490   DB db;					   /*                        */
491   Record rec;					   /*                        */
492 { Symbol field = RuleFrame(rule);		   /*                        */
493   Symbol value;					   /*                        */
494   int len;					   /*                        */
495  						   /*                        */
496   if (field == NO_SYMBOL) { return true; }	   /*                        */
497  						   /*                        */
498   value = get_field(db, rec, field);		   /*                        */
499 #ifdef REGEX
500   len	= (value ? symlen(value) : 0) ;		   /*                        */
501   return (value &&				   /*                        */
502 	  SymbolValue(value) &&			   /*			     */
503 	  re_search(&RulePattern(rule),	   	   /*			     */
504 		    (char*)SymbolValue(value),	   /*                        */
505 		    len,	   		   /*                        */
506 		    0,			   	   /*                        */
507 		    len - 1,		   	   /*                        */
508 		    &reg) >= 0 );	   	   /*			     */
509 #else
510   return true;					   /*                        */
511 #endif
512 }						   /*------------------------*/
513 #endif
514 
515 /*-----------------------------------------------------------------------------
516 ** Function*:	repl_regex()
517 ** Purpose:
518 **
519 **
520 ** Arguments:
521 **	field	the field
522 **	value	the replacement value
523 **	rule	the rule
524 **	db	the database
525 **	rec	the record
526 ** Returns:	the result of the replacement
527 **___________________________________________________			     */
repl_regex(field,value,rule,db,rec)528 static String repl_regex(field, value, rule, db, rec)/*			     */
529   Symbol field;				   	   /*			     */
530   Symbol value;				   	   /*			     */
531   Rule	 rule;			   	   	   /*			     */
532   DB	 db;				   	   /*                        */
533   Record rec;			   	   	   /*			     */
534 {						   /*			     */
535   String        val = SymbolValue(value);	   /*                        */
536 #ifdef REGEX
537   char		c;			   	   /*			     */
538   int		len;			   	   /*			     */
539   StringBuffer	*sp;			   	   /* intermediate pointer   */
540   bool		once_more;		   	   /*                        */
541   int		limit;			   	   /* depth counter to break */
542  						   /*  out of infinite loops */
543   static StringBuffer *s1 = NULL;		   /*			     */
544   static StringBuffer *s2 = NULL;		   /*			     */
545 						   /*			     */
546   if (rule == RuleNULL) return val; 		   /*			     */
547 						   /*			     */
548   if (s1 == NULL) { s1 = sbopen(); s2 = sbopen(); }/*			     */
549   else		  { sbrewind(s1);  sbrewind(s2);  }/*			     */
550 						   /*			     */
551   (void)sbputs((char*)val, s1);		   	   /*			     */
552   val       = (String)sbflush(s1);	   	   /*			     */
553   len	    = strlen((char*)val);   		   /*			     */
554   limit     = rsc_rewrite_limit;		   /*			     */
555   once_more = true;				   /*                        */
556     					   	   /*			     */
557   while (once_more) 				   /*			     */
558   {						   /*			     */
559     once_more = false;				   /*                        */
560     while (rule)			   	   /*                        */
561     {						   /*                        */
562 #ifdef DEBUG
563       printf("+++ BibTool: repl_regex rule:0x%lx flags:0x%x field:%s <> %s\n",
564 	     (long)rule,			   /*                        */
565 	     RuleFlag(rule),			   /*                        */
566 	     (char*)SymbolValue(RuleField(rule)),  /*                        */
567 	     (char*)SymbolValue(field));	   /*                        */
568 #endif
569       if ((RuleFlag(rule) & RULE_RENAME) != 0)	   /*                        */
570       {						   /*                        */
571 	if (RuleField(rule) == field &&		   /*                        */
572 	    selector_hits(rule, db, rec))	   /*                        */
573 	{ int i;				   /*                        */
574 	  Symbol *hp;				   /*                        */
575 	  for (i = RecordFree(rec), hp = RecordHeap(rec);/*		     */
576 	       i > 0;				   /*			     */
577 	       i -= 2, hp += 2)			   /*			     */
578 	  {					   /*			     */
579 	    if (*hp == field)	   		   /*			     */
580 	    { field = *hp = RuleValue(rule);	   /*                        */
581 	      break;				   /*                        */
582 	    }					   /*                        */
583 	  }					   /*                        */
584 	}					   /*                        */
585 	rule = NextRule(rule);			   /*                        */
586 	limit = rsc_rewrite_limit;		   /*			     */
587       }						   /*                        */
588       else if ((RuleField(rule) == NULL	   	   /*			     */
589 		|| RuleField(rule) == field ) &&   /*			     */
590 	       (RuleFlag(rule) & RULE_ADD) == 0 && /*                        */
591 	       re_search(&RulePattern(rule),	   /*			     */
592 			 (char*)val,		   /*                        */
593 			 len,			   /*                        */
594 			 0,			   /*                        */
595 			 len - 1,		   /*                        */
596 			 &reg) >= 0 )		   /*			     */
597       {					   	   /*			     */
598 	if (--limit < 0)			   /*                        */
599 	{ ErrPrintF2("\n*** BibTool WARNING: Rewrite limit exceeded for field %s\n\t\t     in record %s\n",
600 		     (char*)SymbolValue(field),	   /*                        */
601 		     (*RecordHeap(rec)		   /*                        */
602 		      ? (char*)SymbolValue(*RecordHeap(rec))/*               */
603 		      : "") );			   /*                        */
604 	  once_more = false;			   /*                        */
605 	  break;				   /*                        */
606 	}					   /*                        */
607 	if (RuleFrame(rule) == NO_SYMBOL)	   /*			     */
608 	{ return StringNULL; }		   	   /*			     */
609 						   /*			     */
610 	if (reg.start[0] > 0)		   	   /*			     */
611 	{ c = val[reg.start[0]];		   /* Push initial segment   */
612 	  val[reg.start[0]] = '\0';		   /*			     */
613 	  (void)sbputs((char*)val, s2);		   /*		             */
614 	  val[reg.start[0]] = c;		   /*			     */
615 	}					   /*			     */
616 						   /*			     */
617 	rewrite_1(SymbolValue(RuleFrame(rule)),	   /*                        */
618 		  s2,				   /*                        */
619 		  val,			   	   /*                        */
620 		  db,				   /*                        */
621 		  rec);				   /*		             */
622 	(void)sbputs((char*)(val+reg.end[0]), s2); /* Transfer the end.	     */
623 						   /*			     */
624 	val = (String)sbflush(s2);		   /* update the value	     */
625 	len = strlen((char*)val);		   /*  and its length	     */
626 	sp  = s1; s1 = s2; s2 = sp;		   /* rotate the two string  */
627 	sbrewind(s2);				   /*  buffers and reset     */
628 						   /*  the destination.      */
629 	once_more = true;			   /*                        */
630       }						   /*                        */
631       else					   /*                        */
632       { rule = NextRule(rule);			   /*                        */
633 	limit = rsc_rewrite_limit;		   /*			     */
634       }						   /*                        */
635     }						   /*                        */
636   }						   /*			     */
637 #endif
638   return val;					   /* return the result.     */
639 }						   /*------------------------*/
640 
641 /*-----------------------------------------------------------------------------
642 ** Function*:	check_regex()
643 ** Purpose:
644 **
645 ** Arguments:
646 **	field	the field
647 **	value	the value
648 **	rule	the rule
649 **	rec	the record
650 ** Returns:
651 **___________________________________________________			     */
check_regex(field,value,rule,db,rec)652 static String check_regex(field, value, rule, db, rec)/*		     */
653   Symbol	field;			   	   /*			     */
654   Symbol	value;			   	   /*			     */
655   register Rule	rule;			   	   /*			     */
656   DB		db;			   	   /*                        */
657   Record	rec;			   	   /*			     */
658 {						   /*			     */
659 #ifdef REGEX
660   int		      len;			   /*			     */
661   static StringBuffer *s2 = 0L;			   /*			     */
662 						   /*			     */
663   if (rule == RuleNULL)				   /*                        */
664   { match = RuleNULL;				   /*                        */
665     return SymbolValue(value);			   /*			     */
666   }						   /*			     */
667 						   /*			     */
668   if ( s2 == NULL ) { s2 = sbopen(); }		   /*			     */
669   else		    { sbrewind(s2);  }		   /*			     */
670 						   /*			     */
671   for ( len  =	symlen(value);		   	   /* Loop through all rules */
672 	rule != RuleNULL;			   /*			     */
673 	rule =	NextRule(rule) )		   /*			     */
674   { if ( (   RuleField(rule) == NO_SYMBOL	   /*			     */
675 	  || RuleField(rule) == field )		   /*			     */
676 	&&					   /*                        */
677 	 (   (RuleFlag(rule)&RULE_REGEXP) == 0	   /*                        */
678 	  || re_search(&RulePattern(rule),	   /*			     */
679 		       (char*)SymbolValue(value),  /*                        */
680 		       len,0,len-1,&reg) >=0 	   /*			     */
681 	 )					   /*                        */
682        )					   /*			     */
683     { if ( RuleFrame(rule) == NO_SYMBOL )	   /*			     */
684       { match = RuleNULL;			   /*                        */
685 	return StringNULL;			   /*                        */
686       }						   /*			     */
687       rewrite_1(SymbolValue(RuleFrame(rule)),	   /*                        */
688 		s2,				   /*                        */
689 		SymbolValue(value),		   /*                        */
690 		db,				   /*                        */
691 		rec);  				   /*		             */
692       match = rule;				   /*                        */
693       return (String)sbflush(s2);		   /* TODO: update the value?*/
694     }						   /*                        */
695   }						   /*			     */
696 #endif
697   match = RuleNULL;				   /*                        */
698   return StringNULL;				   /* return the result.     */
699 }						   /*------------------------*/
700 
701 /*---------------------------------------------------------------------------*/
702 /*---			    Rewrite Rule Section			  ---*/
703 /*---------------------------------------------------------------------------*/
704 
 /* The rewrite rules are kept in a singly linked list. rename_field()	     */
 /* appends to it directly; add_rewrite_rule() hands both pointers to	     */
 /* add_rule().								     */
 static Rule r_rule = RuleNULL;		/* first rewrite rule or RuleNULL    */
 static Rule r_rule_end	= RuleNULL;	/* last rewrite rule or RuleNULL     */
707 
708 /*-----------------------------------------------------------------------------
709 ** Function:	rename_field()
710 ** Type:	void
711 ** Purpose:
712 **
713 ** Arguments:
714 **	spec	the argument
715 ** Returns:	nothing
716 **___________________________________________________			     */
rename_field(spec)717 void rename_field(spec)				   /*			     */
718   Symbol spec;					   /*                        */
719 { String s = SymbolValue(spec);			   /*                        */
720   Symbol from;					   /*                        */
721   Symbol to;					   /*                        */
722   Symbol field 	 = NO_SYMBOL;			   /*                        */
723   Symbol pattern = NO_SYMBOL;		   	   /*                        */
724  						   /*                        */
725   sp_open(s);				   	   /*			     */
726   sp_skip(&s);			   	   	   /*			     */
727   if ((from = SParseSymbol(&s)) == NO_SYMBOL)      /*		             */
728     return;					   /*			     */
729   sp_skip(&s);			   	   	   /*			     */
730   if ((to = SParseSymbol(&s)) == NO_SYMBOL)        /*		             */
731     return;					   /*			     */
732  						   /*                        */
733   if (sp_expect(&s, s_if, false))	   	   /*                        */
734   { if ((field = SParseOptionalSymbol(&s)) != NO_SYMBOL)/*	             */
735     { sp_skip(&s);			   	   /*			     */
736       if ((pattern = SParseValue(&s)) == NO_SYMBOL)/*		             */
737       { if (to)    UnlinkSymbol(to);		   /*                        */
738 	if (from)  UnlinkSymbol(from);		   /*                        */
739 	if (field) UnlinkSymbol(field);		   /*                        */
740 	return;					   /*			     */
741       }						   /*                        */
742     }						   /*                        */
743   }						   /*                        */
744   if (sp_eos(&s) != StringNULL )		   /*                        */
745   { if (to)      UnlinkSymbol(to);		   /*                        */
746     if (from)    UnlinkSymbol(from);		   /*                        */
747     if (field)   UnlinkSymbol(field);		   /*                        */
748     if (pattern) UnlinkSymbol(pattern);		   /*                        */
749     return;					   /*			     */
750   }						   /*                        */
751  						   /*                        */
752   Rule rule = new_rule(from,			   /*                        */
753 		       to,		   	   /*                        */
754 		       pattern,			   /*                        */
755 		       field, 	   		   /*                        */
756 		       RULE_RENAME | RULE_REGEXP,  /*                        */
757 		       rsc_case_rewrite);	   /*                        */
758   if (r_rule == RuleNULL)			   /*                        */
759   { r_rule = r_rule_end = rule;			   /*                        */
760   }		   				   /*			     */
761   else					   	   /*                        */
762   { NextRule(r_rule_end) = rule;		   /*                        */
763     r_rule_end = rule;				   /*                        */
764   }						   /*			     */
765 }						   /*------------------------*/
766 
767 /*-----------------------------------------------------------------------------
768 ** Function:	add_rewrite_rule()
769 ** Purpose:	Save a rewrite rule for later use.
770 **		The main task is performed by |add_rule()|.
771 ** Arguments:
772 **	s	Rule to save
773 ** Returns:	nothing
774 **___________________________________________________			     */
add_rewrite_rule(s)775 void add_rewrite_rule(s)			   /*			     */
776   String s;				   	   /*			     */
777 {						   /*			     */
778   DebugPrintF1("add rewrite rule\n");		   /*			     */
779  						   /*                        */
780   add_rule(s,					   /*                        */
781 	   &r_rule,				   /*                        */
782 	   &r_rule_end,				   /*                        */
783 	   RULE_REGEXP,				   /*                        */
784 	   rsc_case_rewrite);			   /*			     */
785 }						   /*------------------------*/
786 
787 /*---------------------------------------------------------------------------*/
788 /*---			       Keep Rule Section			  ---*/
789 /*---------------------------------------------------------------------------*/
790 
/* Number of buckets in the table of keep rules.			     */
#define K_RULES_SIZE 37

/* Table of keep rules: an array of K_RULES_SIZE rule chains which is	     */
/* allocated by the first successful keep_field(). NULL means that no	     */
/* keep rule has been stored yet.					     */
static Rule *k_rules = (Rule*)NULL;
794 
795 /*-----------------------------------------------------------------------------
796 ** Function:	keep_field()
797 ** Type:	void
798 ** Purpose:
799 **
800 ** Arguments:
801 **	spec	the specification
802 ** Returns:	nothing
803 **___________________________________________________			     */
keep_field(spec)804 void keep_field(spec)				   /*			     */
805   Symbol spec;					   /*                        */
806 { String s = SymbolValue(spec);			   /*                        */
807   Symbol* names;				   /*                        */
808   Symbol* np;				   	   /*                        */
809   Symbol field 	 = NO_SYMBOL;			   /*                        */
810   Symbol pattern = NO_SYMBOL;		   	   /*                        */
811   intptr_t i;					   /*                        */
812  						   /*                        */
813   sp_open(s);				   	   /*			     */
814   if ((names = sp_symbols(&s)) == NULL)    	   /*		             */
815     return;					   /*			     */
816  						   /*                        */
817   if (sp_expect(&s, s_if, false))	   	   /*                        */
818   { if ((field = SParseOptionalSymbol(&s)) != NO_SYMBOL)/*	             */
819     { sp_skip(&s);			   	   /*			     */
820       if ((pattern = SParseValue(&s)) == NO_SYMBOL)/*		             */
821       { free_sym_array(names);			   /*                        */
822 	UnlinkSymbol(field);			   /*                        */
823 	return;					   /*			     */
824       }						   /*                        */
825     }						   /*                        */
826     else					   /*                        */
827     { free_sym_array(names);			   /*                        */
828       return;					   /*                        */
829     }						   /*                        */
830   }						   /*                        */
831   if (sp_eos(&s) != StringNULL )	   	   /*			     */
832   { free_sym_array(names);			   /*                        */
833     if (field)   UnlinkSymbol(field);		   /*                        */
834     if (pattern) UnlinkSymbol(pattern);		   /*                        */
835     return;					   /*			     */
836   }						   /*                        */
837  						   /*                        */
838   if (k_rules == NULL)				   /*                        */
839   { if ((k_rules=calloc(K_RULES_SIZE, sizeof(Rule))) == NULL)/*              */
840     { OUT_OF_MEMORY("keep rules"); }		   /*                        */
841   }						   /*                        */
842  						   /*                        */
843   for (np = names; *np; np++)			   /*                        */
844   { LinkSymbol(*np);				   /*                        */
845     Rule rule = new_rule(*np,			   /*                        */
846 			 NULL,		   	   /*                        */
847 			 pattern,		   /*                        */
848 			 field,			   /*                        */
849 			 RULE_KEEP | RULE_REGEXP,  /*                        */
850 			 true);	   	   	   /*                        */
851     i = (intptr_t)(*np) % K_RULES_SIZE;		   /*                        */
852     if (i < 0) i = -i;				   /*                        */
853  						   /*                        */
854     NextRule(rule) = k_rules[i];		   /*                        */
855     k_rules[i] = rule;				   /*                        */
856   }						   /*                        */
857   						   /*                        */
858   free_sym_array(names);			   /*                        */
859 }						   /*------------------------*/
860 
861 /*---------------------------------------------------------------------------*/
862 /*---			      Check Rule Section			  ---*/
863 /*---------------------------------------------------------------------------*/
864 
 /* The check rules. rewrite_record() starts its checks at c_rule; both	     */
 /* pointers are handed to add_rule() by add_check_rule(), which		     */
 /* presumably maintains c_rule_end as the last rule of the list.	     */
 static Rule c_rule = RuleNULL;
 static Rule c_rule_end = RuleNULL;
867 
868 /*-----------------------------------------------------------------------------
869 ** Function:	add_check_rule()
870 ** Purpose:	Save a check rule for later use.
871 ** Arguments:
872 **	s	Rule to save.
873 **	flags	the additional rule flags
874 ** Returns:	nothing
875 **___________________________________________________			     */
add_check_rule(s,flags)876 void add_check_rule(s,flags)			   /*			     */
877   String s;				   	   /*			     */
878   int flags;				   	   /*			     */
879 {						   /*			     */
880   DebugPrintF1("add check rule\n");		   /*			     */
881   add_rule(s,					   /*                        */
882 	   &c_rule,				   /*                        */
883 	   &c_rule_end,				   /*                        */
884 	   RULE_REGEXP|flags,			   /*                        */
885 	   rsc_case_check);  			   /*			     */
886 }						   /*------------------------*/
887 
888 /*-----------------------------------------------------------------------------
889 ** Function*:	dont_keep()
890 ** Type:	static bool
891 ** Purpose:
892 **
893 ** Arguments:
894 **	sym	the symbol
895 **	rec	the record
896 **	db	the database
897 ** Returns:
898 **___________________________________________________			     */
dont_keep(sym,rec,db)899 static bool dont_keep(sym,rec,db)		   /*                        */
900   Symbol   sym;					   /*                        */
901   Record   rec;					   /*                        */
902   DB       db;					   /*                        */
903 { Rule     r;					   /*                        */
904   int      idx = (int)((long)sym % K_RULES_SIZE);  /*                        */
905   if (idx < 0) idx = -idx;			   /*                        */
906  						   /*                        */
907   for (r = k_rules[idx]; r; r = NextRule(r))	   /*                        */
908   {						   /*                        */
909     if (RuleField(r) == sym &&			   /*                        */
910 	selector_hits(r, db, rec))		   /*                        */
911     { return false; }				   /*                        */
912   }						   /*                        */
913   return true;					   /*                        */
914 }						   /*------------------------*/
915 
916 /*-----------------------------------------------------------------------------
917 ** Function:	rewrite_record()
918 ** Purpose:	Apply deletions, checks, additions, and rewriting steps
919 **		in this order.
920 ** Arguments:
921 **	rec	Actual record to apply things to.
922 **	db	The database record is belonging to.
923 ** Returns:	nothing
924 **___________________________________________________			     */
rewrite_record(db,rec)925 void rewrite_record(db, rec)			   /*			     */
926   DB		  db;				   /*                        */
927   register Record rec;				   /*			     */
928 { register int	  i;				   /*			     */
929   register Symbol *hp;				   /* heap pointer	     */
930   register Macro  mac;				   /*			     */
931   String          cp;				   /*			     */
932   static StringBuffer *sb = NULL;		   /*                        */
933  						   /*                        */
934   if (sb == NULL) sb = sbopen();		   /*                        */
935 						   /*			     */
936   if (c_rule)			   		   /*			     */
937   {						   /*                        */
938     for (i = RecordFree(rec), hp = RecordHeap(rec);/*			     */
939 	 i > 0;				   	   /*			     */
940 	 i -= 2, hp +=2)			   /*			     */
941     {						   /*			     */
942       if (   *hp				   /*			     */
943 	  && *(hp+1)				   /*			     */
944 	  && StringNULL !=			   /*			     */
945 	     (cp=check_regex(*hp,	   	   /*                        */
946 			     *(hp+1), 		   /*                        */
947 			     c_rule,		   /*                        */
948 			     db,		   /*                        */
949 			     rec))		   /*		             */
950 	  )					   /*			     */
951       { ErrPrint("*** BibTool");		   /*                        */
952 	if (match)				   /*			     */
953 	{ if (RuleFlag(match)&RULE_ERROR)	   /*			     */
954 	  { ErrPrint(" ERROR"); }		   /*			     */
955 	  if (RuleFlag(match)&RULE_WARNING)	   /*			     */
956 	  { ErrPrint(" WARNING"); }		   /*			     */
957 	}					   /*			     */
958 	err_location(RecordLineno(rec),		   /*                        */
959 		     RecordSource(rec), NULL);	   /*                        */
960 	ErrPrintF(": %s\n", cp);		   /*                        */
961       }						   /*			     */
962     }						   /*			     */
963   }						   /*			     */
964 						   /*			     */
965   if (r_rule)			   		   /*			     */
966   {   						   /*			     */
967     for (i = RecordFree(rec), hp = RecordHeap(rec);/*			     */
968 	 i > 0;					   /*			     */
969 	 i -= 2, hp += 2)			   /*			     */
970     {						   /*			     */
971       if (*hp && *(hp+1))			   /*			     */
972       {						   /*			     */
973 	cp = repl_regex(*hp,*(hp+1),r_rule,db,rec);/*			     */
974 	if (cp == StringNULL)		   	   /*			     */
975 	{ if (*hp) UnlinkSymbol(*hp);		   /*                        */
976 	  if (*(hp+1)) UnlinkSymbol(*(hp+1));	   /*                        */
977 	  *hp = *(hp+1) = NO_SYMBOL;		   /*                        */
978 	}					   /*                        */
979 	else if (strcmp((char*)cp,		   /*                        */
980 			(char*)SymbolValue(*(hp+1))))/*		             */
981 	{ if (*(hp+1)) UnlinkSymbol(*(hp+1));	   /*                        */
982 	  *(hp+1) = symbol(cp);			   /*                        */
983 	}		   			   /*			     */
984       }						   /*			     */
985     }						   /*			     */
986   }						   /*			     */
987 						   /*                        */
988   if (k_rules)			   		   /*			     */
989   {						   /*                        */
990     for (i = RecordFree(rec), hp = RecordHeap(rec);/*			     */
991 	 i > 0;					   /*			     */
992 	 i -= 2, hp += 2)			   /*			     */
993     {						   /*			     */
994       if (*hp &&				   /*                        */
995 	  *(hp+1) &&				   /*                        */
996 	  dont_keep(sym_star, rec, db) &&	   /*                        */
997 	  dont_keep(*hp, rec, db))		   /*                        */
998       { if (*hp) UnlinkSymbol(*hp);		   /*                        */
999 	if (*(hp+1)) UnlinkSymbol(*(hp+1));	   /*                        */
1000 	*hp = *(hp+1) = NO_SYMBOL;		   /*                        */
1001       }						   /*			     */
1002     }						   /*			     */
1003   }						   /*			     */
1004  						   /*                        */
1005   for (mac = addlist;				   /* Add all items in the   */
1006        mac;					   /*  add list		     */
1007        mac = NextMacro(mac) )			   /*			     */
1008   { cp = SymbolValue(MacroValue(mac));		   /*                        */
1009     sbrewind(sb);				   /*                        */
1010     sbputc('{',sb);				   /*                        */
1011     while ( *cp )				   /*                        */
1012     {						   /*                        */
1013       if ( *cp == '%' )			   	   /*                        */
1014       { if ( *(cp+1) == '%' )			   /*                        */
1015         { sbputc(*cp,sb); cp+=2; }		   /*                        */
1016         else { cp = fmt_expand(sb,cp,db,rec); }    /*                        */
1017       }					   	   /*                        */
1018       else { sbputc(*cp,sb); cp++; }		   /*                        */
1019     }						   /*                        */
1020     sbputc('}', sb);				   /*                        */
1021     push_to_record(rec,			   	   /*                        */
1022 		   MacroName(mac),		   /*                        */
1023 		   symbol((String)sbflush(sb)),	   /*                        */
1024 		   false);			   /*                        */
1025   }						   /*			     */
1026 }						   /*------------------------*/
1027 
1028 /*---------------------------------------------------------------------------*/
1029 /*---			    Extract Rule Section			  ---*/
1030 /*---------------------------------------------------------------------------*/
1031 
 /* The extraction (selection) rules. is_selected() walks the list	     */
 /* starting at x_rule; both pointers are handed to add_rule() by	     */
 /* add_extract() and save_regex(), which presumably maintains x_rule_end   */
 /* as the last rule of the list.					     */
 static Rule x_rule = RuleNULL;
 static Rule x_rule_end = RuleNULL;
1034 
1035 /*-----------------------------------------------------------------------------
1036 ** Function:	add_extract()
1037 ** Purpose:	Save an extraction rule for later use. The argument is
1038 **		interpreted as regular expression to be matched
1039 **		against the field value.
1040 **
1041 **		The value of |rsc_case_select| at the invocation of
1042 **		this function determines whether the matching is
1043 **		performed case sensitive or not.
1044 ** Arguments:
1045 **	s	Rule to save.
1046 **	regexp	Boolean value indicating whether regular expressions
1047 **		should be used. If not set then plain string matching
1048 **		is performed.
1049 **	notp	Boolean value indicating whether the result should be
1050 **		negated.
1051 ** Returns:	nothing
1052 **___________________________________________________			     */
add_extract(s,regexp,notp)1053 void add_extract(s,regexp,notp)			   /*			     */
1054   Symbol s;				   	   /*			     */
1055   int regexp;					   /*                        */
1056   int notp;					   /*                        */
1057 {						   /*                        */
1058   add_rule(SymbolValue(s),			   /* The main task is       */
1059 	   &x_rule,				   /*  performed by          */
1060 	   &x_rule_end,				   /*  |add_rule()|.         */
1061 	   (regexp?RULE_REGEXP:RULE_NONE) |	   /*                        */
1062 	   (notp  ?RULE_NOT   :RULE_NONE) ,	   /*                        */
1063 	   !rsc_case_select);			   /*			     */
1064   rsc_select = true;				   /*                        */
1065 }						   /*------------------------*/
1066 
1067 /*-----------------------------------------------------------------------------
1068 ** Function:	save_regex()
1069 ** Purpose:	Save an extraction rule for later use.
1070 **		Only the regular expression of the rule is given as argument.
1071 **		The fields are taken from the resource select.fields.
1072 ** Arguments:
1073 **	s	Regular expression to search for.
1074 ** Returns:	nothing
1075 **___________________________________________________			     */
save_regex(s)1076 void save_regex(s)				   /*                        */
1077   String s;				   	   /*                        */
1078 { String t = malloc( (size_t)strlen((char*)s)	   /*                        */
1079 		   + (size_t)strlen((char*)rsc_sel_fields)/*                 */
1080 		   + 4 );			   /*			     */
1081   if ( t == NULL ) { OUT_OF_MEMORY("string"); }	   /*			     */
1082  						   /*                        */
1083   (void)strcpy((char*)t, (char*)rsc_sel_fields);   /*			     */
1084   (void)strcat((char*)t, " \"");		   /*			     */
1085   (void)strcat((char*)t, (char*)s);		   /*			     */
1086   (void)strcat((char*)t, "\"");			   /*			     */
1087  						   /*                        */
1088   add_rule(t,				   	   /*                        */
1089 	   &x_rule,				   /*                        */
1090 	   &x_rule_end,				   /*                        */
1091 	   RULE_REGEXP,				   /*                        */
1092 	   !rsc_case_select);			   /*			     */
1093  						   /*                        */
1094   free((char*)t);				   /*                        */
1095   rsc_select = true;				   /*                        */
1096 }						   /*------------------------*/
1097 
1098 
 /* State of the plain string search; set up by init_s_search().	     */
 /* s_class maps a character code to its class: characters of the same	     */
 /* class are considered equal and the class '\0' marks characters which    */
 /* are ignored.							     */
 static char s_class[256];			   /*                        */
 /* Value of rsc_sel_ignored at the time of the last initialisation.	     */
 static String s_ignored = (String)NULL;	   /*                        */
 /* Value of rsc_case_select at the time of the last initialisation. The    */
 /* initial -33 is presumably a value this resource never takes, so that    */
 /* the first s_search() triggers the initialisation.			     */
 static int  s_cased    = -33;			   /*                        */
1102 
1103 /*-----------------------------------------------------------------------------
1104 ** Function*:	init_s_search()
1105 ** Purpose:
1106 **
1107 **
1108 ** Arguments:
1109 **	ignored	the letters to be ignored
1110 ** Returns:	Nothing
1111 **___________________________________________________			     */
init_s_search(ignored)1112 static void init_s_search(ignored)		   /*                        */
1113   String ignored;				   /*                        */
1114 { int i;					   /*                        */
1115   for (i = 0; i < 256; i++) s_class[i] = i;	   /*                        */
1116  						   /*                        */
1117   if (!rsc_case_select) 			   /*                        */
1118   {						   /*                        */
1119     s_class['a'] = 'A';				   /*                        */
1120     s_class['b'] = 'B';				   /*                        */
1121     s_class['c'] = 'C';				   /*                        */
1122     s_class['d'] = 'D';				   /*                        */
1123     s_class['e'] = 'E';				   /*                        */
1124     s_class['f'] = 'F';				   /*                        */
1125     s_class['g'] = 'G';				   /*                        */
1126     s_class['h'] = 'H';				   /*                        */
1127     s_class['i'] = 'I';				   /*                        */
1128     s_class['j'] = 'J';				   /*                        */
1129     s_class['k'] = 'K';				   /*                        */
1130     s_class['l'] = 'L';				   /*                        */
1131     s_class['m'] = 'M';				   /*                        */
1132     s_class['n'] = 'N';				   /*                        */
1133     s_class['o'] = 'O';				   /*                        */
1134     s_class['p'] = 'P';				   /*                        */
1135     s_class['q'] = 'Q';				   /*                        */
1136     s_class['r'] = 'R';				   /*                        */
1137     s_class['s'] = 'S';				   /*                        */
1138     s_class['t'] = 'T';				   /*                        */
1139     s_class['u'] = 'U';				   /*                        */
1140     s_class['v'] = 'V';				   /*                        */
1141     s_class['w'] = 'W';				   /*                        */
1142     s_class['x'] = 'X';				   /*                        */
1143     s_class['y'] = 'Y';				   /*                        */
1144     s_class['z'] = 'Z';				   /*                        */
1145   }						   /*                        */
1146   while ( *ignored )				   /*                        */
1147   { s_class[(unsigned int)(*(ignored++))] = '\0'; }/*                        */
1148  						   /*                        */
1149   s_cased   = rsc_case_select;			   /*                        */
1150   s_ignored = rsc_sel_ignored;	   	   	   /*                        */
1151 }						   /*------------------------*/
1152 
1153 /*-----------------------------------------------------------------------------
1154 ** Function*:	s_match()
1155 ** Purpose:
1156 **
1157 **
1158 ** Arguments:
1159 **	pattern	the pattern
1160 **	s	the string
1161 ** Returns:
1162 **___________________________________________________			     */
s_match(p,s)1163 static bool s_match(p,s)			   /*                        */
1164   String  p;					   /*                        */
1165   String  s;					   /*                        */
1166 {						   /*                        */
1167   while (*p && s_class[(unsigned int)*p] == '\0') p++;/*                     */
1168  						   /*                        */
1169   while (*p)					   /*                        */
1170   {						   /*                        */
1171     while (*s && s_class[(unsigned int)*s] == '\0') s++;/*                   */
1172  						   /*                        */
1173     if (s_class[(unsigned int)*s] != s_class[(unsigned int)*p]) return false;
1174     while (*p && s_class[(unsigned int)*p] == '\0') p++;/*                   */
1175     if (*s) s++;				   /*                        */
1176     if (*p) p++;				   /*                        */
1177   }						   /*                        */
1178   return true;					   /*                        */
1179 }						   /*------------------------*/
1180 
1181 /*-----------------------------------------------------------------------------
1182 ** Function*:	s_search()
1183 ** Purpose:	Match a pattern against all positions in a string.
1184 ** Arguments:
1185 **	pattern	the pattern
1186 **	s	the string
1187 ** Returns:	If a match is found then |true| is returned. Otherwise
1188 **		|false|.
1189 **___________________________________________________			     */
s_search(pattern,s)1190 static bool s_search(pattern,s)			   /*                        */
1191   String  pattern;				   /*                        */
1192   String  s;					   /*                        */
1193 {						   /*                        */
1194   if ( s_cased != rsc_case_select ||		   /*                        */
1195        strcmp((char*)s_ignored,			   /*                        */
1196 	      (char*)rsc_sel_ignored) != 0 )	   /*                        */
1197   { init_s_search(rsc_sel_ignored); }		   /*                        */
1198  						   /*                        */
1199   for ( ; *s; s++ )				   /*                        */
1200   { if (s_match(pattern,s)) return true;	   /*                        */
1201   }						   /*                        */
1202   return false;					   /*                        */
1203 }						   /*------------------------*/
1204 
/* Leave the enclosing function with |true| if the outcome of COND and the
** presence of the flag |RULE_NOT| in the current |rule| differ, i.e. if COND
** holds for a normal rule or fails for a negated rule. The expansion is a
** complete if/else statement; it relies on a variable |rule| in scope.
*/
#define ReturnIf(COND)                                                  \
  if ( !(COND) == !(RuleFlag(rule) & RULE_NOT) )                        \
  { }                                                                   \
  else                                                                  \
  { return true; }
1210 
1211 /*-----------------------------------------------------------------------------
1212 ** Function:	is_selected()
1213 ** Purpose:	Boolean function to decide whether a record should be
1214 **		considered. These selections are described by a set of
1215 **		regular expressions which are applied. If none are
1216 **		given then the match simply succeeds.
1217 ** Arguments:
1218 **	db	Database containing the record.
1219 **	rec	Record to look at.
1220 ** Returns:	|true| iff the record is seleced by a regexp or none is
1221 **		given.
1222 **___________________________________________________			     */
is_selected(db,rec)1223 bool is_selected(db,rec)			   /*			     */
1224   DB     db;					   /*                        */
1225   Record rec;			   		   /*			     */
1226 {						   /*			     */
1227   int	 len, i;				   /*			     */
1228   Symbol value;				   	   /*                        */
1229   Rule   rule;					   /*                        */
1230  						   /*                        */
1231   if ( (rule=x_rule) == RuleNULL ||		   /* If no rule is given or */
1232        !rsc_select	   			   /*  no selection is       */
1233      )				   		   /*  requested then        */
1234     return true;				   /*  select all records.   */
1235  						   /*                        */
1236   for ( ;			   		   /* Loop through all rules */
1237 	rule != RuleNULL;			   /*			     */
1238 	rule =	NextRule(rule) )		   /*			     */
1239   {						   /*			     */
1240     if ( RuleField(rule) == NULL )		   /* If no field is given   */
1241     {						   /*  then try all normal   */
1242       if ( RuleFlag(rule) & RULE_REGEXP )	   /*                        */
1243       {						   /*                        */
1244 #ifdef REGEX
1245 	if ( RecordHeap(rec)[0] )		   /*                        */
1246 	{ len = symlen(RecordHeap(rec)[0]);	   /*                        */
1247 	  ReturnIf(re_search(&RulePattern(rule),   /*			     */
1248 			     (char*)SymbolValue(RecordHeap(rec)[0]),/*       */
1249 			     len,		   /*                        */
1250 			     0,		   	   /*                        */
1251 			     len - 1,		   /*                        */
1252 			     &reg) >= 0 );	   /*		             */
1253 	}					   /*                        */
1254 	for (i = 2; i < RecordFree(rec); i += 2 )  /*                        */
1255 	{ if ( RecordHeap(rec)[i] )		   /*                        */
1256 	  { len = symlen(RecordHeap(rec)[i+1]);	   /*                        */
1257 	    ReturnIf(re_search(&RulePattern(rule), /*			     */
1258 			       (char*)SymbolValue(RecordHeap(rec)[i+1]),/*   */
1259 			       len,		   /*                        */
1260 			       0,		   /*                        */
1261 			       len - 1,		   /*                        */
1262 			       &reg) >= 0 );	   /*		             */
1263 	  }					   /*                        */
1264 	}					   /*                        */
1265 #endif
1266       }						   /*                        */
1267       else					   /*                        */
1268       {						   /*                        */
1269 	if ( SymbolValue(RecordHeap(rec)[0]) )	   /*                        */
1270 	{ ReturnIf(s_search(SymbolValue(RuleGoal(rule)),/*		     */
1271 			    SymbolValue(RecordHeap(rec)[0])) );/*	     */
1272 	}					   /*                        */
1273 	for (i = 2; i < RecordFree(rec); i += 2 )  /*                        */
1274 	{ if ( RecordHeap(rec)[i] )		   /*                        */
1275 	  { ReturnIf(s_search(SymbolValue(RuleGoal(rule)),/*		     */
1276 			      SymbolValue(RecordHeap(rec)[i+1])) );/*	     */
1277 	  }					   /*                        */
1278 	}					   /*                        */
1279       }						   /*                        */
1280     }						   /*                        */
1281     else if ( (value=get_field(db,		   /*                        */
1282 			       rec,		   /*                        */
1283 			       RuleField(rule)))   /*                        */
1284 	     != NO_SYMBOL )			   /*                        */
1285     {						   /*                        */
1286       if ( RuleFlag(rule) & RULE_REGEXP )	   /*                        */
1287       {						   /*                        */
1288 #ifdef REGEX
1289 	len = symlen(value);   			   /*                        */
1290         ReturnIf(re_search(&RulePattern(rule),	   /*			     */
1291 			   (char*)SymbolValue(value),/*                      */
1292 			   len,			   /*                        */
1293 			   0,			   /*                        */
1294 			   len - 1,		   /*                        */
1295 			   &reg) >=0 ) 		   /*			     */
1296 #endif
1297       }						   /*                        */
1298       else ReturnIf(s_search(SymbolValue(RuleGoal(rule)),/*                  */
1299 			     SymbolValue(value)))  /*                        */
1300     }						   /*			     */
1301     else if ( RuleFlag(rule) & RULE_NOT )	   /*                        */
1302     { return true;				   /*                        */
1303     }						   /*                        */
1304   }						   /*                        */
1305   return false;				   	   /* return the result.     */
1306 }						   /*------------------------*/
1307 
1308 #ifdef REGEX
1309 #endif
1310 
/*-----------------------------------------------------------------------------
** Function:    set_regex_syntax()
** Type:        int
** Purpose:     experimental
**
**              Select the syntax of the regular expressions by name. The
**              names |emacs|, |awk|, |grep|, |egrep|, |posix_awk|,
**              |posix_egrep|, |ed|, and |sed| are known. The syntax is
**              activated with |re_set_syntax()|. For any other name a
**              warning is issued.
**
**              If |REGEX| is not defined then nothing is done.
** Arguments:
**      name    the name of the syntax
** Returns:     |0| if the syntax has been set or |REGEX| is not defined
**              and |1| if the name is not known.
**___________________________________________________                        */
int set_regex_syntax(name)
  char* name;
{
#ifdef REGEX
  if ( strcmp(name,"emacs") == 0 )
  { re_set_syntax(RE_SYNTAX_EMACS); return 0; }
  if ( strcmp(name,"awk") == 0 )
  { re_set_syntax(RE_SYNTAX_AWK); return 0; }
  if ( strcmp(name,"grep") == 0 )
  { re_set_syntax(RE_SYNTAX_GREP); return 0; }
  if ( strcmp(name,"egrep") == 0 )
  { re_set_syntax(RE_SYNTAX_EGREP); return 0; }
  if ( strcmp(name,"posix_awk") == 0 )
  { re_set_syntax(RE_SYNTAX_POSIX_AWK); return 0; }
  if ( strcmp(name,"posix_egrep") == 0 )
  { re_set_syntax(RE_SYNTAX_POSIX_EGREP); return 0; }
  if ( strcmp(name,"ed") == 0 )
  { re_set_syntax(RE_SYNTAX_ED); return 0; }
  if ( strcmp(name,"sed") == 0 )
  { re_set_syntax(RE_SYNTAX_SED); return 0; }

  WARNING3("Unknown regexp syntax: ",name,"\n");
  return 1;
#else
  return 0;
#endif
}
1347 
/*-----------------------------------------------------------------------------
** Function:	get_regex_syntax()
** Type:	char*
** Purpose:	Getter for the regex syntax. The current value of
**		re_syntax_options is translated back into the name of
**		the syntax.
**
**		The name "egrep" is only reported if RE_SYNTAX_EGREP
**		differs from RE_SYNTAX_POSIX_EGREP; otherwise
**		"posix_egrep" is reported for this value.
**		The name "ed" is never reported since there is no case
**		for RE_SYNTAX_ED (presumably it coincides with another
**		syntax constant -- to be confirmed against regex.h).
**
** Arguments:	none
** Returns:	One of the constant strings "emacs", "awk", "grep",
**		"egrep", "posix_awk", "posix_egrep", or "sed". The empty
**		string is returned if the current options match none of
**		them or if REGEX is not defined.
**___________________________________________________			     */
char* get_regex_syntax()			   /*                        */
{ char* syntax_name = "";			   /* fallback: empty string */
						   /*                        */
#ifdef REGEX
  switch(re_syntax_options)			   /*                        */
  { case RE_SYNTAX_EMACS:			   /*                        */
      syntax_name = "emacs";			   /*                        */
      break;					   /*                        */
    case RE_SYNTAX_AWK:				   /*                        */
      syntax_name = "awk";			   /*                        */
      break;					   /*                        */
    case RE_SYNTAX_GREP:			   /*                        */
      syntax_name = "grep";			   /*                        */
      break;					   /*                        */
#if (RE_SYNTAX_EGREP != RE_SYNTAX_POSIX_EGREP)
    case RE_SYNTAX_EGREP:			   /* Only a distinct value  */
      syntax_name = "egrep";			   /*  can get its own case  */
      break;					   /*  label.                */
#endif
    case RE_SYNTAX_POSIX_AWK:			   /*                        */
      syntax_name = "posix_awk";		   /*                        */
      break;					   /*                        */
    case RE_SYNTAX_POSIX_EGREP:			   /*                        */
      syntax_name = "posix_egrep";		   /*                        */
      break;					   /*                        */
    case RE_SYNTAX_SED:				   /*                        */
      syntax_name = "sed";			   /*                        */
      break;					   /*                        */
    default:					   /* Unknown options keep   */
      break;					   /*  the fallback.         */
  }						   /*                        */
#endif
  return syntax_name;				   /*                        */
}						   /*------------------------*/
1373