1 /*
2    Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17    02110-1301  USA */
18 
19 /*
20   Replace strings in textfile
21 
22   This program replaces strings in files or from stdin to stdout.
23   It accepts a list of from-string/to-string pairs and replaces
24   each occurrence of a from-string with the corresponding to-string.
25   The first occurrence of a found string is matched. If there is more
26   than one possibility for the string to replace, longer matches
27   are preferred before shorter matches.
28 
29   Special characters in from string:
30   \^    Match start of line.
31   \$	Match end of line.
32   \b	Match space-character, start of line or end of line.
33         For an ending \b the next replace starts looking at the end space-character.
34         An \b alone or in a string matches only a space-character.
35   \r, \t, \v as in C.
36   The program builds a DFA state machine from the strings, so the speed isn't
37   dependent on the count of replace-strings (only on the number of replaces).
38   A line is assumed to end with \n or \0.
39   There is no limit except memory on the length of strings.
40 
41   Written by Monty.
42   fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44 
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50 
/*
  Allocation units used by insert_pointer_name(): the pointer/flag table
  is sized in multiples of PC_MALLOC bytes and the string storage in
  multiples of PS_MALLOC bytes (each minus MALLOC_OVERHEAD).
*/
51 #define PC_MALLOC		256	/* Bytes for pointers */
52 #define PS_MALLOC		512	/* Bytes for data */
53 
/*
  Growable list of strings: typelib.type_names points into the single
  character buffer 'str'; the per-string flag bytes are stored in the same
  allocation as the pointer table, directly after max_count pointers.
*/
54 typedef struct st_pointer_array {		/* when using array-strings */
55   TYPELIB typelib;				/* Pointers to the strings and their count */
56   uchar *str;					/* The strings themselves are stored here */
57   uint8	*flag;					/* One flag byte per string */
  /*
    array_allocs: number of PC_MALLOC units allocated for the pointer table
    max_count:    number of pointer/flag slots in the table
    length:       bytes of 'str' in use
    max_length:   bytes allocated for 'str'
  */
58   uint  array_allocs,max_count,length,max_length;
59 } POINTER_ARRAY;
60 
/*
  Pseudo character codes, placed above the 0..255 byte range, that
  init_replace() stores in FOLLOWS.chr for the special from-string
  sequences.
*/
61 #define SPACE_CHAR	256		/* \b in a from-string */
62 #define START_OF_LINE	257		/* \^ in a from-string */
63 #define END_OF_LINE	258		/* \$ in a from-string */
64 #define LAST_CHAR_CODE	259		/* Size of tables indexed by char code */
65 
/*
  One state of the finished replace state machine.
  'found' is zero for a state; entries of next[] may instead point to a
  REPLACE_STRING (cast to REPLACE*), whose first member 'found' is non-zero,
  which is how replace_strings() tells the two apart.
*/
66 typedef struct st_replace {
67   my_bool   found;			/* 0 for a state node */
68   struct st_replace *next[256];		/* Next node for each input byte */
69 } REPLACE;
70 
/*
  Terminal node of the state machine: describes what to do when a
  from-string has been recognised (or when the end of input is reached).
*/
71 typedef struct st_replace_found {
72   my_bool found;		/* 1, or 2 when the from-string is exactly "\^" */
73   char *replace_string;		/* To-string; NULL means end without a replace */
74   uint to_offset;		/* Subtracted from the output position before copying */
75   int from_offset;		/* Subtracted from the input position after copying */
76 } REPLACE_STRING;
77 
/* Number of bits in a uint; used to index the bitmaps of a REP_SET */
78 #ifndef WORD_BIT
79 #define WORD_BIT (8*sizeof(uint))
80 #endif
81 
82 	/* functions defined in this file */
83 
/* Command line handling */
84 static int static_get_options(int *argc,char * * *argv);
85 static int get_replace_strings(int *argc,char * * *argv,
86 				   POINTER_ARRAY *from_array,
87 				   POINTER_ARRAY *to_array);
/* Growable string list */
88 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
89 static void free_pointer_array(POINTER_ARRAY *pa);
/* Conversion of a stream / of a named file */
90 static int convert_pipe(REPLACE *,FILE *,FILE *);
91 static int convert_file(REPLACE *, char *);
/* Building and running the replace state machine */
92 static REPLACE *init_replace(char * *from, char * *to,uint count,
93                              char * word_end_chars);
94 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
95                             char * from);
/* Input/output buffer management */
96 static int initialize_buffer(void);
97 static void reset_buffer(void);
98 static void free_buffer(void);
99 
/*
  silent/verbose are set by the -s and -v options; updated is set by
  replace_strings() when at least one replacement was made.
*/
100 static int silent=0,verbose=0,updated=0;
101 
102 	/* The main program */
103 
main(int argc,char * argv[])104 int main(int argc, char *argv[])
105 {
106   int i,error;
107   char word_end_chars[256],*pos;
108   POINTER_ARRAY from,to;
109   REPLACE *replace;
110   MY_INIT(argv[0]);
111 
112   if (static_get_options(&argc,&argv))
113     exit(1);
114   if (get_replace_strings(&argc,&argv,&from,&to))
115     exit(1);
116 
117   for (i=1,pos=word_end_chars ; i < 256 ; i++)
118     if (my_isspace(&my_charset_latin1,i))
119       *pos++= (char) i;
120   *pos=0;
121   if (!(replace=init_replace((char**) from.typelib.type_names,
122 			     (char**) to.typelib.type_names,
123 			     (uint) from.typelib.count,word_end_chars)))
124     exit(1);
125   free_pointer_array(&from);
126   free_pointer_array(&to);
127   if (initialize_buffer())
128     return 1;
129 
130   error=0;
131   if (argc == 0)
132     error=convert_pipe(replace,stdin,stdout);
133   else
134   {
135     while (argc--)
136     {
137       error=convert_file(replace,*(argv++));
138     }
139   }
140   free_buffer();
141   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
142   exit(error ? 2 : 0);
143   return 0;					/* No compiler warning */
144 } /* main */
145 
146 
147 	/* reads options */
148 	/* Initiates DEBUG - but no debugging here ! */
149 
static_get_options(argc,argv)150 static int static_get_options(argc,argv)
151 register int *argc;
152 register char **argv[];
153 {
154   int help,version;
155   char *pos;
156 
157   silent=verbose=help=0;
158 
159   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
160     while (*++pos)
161     {
162       version=0;
163       switch((*pos)) {
164       case 's':
165 	silent=1;
166 	break;
167       case 'v':
168 	verbose=1;
169 	break;
170       case '#':
171 	DBUG_PUSH (++pos);
172 	pos= (char*) " ";			/* Skip rest of arguments */
173 	break;
174       case 'V':
175 	version=1;
176       case 'I':
177       case '?':
178 	help=1;					/* Help text written */
179 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
180 	       MACHINE_TYPE);
181 	if (version)
182 	  break;
183 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
184 	puts("This program replaces strings in files or from stdin to stdout.\n"
185 	     "It accepts a list of from-string/to-string pairs and replaces\n"
186 	     "each occurrence of a from-string with the corresponding to-string.\n"
187          "The first occurrence of a found string is matched. If there is\n"
188          "more than one possibility for the string to replace, longer\n"
189          "matches are preferred before shorter matches.\n\n"
190 	     "A from-string can contain these special characters:\n"
191 	     "  \\^      Match start of line.\n"
192 	     "  \\$      Match end of line.\n"
193 	     "  \\b      Match space-character, start of line or end of line.\n"
194 	     "          For a end \\b the next replace starts locking at the end\n"
195 	     "          space-character. A \\b alone in a string matches only a\n"
196 	     "          space-character.\n");
197 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
198 	puts("or");
199 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
200 	puts("");
201 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\"");
202 	break;
203       default:
204 	fprintf(stderr,"illegal option: -%c\n",*pos);
205 	break;
206       }
207     }
208   }
209   if (*argc == 0)
210   {
211     if (!help)
212       my_message(0,"No replace options given",MYF(ME_BELL));
213     exit(0);					/* Don't use as pipe */
214   }
215   return(0);
216 } /* static_get_options */
217 
218 
get_replace_strings(argc,argv,from_array,to_array)219 static int get_replace_strings(argc,argv,from_array,to_array)
220 register int *argc;
221 register char **argv[];
222 POINTER_ARRAY *from_array,*to_array;
223 {
224   char *pos;
225 
226   bzero((char*) from_array,sizeof(from_array[0]));
227   bzero((char*) to_array,sizeof(to_array[0]));
228   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
229   {
230     insert_pointer_name(from_array,pos);
231     (*argc)--;
232     (*argv)++;
233     if (!*argc || !strcmp(**argv,"--"))
234     {
235       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
236       return 1;
237     }
238     insert_pointer_name(to_array,**argv);
239     (*argc)--;
240     (*argv)++;
241   }
242   if (*argc)
243   {					/* Skip "--" argument */
244     (*argc)--;
245     (*argv)++;
246   }
247   return 0;
248 }
249 
/*
  Append a copy of 'name' to the string list 'pa'.

  The first call allocates the pointer/flag table and the string buffer;
  later calls grow them as needed.  The pointer table is always kept
  terminated with a NullS end-mark.

  Returns 0 on success, non-zero (-1 or 1) when memory could not be
  allocated.
*/
static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    /* First string: allocate the pointer/flag table and the string buffer */
    if (!(pa->typelib.type_names=(const char **)
          my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
                     (sizeof(char *)+sizeof(*pa->flag))*
                     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                      MYF(MY_WME))))
    {
      my_free(pa->typelib.type_names);
      pa->typelib.type_names= 0;		/* Don't leave a dangling pointer */
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
                                               sizeof(*pa->flag));
    /* The flag bytes live directly after the max_count pointers */
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Grow the string buffer to the next multiple of PS_MALLOC */
    pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
                                       (uint) pa->max_length,
                                       MYF(MY_WME))))
      DBUG_RETURN(1);
    if (new_pos != pa->str)
    {
      /* The buffer moved: relocate every stored string pointer */
      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
      for (i=0 ; i < pa->typelib.count ; i++)
        pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
                                              char*);
      pa->str=new_pos;
    }
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    /* Grow the pointer/flag table by one more PC_MALLOC unit */
    int len;
    pa->array_allocs++;
    len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
                                               (uint) len/
                                         (sizeof(uchar*)+sizeof(*pa->flag))*
                                         (sizeof(uchar*)+sizeof(*pa->flag)),
                                         MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    /*
      Move the flag bytes to their new place behind the enlarged pointer
      table.  Once the table is large enough the old and new flag areas
      overlap, so memmove() is required here rather than memcpy().
    */
    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
            old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
320 
321 
322 	/* free pointer array */
323 
/*
  Release the pointer table and the string buffer of 'pa'.
  Does nothing when the array holds no strings.
*/
static void free_pointer_array(reg1 POINTER_ARRAY *pa)
{
  if (!pa->typelib.count)
    return;					/* Nothing stored */

  pa->typelib.count=0;
  my_free(pa->typelib.type_names);
  pa->typelib.type_names=0;
  my_free(pa->str);
} /* free_pointer_array */
335 
336 
337 	/* Code for the replace routines */
338 
/* Number of REP_SETs added each time make_new_set() has to grow the buffer */
339 #define SET_MALLOC_HUNC 64
340 
/*
  One state of the machine while it is being built by init_replace():
  a set of positions in the follow[] array plus the transitions out of it.
*/
341 typedef struct st_rep_set {
342   uint  *bits;				/* Bitmap of follow[] positions in this set */
343   short	next[LAST_CHAR_CODE];		/* Per char: >= 0 next set, < 0 found entry */
344   uint	found_len;			/* Best match to date */
345   int	found_offset;
346   uint  table_offset;			/* Index of the matched from-string; ~0 if none */
347   uint  size_of_bits;			/* Copy of REP_SETS.size_of_bits, for convenience */
348 } REP_SET;
349 
/* The growable collection of REP_SETs used while building the machine */
350 typedef struct st_rep_sets {
351   uint		count;			/* Number of (visible) sets */
352   uint		extra;			/* Allocated but unused sets in buffer */
353   uint		invisible;		/* Sets hidden in front of 'set' */
354   uint		size_of_bits;		/* Number of uints in each set's bitmap */
355   REP_SET	*set,*set_buffer;	/* First visible set / start of allocation */
356   uint		*bit_buffer;		/* Storage for the bitmaps of all sets */
357 } REP_SETS;
358 
/*
  A distinct (from-string, offset) match result; find_found() keeps one
  entry per combination and init_replace() turns each into a REPLACE_STRING.
*/
359 typedef struct st_found_set {
360   uint table_offset;		/* Index of the from/to pair */
361   int found_offset;
362 } FOUND_SET;
363 
/*
  One position in a from-string.  init_replace() stores one entry per
  (unescaped) character and an extra entry with chr == 0 after each string.
*/
364 typedef struct st_follow {
365   int chr;			/* Char code, pseudo code (SPACE_CHAR...) or 0 at end */
366   uint table_offset;		/* Index of the from-string this belongs to */
367   uint len;			/* Number of characters of the string up to here */
368 } FOLLOWS;
369 
370 
/* Allocation and bookkeeping of the REP_SETS collection */
371 static int init_sets(REP_SETS *sets,uint states);
372 static REP_SET *make_new_set(REP_SETS *sets);
373 static void make_sets_invisible(REP_SETS *sets);
374 static void free_last_set(REP_SETS *sets);
375 static void free_sets(REP_SETS *sets);
/* Bitmap operations on a single REP_SET */
376 static void internal_set_bit(REP_SET *set, uint bit);
377 static void internal_clear_bit(REP_SET *set, uint bit);
378 static void or_bits(REP_SET *to,REP_SET *from);
379 static void copy_bits(REP_SET *to,REP_SET *from);
380 static int cmp_bits(REP_SET *set1,REP_SET *set2);
381 static int get_next_bit(REP_SET *set,uint lastpos);
/* Lookup of already existing sets / found entries */
382 static short find_set(REP_SETS *sets,REP_SET *find);
383 static short find_found(FOUND_SET *found_set,uint table_offset,
384                         int found_offset);
/* Helpers that inspect a from-string */
385 static uint start_at_word(char * pos);
386 static uint end_of_word(char * pos);
387 static uint replace_len(char * pos);
388 
/* Number of entries in use in the found_set array (see find_found()) */
389 static uint found_sets=0;
390 
391 
392 	/* Init a replace structure for further calls */
393 
init_replace(char ** from,char ** to,uint count,char * word_end_chars)394 static REPLACE *init_replace(char * *from, char * *to,uint count,
395                              char * word_end_chars)
396 {
397   uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
398   int used_sets,chr;
399   short default_state;
400   char used_chars[LAST_CHAR_CODE],is_word_end[256];
401   char * pos, *to_pos, **to_array;
402   REP_SETS sets;
403   REP_SET *set,*start_states,*word_states,*new_set;
404   FOLLOWS *follow,*follow_ptr;
405   REPLACE *replace;
406   FOUND_SET *found_set;
407   REPLACE_STRING *rep_str;
408   DBUG_ENTER("init_replace");
409 
410   /* Count number of states */
411   for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
412   {
413     len=replace_len(from[i]);
414     if (!len)
415     {
416       errno=EINVAL;
417       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
418       DBUG_RETURN(0);
419     }
420     states+=len+1;
421     result_len+=(uint) strlen(to[i])+1;
422     if (len > max_length)
423       max_length=len;
424   }
425   bzero((char*) is_word_end,sizeof(is_word_end));
426   for (i=0 ; word_end_chars[i] ; i++)
427     is_word_end[(uchar) word_end_chars[i]]=1;
428 
429   if (init_sets(&sets,states))
430     DBUG_RETURN(0);
431   found_sets=0;
432   if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
433 					  MYF(MY_WME))))
434   {
435     free_sets(&sets);
436     DBUG_RETURN(0);
437   }
438   (void) make_new_set(&sets);			/* Set starting set */
439   make_sets_invisible(&sets);			/* Hide previus sets */
440   used_sets=-1;
441   word_states=make_new_set(&sets);		/* Start of new word */
442   start_states=make_new_set(&sets);		/* This is first state */
443   if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
444   {
445     free_sets(&sets);
446     my_free(found_set);
447     DBUG_RETURN(0);
448   }
449 
450 	/* Init follow_ptr[] */
451   for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
452   {
453     if (from[i][0] == '\\' && from[i][1] == '^')
454     {
455       internal_set_bit(start_states,states+1);
456       if (!from[i][2])
457       {
458 	start_states->table_offset=i;
459 	start_states->found_offset=1;
460       }
461     }
462     else if (from[i][0] == '\\' && from[i][1] == '$')
463     {
464       internal_set_bit(start_states,states);
465       internal_set_bit(word_states,states);
466       if (!from[i][2] && start_states->table_offset == (uint) ~0)
467       {
468 	start_states->table_offset=i;
469 	start_states->found_offset=0;
470       }
471     }
472     else
473     {
474       internal_set_bit(word_states,states);
475       if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
476 	internal_set_bit(start_states,states+1);
477       else
478 	internal_set_bit(start_states,states);
479     }
480     for (pos=from[i], len=0; *pos ; pos++)
481     {
482       if (*pos == '\\' && *(pos+1))
483       {
484 	pos++;
485 	switch (*pos) {
486 	case 'b':
487 	  follow_ptr->chr = SPACE_CHAR;
488 	  break;
489 	case '^':
490 	  follow_ptr->chr = START_OF_LINE;
491 	  break;
492 	case '$':
493 	  follow_ptr->chr = END_OF_LINE;
494 	  break;
495 	case 'r':
496 	  follow_ptr->chr = '\r';
497 	  break;
498 	case 't':
499 	  follow_ptr->chr = '\t';
500 	  break;
501 	case 'v':
502 	  follow_ptr->chr = '\v';
503 	  break;
504 	default:
505 	  follow_ptr->chr = (uchar) *pos;
506 	  break;
507 	}
508       }
509       else
510 	follow_ptr->chr= (uchar) *pos;
511       follow_ptr->table_offset=i;
512       follow_ptr->len= ++len;
513       follow_ptr++;
514     }
515     follow_ptr->chr=0;
516     follow_ptr->table_offset=i;
517     follow_ptr->len=len;
518     follow_ptr++;
519     states+=(uint) len+1;
520   }
521 
522 
523   for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
524   {
525     set=sets.set+set_nr;
526     default_state= 0;				/* Start from beginning */
527 
528     /* If end of found-string not found or start-set with current set */
529 
530     for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
531     {
532       if (!follow[i].chr)
533       {
534 	if (! default_state)
535 	  default_state= find_found(found_set,set->table_offset,
536 				    set->found_offset+1);
537       }
538     }
539     copy_bits(sets.set+used_sets,set);		/* Save set for changes */
540     if (!default_state)
541       or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */
542 
543     /* Find all chars that follows current sets */
544     bzero((char*) used_chars,sizeof(used_chars));
545     for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
546     {
547       used_chars[follow[i].chr]=1;
548       if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
549 	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
550 	used_chars[0]=1;
551     }
552 
553     /* Mark word_chars used if \b is in state */
554     if (used_chars[SPACE_CHAR])
555       for (pos= word_end_chars ; *pos ; pos++)
556 	used_chars[(int) (uchar) *pos] = 1;
557 
558     /* Handle other used characters */
559     for (chr= 0 ; chr < 256 ; chr++)
560     {
561       if (! used_chars[chr])
562 	set->next[chr]= (short) (chr ? default_state : -1);
563       else
564       {
565 	new_set=make_new_set(&sets);
566 	set=sets.set+set_nr;			/* if realloc */
567 	new_set->table_offset=set->table_offset;
568 	new_set->found_len=set->found_len;
569 	new_set->found_offset=set->found_offset+1;
570 	found_end=0;
571 
572 	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
573 	{
574 	  if (!follow[i].chr || follow[i].chr == chr ||
575 	      (follow[i].chr == SPACE_CHAR &&
576 	       (is_word_end[chr] ||
577 		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
578 	      (follow[i].chr == END_OF_LINE && ! chr))
579 	  {
580 	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
581 		follow[i].len > found_end)
582 	      found_end=follow[i].len;
583 	    if (chr && follow[i].chr)
584 	      internal_set_bit(new_set,i+1);		/* To next set */
585 	    else
586 	      internal_set_bit(new_set,i);
587 	  }
588 	}
589 	if (found_end)
590 	{
591 	  new_set->found_len=0;			/* Set for testing if first */
592 	  bits_set=0;
593 	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
594 	  {
595 	    if ((follow[i].chr == SPACE_CHAR ||
596 		 follow[i].chr == END_OF_LINE) && ! chr)
597 	      bit_nr=i+1;
598 	    else
599 	      bit_nr=i;
600 	    if (follow[bit_nr-1].len < found_end ||
601 		(new_set->found_len &&
602 		 (chr == 0 || !follow[bit_nr].chr)))
603 	      internal_clear_bit(new_set,i);
604 	    else
605 	    {
606 	      if (chr == 0 || !follow[bit_nr].chr)
607 	      {					/* best match  */
608 		new_set->table_offset=follow[bit_nr].table_offset;
609 		if (chr || (follow[i].chr == SPACE_CHAR ||
610 			    follow[i].chr == END_OF_LINE))
611 		  new_set->found_offset=found_end;	/* New match */
612 		new_set->found_len=found_end;
613 	      }
614 	      bits_set++;
615 	    }
616 	  }
617 	  if (bits_set == 1)
618 	  {
619 	    set->next[chr] = find_found(found_set,
620 					new_set->table_offset,
621 					new_set->found_offset);
622 	    free_last_set(&sets);
623 	  }
624 	  else
625 	    set->next[chr] = find_set(&sets,new_set);
626 	}
627 	else
628 	  set->next[chr] = find_set(&sets,new_set);
629       }
630     }
631   }
632 
633 	/* Alloc replace structure for the replace-state-machine */
634 
635   if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
636 				    sizeof(REPLACE_STRING)*(found_sets+1)+
637 				    sizeof(char *)*count+result_len,
638 				    MYF(MY_WME | MY_ZEROFILL))))
639   {
640     rep_str=(REPLACE_STRING*) (replace+sets.count);
641     to_array=(char **) (rep_str+found_sets+1);
642     to_pos=(char *) (to_array+count);
643     for (i=0 ; i < count ; i++)
644     {
645       to_array[i]=to_pos;
646       to_pos=strmov(to_pos,to[i])+1;
647     }
648     rep_str[0].found=1;
649     rep_str[0].replace_string=0;
650     for (i=1 ; i <= found_sets ; i++)
651     {
652       pos=from[found_set[i-1].table_offset];
653       rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
654       rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
655       rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
656       rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
657 	end_of_word(pos);
658     }
659     for (i=0 ; i < sets.count ; i++)
660     {
661       for (j=0 ; j < 256 ; j++)
662 	if (sets.set[i].next[j] >= 0)
663 	  replace[i].next[j]=replace+sets.set[i].next[j];
664 	else
665 	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
666     }
667   }
668   my_free(follow);
669   free_sets(&sets);
670   my_free(found_set);
671   DBUG_PRINT("exit",("Replace table has %d states",sets.count));
672   DBUG_RETURN(replace);
673 }
674 
675 
/*
  Initialise 'sets' for a machine with 'states' follow positions and
  allocate the initial set and bitmap buffers.

  Returns 0 on success, 1 if memory could not be allocated (nothing is
  left allocated in that case).
*/
static int init_sets(REP_SETS *sets,uint states)
{
  bzero((char*) sets,sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* sets->set is still NULL here; the allocation to release is set_buffer */
    my_free(sets->set_buffer);
    sets->set_buffer= 0;
    return 1;
  }
  return 0;
}
691 
692 	/* Make helper sets invisible for nicer coding */
693 
/*
  Hide all sets created so far: 'set' is moved past them and the visible
  count restarts at zero.  The hidden sets stay reachable through
  negative indexes from 'set' and through 'set_buffer'.
*/
static void make_sets_invisible(REP_SETS *sets)
{
  const uint hidden= sets->count;

  sets->invisible= hidden;
  sets->set+= hidden;
  sets->count= 0;
}
700 
make_new_set(REP_SETS * sets)701 static REP_SET *make_new_set(REP_SETS *sets)
702 {
703   uint i,count,*bit_buffer;
704   REP_SET *set;
705   if (sets->extra)
706   {
707     sets->extra--;
708     set=sets->set+ sets->count++;
709     bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
710     bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
711     set->found_offset=0;
712     set->found_len=0;
713     set->table_offset= (uint) ~0;
714     set->size_of_bits=sets->size_of_bits;
715     return set;
716   }
717   count=sets->count+sets->invisible+SET_MALLOC_HUNC;
718   if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
719 				   sizeof(REP_SET)*count,
720 				  MYF(MY_WME))))
721     return 0;
722   sets->set_buffer=set;
723   sets->set=set+sets->invisible;
724   if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
725 				      (sizeof(uint)*sets->size_of_bits)*count,
726 				      MYF(MY_WME))))
727     return 0;
728   sets->bit_buffer=bit_buffer;
729   for (i=0 ; i < count ; i++)
730   {
731     sets->set_buffer[i].bits=bit_buffer;
732     bit_buffer+=sets->size_of_bits;
733   }
734   sets->extra=SET_MALLOC_HUNC;
735   return make_new_set(sets);
736 }
737 
/* Give the most recently made set back to the pool of spare sets */
static void free_last_set(REP_SETS *sets)
{
  sets->extra+= 1;
  sets->count-= 1;
}
744 
/* Release the set buffer and the bitmap buffer of 'sets' */
static void free_sets(REP_SETS *sets)
{
  REP_SET *set_storage= sets->set_buffer;
  uint *bit_storage= sets->bit_buffer;

  my_free(set_storage);
  my_free(bit_storage);
}
751 
/*
  Set bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned 1 so that the top bit of a word can
  be addressed without shifting into the sign bit of an int.
*/
static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
}
757 
/*
  Clear bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned 1 so that the top bit of a word can
  be addressed without shifting into the sign bit of an int.
*/
static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~(1U << (bit % WORD_BIT));
}
763 
764 
/* Add every bit that is set in 'from' to the bitmap of 'to' */
static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *dst_end= dst + to->size_of_bits;
  const uint *src= from->bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
772 
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */
static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits,(uchar*) from->bits, bytes);
}
778 
/*
  Compare the bitmaps of two sets.
  Returns 0 when they are identical, otherwise the memcmp() result.
*/
static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bytes);
}
784 
785 
786 	/* Get next set bit from set. */
787 
/*
  Find the first set bit after position 'lastpos' in the bitmap of 'set'.
  Pass (uint) ~0 as 'lastpos' to start the search at bit 0.

  Returns the bit number, or 0 when no further bit is set (callers never
  use bit 0, so 0 doubles as the end marker).
*/
static int get_next_bit(REP_SET *set,uint lastpos)
{
  uint pos,*start,*end,bits;

  start=set->bits+ ((lastpos+1) / WORD_BIT);
  end=set->bits + set->size_of_bits;
  /*
    Mask away the bits up to and including lastpos in the first word.
    Unsigned arithmetic keeps the shift and the subtraction well defined
    even when the shift count is WORD_BIT-1.
  */
  bits=start[0] & ~((1U << ((lastpos+1) % WORD_BIT)) - 1U);

  while (! bits && ++start < end)
    bits=start[0];
  if (!bits)
    return 0;
  pos=(uint) (start-set->bits)*WORD_BIT;
  while (! (bits & 1))
  {
    bits>>=1;
    pos++;
  }
  return pos;
}
808 
809 	/* Find out if there is an identical set in sets. If there is, use it
810 	   and free the given set, else keep the given set in sets and return
811 	   its position */
812 
/*
  Look for an earlier set whose bitmap equals that of 'find' (which is
  expected to be the last set in 'sets').  When one exists, 'find' is
  given back with free_last_set() and the index of the earlier set is
  returned; otherwise the index of 'find' itself is returned.
*/
static short find_set(REP_SETS *sets,REP_SET *find)
{
  uint idx;
  const uint last= sets->count-1;		/* Position of 'find' */

  for (idx= 0 ; idx < last ; idx++)
  {
    if (cmp_bits(sets->set+idx,find) != 0)
      continue;
    free_last_set(sets);			/* Duplicate; reuse old set */
    return (short) idx;
  }
  return (short) idx;				/* return new position */
}
826 
827 
828 /*
829   Find out if there is a found_set with the same table_offset and
830   found_offset. If there is, return its position, else add a new entry
831   and return the position of that. The position returned is -offset-2
832   in the found_set structure, because it is saved in set->next[]:
833   set->next[] >= 0 points to the next set and set->next[] == -1 is
834   reserved for end without replaces.
835 */
835 
/*
  Return the encoded position (-index-2) of the found_set entry with the
  given table_offset and found_offset, appending a new entry (and
  incrementing found_sets) when no such entry exists yet.
*/
static short find_found(FOUND_SET *found_set,uint table_offset,
                        int found_offset)
{
  int idx= 0;
  FOUND_SET *entry= found_set;

  while ((uint) idx < found_sets)
  {
    if (entry->table_offset == table_offset &&
        entry->found_offset == found_offset)
      return (short) (-idx-2);
    idx++;
    entry++;
  }
  /* Not seen before: 'entry' now points at the first unused slot */
  entry->table_offset= table_offset;
  entry->found_offset= found_offset;
  found_sets++;
  return (short) (-idx-2);			/* return new position */
}
849 
850 	/* start_at_word(): return 1 if the from-string starts with \b (followed
	   by more text) or with \^.  end_of_word(): return 1 if it ends with
	   \b or \$ */
851 
start_at_word(char * pos)852 static uint start_at_word(char * pos)
853 {
854   return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
855 }
856 
end_of_word(char * pos)857 static uint end_of_word(char * pos)
858 {
859   char * end=strend(pos);
860   return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
861 	  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
862 	    1 : 0;
863 }
864 
865 
replace_len(char * str)866 static uint replace_len(char * str)
867 {
868   uint len=0;
869   while (*str)
870   {
871     if (str[0] == '\\' && str[1])
872       str++;
873     str++;
874     len++;
875   }
876   return len;
877 }
878 
879 
880 	/* The actual loop */
881 
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)882 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
883                             char *from)
884 {
885   reg1 REPLACE *rep_pos;
886   reg2 REPLACE_STRING *rep_str;
887   char *to, *end, *pos, *new;
888 
889   end=(to= *start) + *max_length-1;
890   rep_pos=rep+1;
891   for(;;)
892   {
893     while (!rep_pos->found)
894     {
895       rep_pos= rep_pos->next[(uchar) *from];
896       if (to == end)
897       {
898 	(*max_length)+=8192;
899 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
900 	  return (uint) -1;
901 	to=new+(to - *start);
902 	end=(*start=new)+ *max_length-1;
903       }
904       *to++= *from++;
905     }
906     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
907       return (uint) (to - *start)-1;
908     updated=1;			/* Some char * is replaced */
909     to-=rep_str->to_offset;
910     for (pos=rep_str->replace_string; *pos ; pos++)
911     {
912       if (to == end)
913       {
914 	(*max_length)*=2;
915 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
916 	  return (uint) -1;
917 	to=new+(to - *start);
918 	end=(*start=new)+ *max_length-1;
919       }
920       *to++= *pos;
921     }
922     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
923       return (uint) (to - *start);
924     rep_pos=rep;
925   }
926 }
927 
/* Input side: filled by fill_buffer_retaining() */
928 static char *buffer;		/* The buffer itself, grown as needed. */
929 static int bufbytes;		/* Number of bytes in the buffer. */
930 static int bufread,my_eof;		/* bufread: bytes to get with each read(); my_eof: set when a final newline was added at end of input */
931 static uint bufalloc;		/* Allocated size of buffer (one extra byte is allocated on top) */
/* Output side: the line buffer that replace_strings() writes into */
932 static char *out_buff;
933 static uint out_length;		/* Allocated size of out_buff */
934 
initialize_buffer()935 static int initialize_buffer()
936 {
937   bufread = 8192;
938   bufalloc = bufread + bufread / 2;
939   if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
940     return 1;
941   bufbytes=my_eof=0;
942   out_length=bufread;
943   if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
944     return(1);
945   return 0;
946 }
947 
reset_buffer()948 static void reset_buffer()
949 {
950   bufbytes=my_eof=0;
951 }
952 
free_buffer()953 static void free_buffer()
954 {
955   my_free(buffer);
956   my_free(out_buff);
957 }
958 
959 
960 /*
961   Fill the buffer retaining the last n bytes at the beginning of the
962   newly filled buffer (for backward context).  Returns the number of new
963   bytes read from disk.
964 */
965 
fill_buffer_retaining(fd,n)966 static int fill_buffer_retaining(fd,n)
967 File fd;
968 int n;
969 {
970   int i;
971 
972   /* See if we need to grow the buffer. */
973   if ((int) bufalloc - n <= bufread)
974   {
975     while ((int) bufalloc - n <= bufread)
976     {
977       bufalloc *= 2;
978       bufread *= 2;
979     }
980     buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
981     if (! buffer)
982       return(-1);
983   }
984 
985   /* Shift stuff down. */
986   bmove(buffer,buffer+bufbytes-n,(uint) n);
987   bufbytes = n;
988 
989   if (my_eof)
990     return 0;
991 
992   /* Read in new stuff. */
993   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
994                        (size_t) bufread, MYF(MY_WME))) < 0)
995     return -1;
996 
997   /* Kludge to pretend every nonempty file ends with a newline. */
998   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
999   {
1000     my_eof = i = 1;
1001     buffer[bufbytes] = '\n';
1002   }
1003 
1004   bufbytes += i;
1005   return i;
1006 }
1007 
1008 	/* Return 0 if the conversion is ok */
1009 	/* The global variable 'updated' is set if something was changed */
1010 
convert_pipe(rep,in,out)1011 static int convert_pipe(rep,in,out)
1012 REPLACE *rep;
1013 FILE *in,*out;
1014 {
1015   int retain,error;
1016   uint length;
1017   char save_char,*end_of_line,*start_of_line;
1018   DBUG_ENTER("convert_pipe");
1019 
1020   updated=retain=0;
1021   reset_buffer();
1022 
1023   while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1024   {
1025     end_of_line=buffer ;
1026     buffer[bufbytes]=0;			/* Sentinel  */
1027     for (;;)
1028     {
1029       start_of_line=end_of_line;
1030       while (end_of_line[0] != '\n' && end_of_line[0])
1031 	end_of_line++;
1032       if (end_of_line == buffer+bufbytes)
1033       {
1034 	retain= (int) (end_of_line - start_of_line);
1035 	break;				/* No end of line, read more */
1036       }
1037       save_char=end_of_line[0];
1038       end_of_line[0]=0;
1039       end_of_line++;
1040       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1041 	  (uint) -1)
1042 	return 1;
1043       if (!my_eof)
1044 	out_buff[length++]=save_char;	/* Don't write added newline */
1045       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1046 	DBUG_RETURN(1);
1047     }
1048   }
1049   DBUG_RETURN(error);
1050 }
1051 
1052 
/*
  Apply the replacements to the file 'name'.

  The result is written to a temporary file in the same directory.  If
  something was replaced and no error occurred, the temporary file takes
  the place of the original (following a symlink when symlink support is
  available and enabled); otherwise the temporary file is deleted.

  Returns 0 on success and non-zero on error.
*/
static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* Don't leave the descriptor open or the temporary file behind */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /*
    Closing flushes the output; a failure here means the temporary file is
    incomplete and must not replace the original.
  */
  if (my_fclose(out,MYF(MY_WME)) && ! error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1101