1 /*
2    Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23    02110-1301  USA */
24 
25 /*
26   Replace strings in textfile
27 
28   This program replaces strings in files or from stdin to stdout.
29   It accepts a list of from-string/to-string pairs and replaces
30   each occurrence of a from-string with the corresponding to-string.
31   The first occurrence of a found string is matched. If there is more
32   than one possibility for the string to replace, longer matches
33   are preferred before shorter matches.
34 
35   Special characters in from string:
36   \^    Match start of line.
37   \$	Match end of line.
38   \b	Match space-character, start of line or end of line.
        For an end \b the next replace starts looking at the end space-character.
40         An \b alone or in a string matches only a space-character.
41   \r, \t, \v as in C.
42   The programs make a DFA-state-machine of the strings and the speed isn't
43   dependent on the count of replace-strings (only of the number of replaces).
44   A line is assumed ending with \n or \0.
  There is no limit except memory on the length of strings.
46 
47   Written by Monty.
48   fill_buffer_retaining() is taken from gnu-grep and modified.
49 */
50 
51 #include <my_global.h>
52 #include <m_ctype.h>
53 #include <my_sys.h>
54 #include <m_string.h>
55 #include <errno.h>
56 
/*
  Allocation granularity used by insert_pointer_name(). MALLOC_OVERHEAD is
  subtracted from these sizes before they are passed to the allocator.
*/
#define PC_MALLOC		256	/* Bytes for pointers */
#define PS_MALLOC		512	/* Bytes for data */
59 
/*
  Growable list of NUL-terminated strings. Filled by insert_pointer_name()
  and released by free_pointer_array().
*/
typedef struct st_pointer_array {		/* when using array-strings */
  TYPELIB typelib;				/* type_names[] points into str; count = number of strings */
  uchar *str;					/* All strings, stored back to back */
  uint8	*flag;					/* One flag byte per string; lives in the type_names allocation, after the pointers */
  /*
    array_allocs  number of PC_MALLOC-sized steps the pointer array has grown to
    max_count     number of slots in the pointer array
    length        bytes of str currently in use
    max_length    bytes allocated for str
  */
  uint  array_allocs,max_count,length,max_length;
} POINTER_ARRAY;
66 
/*
  Pseudo character codes placed above the byte range 0..255. They are stored
  in FOLLOWS.chr for the from-string escapes \b, \^ and \$ and used as
  indexes into used_chars[] in init_replace().
*/
#define SPACE_CHAR	256
#define START_OF_LINE	257
#define END_OF_LINE	258
#define LAST_CHAR_CODE	259	/* Number of codes; used as array size */
71 
/*
  One state of the finished replace machine built by init_replace().
  replace_strings() follows next[] while 'found' is 0.
*/
typedef struct st_replace {
  my_bool   found;			/* 0 for a state (table is zero-filled on allocation) */
  /*
    Transition for each input byte. An entry points either to another
    REPLACE state or to a REPLACE_STRING cast to REPLACE* (told apart by
    the leading 'found' member).
  */
  struct st_replace *next[256];
} REPLACE;
76 
/*
  Terminal record reached through REPLACE.next[] when a from-string has
  matched, or when the line ended without a match.
*/
typedef struct st_replace_found {
  my_bool found;			/* 1, or 2 when the from-string is exactly "\^" */
  char *replace_string;			/* to-string to emit; 0 in the "no replacement" record */
  uint to_offset;			/* Output bytes to step back before writing replace_string */
  int from_offset;			/* Input bytes to step back before scanning resumes */
} REPLACE_STRING;
83 
/* Number of bits in one uint word of a REP_SET bitmap, unless already defined */
#ifndef WORD_BIT
#define WORD_BIT (8*sizeof(uint))
#endif
87 
88 	/* functions defined in this file */
89 
/* Command line handling */
static int static_get_options(int *argc,char * * *argv);
static int get_replace_strings(int *argc,char * * *argv,
				   POINTER_ARRAY *from_array,
				   POINTER_ARRAY *to_array);
/* String list helpers */
static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
static void free_pointer_array(POINTER_ARRAY *pa);
/* Stream and file conversion drivers */
static int convert_pipe(REPLACE *,FILE *,FILE *);
static int convert_file(REPLACE *, char *);
/* State machine construction and the per-line replace loop */
static REPLACE *init_replace(char * *from, char * *to,uint count,
                             char * word_end_chars);
static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
                            char * from);
/* Input/output buffer management */
static int initialize_buffer(void);
static void reset_buffer(void);
static void free_buffer(void);

/*
  silent   set by option -s
  verbose  set by option -v
  updated  set to 1 by replace_strings() when a replacement was made;
           cleared at the start of convert_pipe()
*/
static int silent=0,verbose=0,updated=0;
107 
108 	/* The main program */
109 
main(int argc,char * argv[])110 int main(int argc, char *argv[])
111 {
112   int i,error;
113   char word_end_chars[256],*pos;
114   POINTER_ARRAY from,to;
115   REPLACE *replace;
116   MY_INIT(argv[0]);
117 
118   if (static_get_options(&argc,&argv))
119     exit(1);
120   if (get_replace_strings(&argc,&argv,&from,&to))
121     exit(1);
122 
123   for (i=1,pos=word_end_chars ; i < 256 ; i++)
124     if (my_isspace(&my_charset_latin1,i))
125       *pos++= (char) i;
126   *pos=0;
127   if (!(replace=init_replace((char**) from.typelib.type_names,
128 			     (char**) to.typelib.type_names,
129 			     (uint) from.typelib.count,word_end_chars)))
130     exit(1);
131   free_pointer_array(&from);
132   free_pointer_array(&to);
133   if (initialize_buffer())
134     return 1;
135 
136   error=0;
137   if (argc == 0)
138     error=convert_pipe(replace,stdin,stdout);
139   else
140   {
141     while (argc--)
142     {
143       error=convert_file(replace,*(argv++));
144     }
145   }
146   free_buffer();
147   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
148   exit(error ? 2 : 0);
149   return 0;					/* No compiler warning */
150 } /* main */
151 
152 
153 	/* reads options */
154 	/* Initiates DEBUG - but no debugging here ! */
155 
static_get_options(argc,argv)156 static int static_get_options(argc,argv)
157 int *argc;
158 char **argv[];
159 {
160   int help,version;
161   char *pos;
162 
163   silent=verbose=help=0;
164 
165   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
166     while (*++pos)
167     {
168       version=0;
169       switch((*pos)) {
170       case 's':
171 	silent=1;
172 	break;
173       case 'v':
174 	verbose=1;
175 	break;
176       case '#':
177 	DBUG_PUSH (++pos);
178 	pos= (char*) " ";			/* Skip rest of arguments */
179 	break;
180       case 'V':
181 	version=1;
182       // fallthrough
183       case 'I':
184       case '?':
185 	help=1;					/* Help text written */
186 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
187 	       MACHINE_TYPE);
188 	if (version)
189 	  break;
190 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
191 	puts("This program replaces strings in files or from stdin to stdout.\n"
192 	     "It accepts a list of from-string/to-string pairs and replaces\n"
193 	     "each occurrence of a from-string with the corresponding to-string.\n"
194          "The first occurrence of a found string is matched. If there is\n"
195          "more than one possibility for the string to replace, longer\n"
196          "matches are preferred before shorter matches.\n\n"
197 	     "A from-string can contain these special characters:\n"
198 	     "  \\^      Match start of line.\n"
199 	     "  \\$      Match end of line.\n"
200 	     "  \\b      Match space-character, start of line or end of line.\n"
201 	     "          For a end \\b the next replace starts locking at the end\n"
202 	     "          space-character. A \\b alone in a string matches only a\n"
203 	     "          space-character.\n");
204 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
205 	puts("or");
206 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
207 	puts("");
208 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\"");
209 	break;
210       default:
211 	fprintf(stderr,"illegal option: -%c\n",*pos);
212 	break;
213       }
214     }
215   }
216   if (*argc == 0)
217   {
218     if (!help)
219       my_message(0,"No replace options given",MYF(ME_BELL));
220     exit(0);					/* Don't use as pipe */
221   }
222   return(0);
223 } /* static_get_options */
224 
225 
get_replace_strings(argc,argv,from_array,to_array)226 static int get_replace_strings(argc,argv,from_array,to_array)
227 int *argc;
228 char **argv[];
229 POINTER_ARRAY *from_array,*to_array;
230 {
231   char *pos;
232 
233   memset(from_array, 0, sizeof(from_array[0]));
234   memset(to_array, 0, sizeof(to_array[0]));
235   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
236   {
237     insert_pointer_name(from_array,pos);
238     (*argc)--;
239     (*argv)++;
240     if (!*argc || !strcmp(**argv,"--"))
241     {
242       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
243       return 1;
244     }
245     insert_pointer_name(to_array,**argv);
246     (*argc)--;
247     (*argv)++;
248   }
249   if (*argc)
250   {					/* Skip "--" argument */
251     (*argc)--;
252     (*argv)++;
253   }
254   return 0;
255 }
256 
/*
  Append a copy of 'name' to the string list 'pa'.

  pa    List to extend. Must be zero-filled before the first call; the
        first call allocates the pointer array and the string storage.
  name  NUL-terminated string to copy.

  Returns 0 on success and non-zero on allocation failure (-1 from the
  initial allocation, 1 from a later reallocation). On a reallocation
  failure the list is left as it was before the call.
*/
static int insert_pointer_name(POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    /* One allocation holds max_count pointers followed by max_count flags */
    if (!(pa->typelib.type_names=(const char **)
          my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
                     (sizeof(char *)+sizeof(*pa->flag))*
                     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                      MYF(MY_WME))))
    {
      my_free(pa->typelib.type_names);
      pa->typelib.type_names= 0;		/* Don't leave a dangling pointer */
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
                                               sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Round the needed size up to a multiple of PS_MALLOC */
    uint new_max_length;
    new_max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    new_max_length=new_max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
                                       (uint) new_max_length,
                                       MYF(MY_WME))))
      DBUG_RETURN(1);
    /* Record the new capacity only once the buffer really has it */
    pa->max_length=new_max_length;
    if (new_pos != pa->str)
    {
      /* The storage moved: rebase every stored string pointer */
      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
      for (i=0 ; i < pa->typelib.count ; i++)
        pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
                                              char*);
      pa->str=new_pos;
    }
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    int len;
    pa->array_allocs++;
    len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
                                               (uint) len/
                                         (sizeof(uchar*)+sizeof(*pa->flag))*
                                         (sizeof(uchar*)+sizeof(*pa->flag)),
                                         MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    /*
      Move the flags behind the enlarged pointer area. Once the array is
      large the old and new flag regions overlap, so memmove is required.
    */
    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
            old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
327 
328 
329 	/* free pointer array */
330 
/*
  Release the storage of a string list. A list that holds no strings is
  left untouched.
*/
static void free_pointer_array(POINTER_ARRAY *pa)
{
  if (!pa->typelib.count)
    return;

  pa->typelib.count= 0;
  my_free(pa->typelib.type_names);
  pa->typelib.type_names= 0;
  my_free(pa->str);
} /* free_pointer_array */
342 
343 
	/* Code for replace routines */
345 
/* Number of REP_SET entries added each time the set buffer is grown */
#define SET_MALLOC_HUNC 64
347 
/*
  One state set used while the machine is being built. Bit k of 'bits'
  refers to follow[k] in init_replace().
*/
typedef struct st_rep_set {
  uint  *bits;				/* Bitmap; points into REP_SETS.bit_buffer */
  short	next[LAST_CHAR_CODE];		/* >= 0: index of next set; < 0: encoded found entry */
  uint	found_len;			/* Best match to date */
  int	found_offset;
  uint  table_offset;			/* Index of the matched from-string; ~0 when none */
  uint  size_of_bits;			/* Copy of REP_SETS.size_of_bits, for convenience */
} REP_SET;
356 
/* Pool of REP_SET entries together with their shared bitmap storage */
typedef struct st_rep_sets {
  uint		count;			/* Number of sets */
  uint		extra;			/* Extra sets in buffer */
  uint		invisible;		/* Sets not shown (hidden in front of 'set') */
  uint		size_of_bits;		/* Number of uint words in each bitmap */
  REP_SET	*set,*set_buffer;	/* set = set_buffer + invisible */
  uint		*bit_buffer;		/* Bitmap words for every set in set_buffer */
} REP_SETS;
365 
/* A distinct (from-string, offset) pair registered by find_found() */
typedef struct st_found_set {
  uint table_offset;			/* Index of the from-string */
  int found_offset;
} FOUND_SET;
370 
/*
  One position within a from-string. init_replace() stores one entry per
  character, followed by a terminating entry with chr == 0.
*/
typedef struct st_follow {
  int chr;				/* Byte value, SPACE_CHAR, START_OF_LINE or END_OF_LINE; 0 = end */
  uint table_offset;			/* Index of the from-string this position belongs to */
  uint len;				/* Characters of the from-string up to and including this one */
} FOLLOWS;
376 
377 
/* Set pool management */
static int init_sets(REP_SETS *sets,uint states);
static REP_SET *make_new_set(REP_SETS *sets);
static void make_sets_invisible(REP_SETS *sets);
static void free_last_set(REP_SETS *sets);
static void free_sets(REP_SETS *sets);
/* Bitmap primitives */
static void internal_set_bit(REP_SET *set, uint bit);
static void internal_clear_bit(REP_SET *set, uint bit);
static void or_bits(REP_SET *to,REP_SET *from);
static void copy_bits(REP_SET *to,REP_SET *from);
static int cmp_bits(REP_SET *set1,REP_SET *set2);
static int get_next_bit(REP_SET *set,uint lastpos);
/* Lookup of existing sets and found entries */
static short find_set(REP_SETS *sets,REP_SET *find);
static short find_found(FOUND_SET *found_set,uint table_offset,
                        int found_offset);
/* From-string inspection */
static uint start_at_word(char * pos);
static uint end_of_word(char * pos);
static uint replace_len(char * pos);

/* Number of entries used in the found_set array; maintained by find_found() */
static uint found_sets=0;
397 
398 
399 	/* Init a replace structure for further calls */
400 
init_replace(char ** from,char ** to,uint count,char * word_end_chars)401 static REPLACE *init_replace(char * *from, char * *to,uint count,
402                              char * word_end_chars)
403 {
404   uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
405   int used_sets,chr;
406   short default_state;
407   char used_chars[LAST_CHAR_CODE],is_word_end[256];
408   char * pos, *to_pos, **to_array;
409   REP_SETS sets;
410   REP_SET *set,*start_states,*word_states,*new_set;
411   FOLLOWS *follow,*follow_ptr;
412   REPLACE *replace;
413   FOUND_SET *found_set;
414   REPLACE_STRING *rep_str;
415   DBUG_ENTER("init_replace");
416 
417   /* Count number of states */
418   for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
419   {
420     len=replace_len(from[i]);
421     if (!len)
422     {
423       errno=EINVAL;
424       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
425       DBUG_RETURN(0);
426     }
427     states+=len+1;
428     result_len+=(uint) strlen(to[i])+1;
429     if (len > max_length)
430       max_length=len;
431   }
432   memset(is_word_end, 0, sizeof(is_word_end));
433   for (i=0 ; word_end_chars[i] ; i++)
434     is_word_end[(uchar) word_end_chars[i]]=1;
435 
436   if (init_sets(&sets,states))
437     DBUG_RETURN(0);
438   found_sets=0;
439   if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
440 					  MYF(MY_WME))))
441   {
442     free_sets(&sets);
443     DBUG_RETURN(0);
444   }
445   (void) make_new_set(&sets);			/* Set starting set */
446   make_sets_invisible(&sets);			/* Hide previus sets */
447   used_sets=-1;
448   word_states=make_new_set(&sets);		/* Start of new word */
449   start_states=make_new_set(&sets);		/* This is first state */
450   if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
451   {
452     free_sets(&sets);
453     my_free(found_set);
454     DBUG_RETURN(0);
455   }
456 
457 	/* Init follow_ptr[] */
458   for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
459   {
460     if (from[i][0] == '\\' && from[i][1] == '^')
461     {
462       internal_set_bit(start_states,states+1);
463       if (!from[i][2])
464       {
465 	start_states->table_offset=i;
466 	start_states->found_offset=1;
467       }
468     }
469     else if (from[i][0] == '\\' && from[i][1] == '$')
470     {
471       internal_set_bit(start_states,states);
472       internal_set_bit(word_states,states);
473       if (!from[i][2] && start_states->table_offset == (uint) ~0)
474       {
475 	start_states->table_offset=i;
476 	start_states->found_offset=0;
477       }
478     }
479     else
480     {
481       internal_set_bit(word_states,states);
482       if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
483 	internal_set_bit(start_states,states+1);
484       else
485 	internal_set_bit(start_states,states);
486     }
487     for (pos=from[i], len=0; *pos ; pos++)
488     {
489       if (*pos == '\\' && *(pos+1))
490       {
491 	pos++;
492 	switch (*pos) {
493 	case 'b':
494 	  follow_ptr->chr = SPACE_CHAR;
495 	  break;
496 	case '^':
497 	  follow_ptr->chr = START_OF_LINE;
498 	  break;
499 	case '$':
500 	  follow_ptr->chr = END_OF_LINE;
501 	  break;
502 	case 'r':
503 	  follow_ptr->chr = '\r';
504 	  break;
505 	case 't':
506 	  follow_ptr->chr = '\t';
507 	  break;
508 	case 'v':
509 	  follow_ptr->chr = '\v';
510 	  break;
511 	default:
512 	  follow_ptr->chr = (uchar) *pos;
513 	  break;
514 	}
515       }
516       else
517 	follow_ptr->chr= (uchar) *pos;
518       follow_ptr->table_offset=i;
519       follow_ptr->len= ++len;
520       follow_ptr++;
521     }
522     follow_ptr->chr=0;
523     follow_ptr->table_offset=i;
524     follow_ptr->len=len;
525     follow_ptr++;
526     states+=(uint) len+1;
527   }
528 
529 
530   for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
531   {
532     set=sets.set+set_nr;
533     default_state= 0;				/* Start from beginning */
534 
535     /* If end of found-string not found or start-set with current set */
536 
537     for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
538     {
539       if (!follow[i].chr)
540       {
541 	if (! default_state)
542 	  default_state= find_found(found_set,set->table_offset,
543 				    set->found_offset+1);
544       }
545     }
546     copy_bits(sets.set+used_sets,set);		/* Save set for changes */
547     if (!default_state)
548       or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */
549 
550     /* Find all chars that follows current sets */
551     memset(used_chars, 0, sizeof(used_chars));
552     for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
553     {
554       used_chars[follow[i].chr]=1;
555       if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
556 	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
557 	used_chars[0]=1;
558     }
559 
560     /* Mark word_chars used if \b is in state */
561     if (used_chars[SPACE_CHAR])
562       for (pos= word_end_chars ; *pos ; pos++)
563 	used_chars[(int) (uchar) *pos] = 1;
564 
565     /* Handle other used characters */
566     for (chr= 0 ; chr < 256 ; chr++)
567     {
568       if (! used_chars[chr])
569 	set->next[chr]= (short) (chr ? default_state : -1);
570       else
571       {
572 	new_set=make_new_set(&sets);
573 	set=sets.set+set_nr;			/* if realloc */
574 	new_set->table_offset=set->table_offset;
575 	new_set->found_len=set->found_len;
576 	new_set->found_offset=set->found_offset+1;
577 	found_end=0;
578 
579 	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
580 	{
581 	  if (!follow[i].chr || follow[i].chr == chr ||
582 	      (follow[i].chr == SPACE_CHAR &&
583 	       (is_word_end[chr] ||
584 		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
585 	      (follow[i].chr == END_OF_LINE && ! chr))
586 	  {
587 	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
588 		follow[i].len > found_end)
589 	      found_end=follow[i].len;
590 	    if (chr && follow[i].chr)
591 	      internal_set_bit(new_set,i+1);		/* To next set */
592 	    else
593 	      internal_set_bit(new_set,i);
594 	  }
595 	}
596 	if (found_end)
597 	{
598 	  new_set->found_len=0;			/* Set for testing if first */
599 	  bits_set=0;
600 	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
601 	  {
602 	    if ((follow[i].chr == SPACE_CHAR ||
603 		 follow[i].chr == END_OF_LINE) && ! chr)
604 	      bit_nr=i+1;
605 	    else
606 	      bit_nr=i;
607 	    if (follow[bit_nr-1].len < found_end ||
608 		(new_set->found_len &&
609 		 (chr == 0 || !follow[bit_nr].chr)))
610 	      internal_clear_bit(new_set,i);
611 	    else
612 	    {
613 	      if (chr == 0 || !follow[bit_nr].chr)
614 	      {					/* best match  */
615 		new_set->table_offset=follow[bit_nr].table_offset;
616 		if (chr || (follow[i].chr == SPACE_CHAR ||
617 			    follow[i].chr == END_OF_LINE))
618 		  new_set->found_offset=found_end;	/* New match */
619 		new_set->found_len=found_end;
620 	      }
621 	      bits_set++;
622 	    }
623 	  }
624 	  if (bits_set == 1)
625 	  {
626 	    set->next[chr] = find_found(found_set,
627 					new_set->table_offset,
628 					new_set->found_offset);
629 	    free_last_set(&sets);
630 	  }
631 	  else
632 	    set->next[chr] = find_set(&sets,new_set);
633 	}
634 	else
635 	  set->next[chr] = find_set(&sets,new_set);
636       }
637     }
638   }
639 
640 	/* Alloc replace structure for the replace-state-machine */
641 
642   if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
643 				    sizeof(REPLACE_STRING)*(found_sets+1)+
644 				    sizeof(char *)*count+result_len,
645 				    MYF(MY_WME | MY_ZEROFILL))))
646   {
647     rep_str=(REPLACE_STRING*) (replace+sets.count);
648     to_array=(char **) (rep_str+found_sets+1);
649     to_pos=(char *) (to_array+count);
650     for (i=0 ; i < count ; i++)
651     {
652       to_array[i]=to_pos;
653       to_pos=strmov(to_pos,to[i])+1;
654     }
655     rep_str[0].found=1;
656     rep_str[0].replace_string=0;
657     for (i=1 ; i <= found_sets ; i++)
658     {
659       pos=from[found_set[i-1].table_offset];
660       rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
661       rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
662       rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
663       rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
664 	end_of_word(pos);
665     }
666     for (i=0 ; i < sets.count ; i++)
667     {
668       for (j=0 ; j < 256 ; j++)
669 	if (sets.set[i].next[j] >= 0)
670 	  replace[i].next[j]=replace+sets.set[i].next[j];
671 	else
672 	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
673     }
674   }
675   my_free(follow);
676   free_sets(&sets);
677   my_free(found_set);
678   DBUG_PRINT("exit",("Replace table has %d states",sets.count));
679   DBUG_RETURN(replace);
680 }
681 
682 
/*
  Prepare an empty set pool.

  sets    Pool to initialise; zero-filled here.
  states  Number of states; determines the bitmap size of every set.

  Returns 0 on success and 1 on allocation failure, in which case nothing
  stays allocated.
*/
static int init_sets(REP_SETS *sets,uint states)
{
  memset(sets, 0, sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* sets->set is still 0 here; the block to release is set_buffer */
    my_free(sets->set_buffer);
    sets->set_buffer= 0;
    return 1;
  }
  return 0;
}
698 
	/* Make help sets invisible for nicer coding */
700 
/*
  Hide all current sets: 'set' is advanced past them and the visible
  count restarts at zero. The hidden ones stay reachable through negative
  indexes relative to 'set'.
*/
static void make_sets_invisible(REP_SETS *sets)
{
  uint hidden= sets->count;

  sets->set+= hidden;
  sets->invisible= hidden;
  sets->count= 0;
}
707 
make_new_set(REP_SETS * sets)708 static REP_SET *make_new_set(REP_SETS *sets)
709 {
710   uint i,count,*bit_buffer;
711   REP_SET *set;
712   if (sets->extra)
713   {
714     sets->extra--;
715     set=sets->set+ sets->count++;
716     memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
717     memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
718     set->found_offset=0;
719     set->found_len=0;
720     set->table_offset= (uint) ~0;
721     set->size_of_bits=sets->size_of_bits;
722     return set;
723   }
724   count=sets->count+sets->invisible+SET_MALLOC_HUNC;
725   if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
726 				   sizeof(REP_SET)*count,
727 				  MYF(MY_WME))))
728     return 0;
729   sets->set_buffer=set;
730   sets->set=set+sets->invisible;
731   if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
732 				      (sizeof(uint)*sets->size_of_bits)*count,
733 				      MYF(MY_WME))))
734     return 0;
735   sets->bit_buffer=bit_buffer;
736   for (i=0 ; i < count ; i++)
737   {
738     sets->set_buffer[i].bits=bit_buffer;
739     bit_buffer+=sets->size_of_bits;
740   }
741   sets->extra=SET_MALLOC_HUNC;
742   return make_new_set(sets);
743 }
744 
/* Give the most recently created set back to the pool */
static void free_last_set(REP_SETS *sets)
{
  sets->extra++;
  sets->count--;
}
751 
/* Release the bitmap storage and the set entries of the pool */
static void free_sets(REP_SETS *sets)
{
  my_free(sets->bit_buffer);
  my_free(sets->set_buffer);
}
758 
/*
  Set bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned 1 so that the top bit of a word can
  be addressed without shifting into the sign bit of an int.
*/
static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
}
764 
/*
  Clear bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned 1 so that the top bit of a word can
  be addressed without shifting into the sign bit of an int.
*/
static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~(1U << (bit % WORD_BIT));
}
770 
771 
/* OR the bitmap of 'from' into the bitmap of 'to', word by word */
static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *src= from->bits;
  uint words_left= to->size_of_bits;

  while (words_left--)
    *dst++ |= *src++;
}
779 
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */
static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits, (uchar*) from->bits, bytes);
}
785 
/* Compare two bitmaps; the result is 0 when they are identical */
static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bytes);
}
791 
792 
793 	/* Get next set bit from set. */
794 
/*
  set      Set whose bitmap is scanned.
  lastpos  Bit number returned by the previous call, or (uint) ~0 to
           start from the first bit.

  Returns the number of the next set bit after 'lastpos', or 0 when there
  is none. Bit 0 therefore cannot be told apart from "no more bits".
*/
static int get_next_bit(REP_SET *set,uint lastpos)
{
  uint pos,*start,*end,bits;

  start=set->bits+ ((lastpos+1) / WORD_BIT);
  end=set->bits + set->size_of_bits;
  /* Mask away the bits up to and including lastpos in the first word */
  bits=start[0] & ~((1U << ((lastpos+1) % WORD_BIT)) -1);

  while (! bits && ++start < end)
    bits=start[0];
  if (!bits)
    return 0;
  pos=(uint) (start-set->bits)*WORD_BIT;
  while (! (bits & 1))
  {
    bits>>=1;
    pos++;
  }
  return pos;
}
815 
816 	/* find if there is a same set in sets. If there is, use it and
817 	   free given set, else put in given set in sets and return it's
818 	   position */
819 
/*
  'find' is expected to be the last set of the pool. All earlier sets are
  compared with it; on a match the last set is released and the index of
  the match is returned, otherwise the index of 'find' itself.
*/
static short find_set(REP_SETS *sets,REP_SET *find)
{
  uint idx= 0;
  uint last= sets->count-1;

  while (idx < last)
  {
    if (cmp_bits(sets->set+idx,find) == 0)
    {
      free_last_set(sets);
      return (short) idx;
    }
    idx++;
  }
  return (short) idx;			/* return new position */
}
833 
834 
835 /*
836   find if there is a found_set with same table_offset & found_offset
837   If there is return offset to it, else add new offset and return pos.
838   Pos returned is -offset-2 in found_set_structure because it's is
839   saved in set->next and set->next[] >= 0 points to next set and
840   set->next[] == -1 is reserved for end without replaces.
841 */
842 
/*
  Look up the pair (table_offset, found_offset) in found_set[], appending
  it if it is not there yet. The result for slot n is -n-2.
*/
static short find_found(FOUND_SET *found_set,uint table_offset,
                        int found_offset)
{
  int slot= 0;

  while ((uint) slot < found_sets &&
         !(found_set[slot].table_offset == table_offset &&
           found_set[slot].found_offset == found_offset))
    slot++;

  if ((uint) slot == found_sets)
  {
    /* Not registered yet: use the first free slot */
    found_set[slot].table_offset= table_offset;
    found_set[slot].found_offset= found_offset;
    found_sets++;
  }
  return (short) (-slot-2);
}
856 
857 	/* Return 1 if regexp starts with \b or ends with \b*/
858 
start_at_word(char * pos)859 static uint start_at_word(char * pos)
860 {
861   return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
862 }
863 
end_of_word(char * pos)864 static uint end_of_word(char * pos)
865 {
866   char * end=strend(pos);
867   return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
868 	  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
869 	    1 : 0;
870 }
871 
872 
replace_len(char * str)873 static uint replace_len(char * str)
874 {
875   uint len=0;
876   while (*str)
877   {
878     if (str[0] == '\\' && str[1])
879       str++;
880     str++;
881     len++;
882   }
883   return len;
884 }
885 
886 
887 	/* The actual loop */
888 
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)889 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
890                             char *from)
891 {
892   REPLACE *rep_pos;
893   REPLACE_STRING *rep_str;
894   char *to, *end, *pos, *new;
895 
896   end=(to= *start) + *max_length-1;
897   rep_pos=rep+1;
898   for(;;)
899   {
900     while (!rep_pos->found)
901     {
902       rep_pos= rep_pos->next[(uchar) *from];
903       if (to == end)
904       {
905 	(*max_length)+=8192;
906 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
907 	  return (uint) -1;
908 	to=new+(to - *start);
909 	end=(*start=new)+ *max_length-1;
910       }
911       *to++= *from++;
912     }
913     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
914       return (uint) (to - *start)-1;
915     updated=1;			/* Some char * is replaced */
916     to-=rep_str->to_offset;
917     for (pos=rep_str->replace_string; *pos ; pos++)
918     {
919       if (to == end)
920       {
921 	(*max_length)*=2;
922 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
923 	  return (uint) -1;
924 	to=new+(to - *start);
925 	end=(*start=new)+ *max_length-1;
926       }
927       *to++= *pos;
928     }
929     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
930       return (uint) (to - *start);
931     rep_pos=rep;
932   }
933 }
934 
static char *buffer;		/* The input buffer itself, grown as needed. */
static int bufbytes;		/* Number of bytes in the buffer. */
static int bufread,my_eof;		/* bufread: bytes to request with each read; my_eof: set once a final newline has been added at end of input */
static uint bufalloc;		/* Allocated size of buffer, not counting one extra byte for the sentinel */
static char *out_buff;		/* Output buffer for one converted line; grown by replace_strings() */
static uint out_length;		/* Allocated size of out_buff */
941 
initialize_buffer()942 static int initialize_buffer()
943 {
944   bufread = 8192;
945   bufalloc = bufread + bufread / 2;
946   if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
947     return 1;
948   bufbytes=my_eof=0;
949   out_length=bufread;
950   if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
951     return(1);
952   return 0;
953 }
954 
reset_buffer()955 static void reset_buffer()
956 {
957   bufbytes=my_eof=0;
958 }
959 
free_buffer()960 static void free_buffer()
961 {
962   my_free(buffer);
963   my_free(out_buff);
964 }
965 
966 
967 /*
968   Fill the buffer retaining the last n bytes at the beginning of the
969   newly filled buffer (for backward context).  Returns the number of new
970   bytes read from disk.
971 */
972 
fill_buffer_retaining(fd,n)973 static int fill_buffer_retaining(fd,n)
974 File fd;
975 int n;
976 {
977   int i;
978 
979   /* See if we need to grow the buffer. */
980   if ((int) bufalloc - n <= bufread)
981   {
982     while ((int) bufalloc - n <= bufread)
983     {
984       bufalloc *= 2;
985       bufread *= 2;
986     }
987     buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
988     if (! buffer)
989       return(-1);
990   }
991 
992   /* Shift stuff down. */
993   bmove(buffer,buffer+bufbytes-n,(uint) n);
994   bufbytes = n;
995 
996   if (my_eof)
997     return 0;
998 
999   /* Read in new stuff. */
1000   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1001                        (size_t) bufread, MYF(MY_WME))) < 0)
1002     return -1;
1003 
1004   /* Kludge to pretend every nonempty file ends with a newline. */
1005   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1006   {
1007     my_eof = i = 1;
1008     buffer[bufbytes] = '\n';
1009   }
1010 
1011   bufbytes += i;
1012   return i;
1013 }
1014 
1015 	/* Return 0 if convert is ok */
1016 	/* Global variable update is set if something was changed */
1017 
convert_pipe(rep,in,out)1018 static int convert_pipe(rep,in,out)
1019 REPLACE *rep;
1020 FILE *in,*out;
1021 {
1022   int retain,error;
1023   uint length;
1024   char save_char,*end_of_line,*start_of_line;
1025   DBUG_ENTER("convert_pipe");
1026 
1027   updated=retain=0;
1028   reset_buffer();
1029 
1030   while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1031   {
1032     end_of_line=buffer ;
1033     buffer[bufbytes]=0;			/* Sentinel  */
1034     for (;;)
1035     {
1036       start_of_line=end_of_line;
1037       while (end_of_line[0] != '\n' && end_of_line[0])
1038 	end_of_line++;
1039       if (end_of_line == buffer+bufbytes)
1040       {
1041 	retain= (int) (end_of_line - start_of_line);
1042 	break;				/* No end of line, read more */
1043       }
1044       save_char=end_of_line[0];
1045       end_of_line[0]=0;
1046       end_of_line++;
1047       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1048 	  (uint) -1)
1049 	return 1;
1050       if (!my_eof)
1051 	out_buff[length++]=save_char;	/* Don't write added newline */
1052       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1053 	DBUG_RETURN(1);
1054     }
1055   }
1056   DBUG_RETURN(error);
1057 }
1058 
1059 
/*
  Convert one file in place.

  The converted text is written to a temporary file in the same directory.
  That file replaces the original only if something was changed and no
  error occurred; otherwise it is deleted.

  rep   State table from init_replace().
  name  File to convert. Where supported and not disabled, a symbolic
        link is followed and the link target is converted.

  Returns 0 on success and non-zero on failure.
*/
static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* Don't leave the descriptor or the empty temporary file behind */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(MY_WME));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* A failed close can mean lost output, so it must block the replacement */
  if (my_fclose(out,MYF(MY_WME)) && ! error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1108