1 /*
2    Copyright (c) 2000, 2021, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23    02110-1301  USA */
24 
25 /*
26   Replace strings in textfile
27 
28   This program replaces strings in files or from stdin to stdout.
29   It accepts a list of from-string/to-string pairs and replaces
30   each occurrence of a from-string with the corresponding to-string.
31   The first occurrence of a found string is matched. If there is more
32   than one possibility for the string to replace, longer matches
33   are preferred before shorter matches.
34 
35   Special characters in from string:
36   \^    Match start of line.
37   \$	Match end of line.
38   \b	Match space-character, start of line or end of line.
        For an ending \b the next replace starts looking at the end
        space-character. A \b alone or in a string matches only a
        space-character.
41   \r, \t, \v as in C.
  The program builds a DFA state machine from the strings, so its speed does
  not depend on the number of replace-strings (only on the number of
  replacements made).
  A line is assumed to end with \n or \0.
  There is no limit except memory on the length of strings.
46 
47   Written by Monty.
48   fill_buffer_retaining() is taken from gnu-grep and modified.
49 */
50 
51 #include <my_global.h>
52 #include <m_ctype.h>
53 #include <my_sys.h>
54 #include <m_string.h>
55 #include <errno.h>
56 
57 #define PC_MALLOC		256	/* Bytes for pointers */
58 #define PS_MALLOC		512	/* Bytes for data */
59 
/*
  Growable array of NUL-terminated strings.
  The pointer table (typelib.type_names) and the per-entry flag bytes share
  one allocation: insert_pointer_name() places 'flag' directly after
  'max_count' pointers. The string bytes themselves live in 'str'.
*/
typedef struct st_pointer_array {		/* when using array-strings */
  TYPELIB typelib;				/* Pointer table and entry count */
  uchar *str;					/* Buffer holding the string bytes */
  uint8	*flag;					/* One flag byte per entry */
  /*
    array_allocs: number of PC_MALLOC-sized chunks backing the pointer table
    max_count:    capacity of the pointer table, in entries
    length:       bytes of 'str' currently in use
    max_length:   allocated size of 'str'
  */
  uint  array_allocs,max_count,length,max_length;
} POINTER_ARRAY;
66 
67 #define SPACE_CHAR	256
68 #define START_OF_LINE	257
69 #define END_OF_LINE	258
70 #define LAST_CHAR_CODE	259
71 
/*
  One node of the finished replace state machine.
  init_replace() allocates these zero-filled, so 'found' stays 0 for real
  states; replace_strings() keeps following next[] while 'found' is 0.
  A next[] entry may also point at a REPLACE_STRING (cast to REPLACE*),
  which is recognised by its non-zero 'found' member.
*/
typedef struct st_replace {
  my_bool   found;                      /* 0 for a state node */
  struct st_replace *next[256];         /* Transition for each input byte */
} REPLACE;
76 
/*
  Terminal entry of the replace state machine.
  init_replace() stores 1 in 'found', or 2 when the from-string is exactly
  "\^". Entry 0 has replace_string == 0 and marks "end of input, nothing
  to replace".
*/
typedef struct st_replace_found {
  my_bool found;                /* Non-zero: distinguishes this from REPLACE */
  char *replace_string;         /* Text to insert, or 0 for the end entry */
  uint to_offset;               /* Output bytes to back up before inserting */
  int from_offset;              /* Input bytes to step back after inserting */
} REPLACE_STRING;
83 
84 #ifndef WORD_BIT
85 #define WORD_BIT (8*sizeof(uint))
86 #endif
87 
88 	/* functions defined in this file */
89 
90 static int static_get_options(int *argc,char * * *argv);
91 static int get_replace_strings(int *argc,char * * *argv,
92 				   POINTER_ARRAY *from_array,
93 				   POINTER_ARRAY *to_array);
94 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
95 static void free_pointer_array(POINTER_ARRAY *pa);
96 static int convert_pipe(REPLACE *,FILE *,FILE *);
97 static int convert_file(REPLACE *, char *);
98 static REPLACE *init_replace(char * *from, char * *to,uint count,
99                              char * word_end_chars);
100 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
101                             char * from);
102 static int initialize_buffer(void);
103 static void reset_buffer(void);
104 static void free_buffer(void);
105 
106 static int silent=0,verbose=0,updated=0;
107 
108 	/* The main program */
109 
main(int argc,char * argv[])110 int main(int argc, char *argv[])
111 {
112   int i,error;
113   char word_end_chars[256],*pos;
114   POINTER_ARRAY from,to;
115   REPLACE *replace;
116 
117   fprintf(stderr, "Warning: replace is deprecated and will be removed in a "
118           "future version.\n");
119 
120   MY_INIT(argv[0]);
121 
122   if (static_get_options(&argc,&argv))
123     exit(1);
124   if (get_replace_strings(&argc,&argv,&from,&to))
125     exit(1);
126 
127   for (i=1,pos=word_end_chars ; i < 256 ; i++)
128     if (my_isspace(&my_charset_latin1,i))
129       *pos++= (char) i;
130   *pos=0;
131   if (!(replace=init_replace((char**) from.typelib.type_names,
132 			     (char**) to.typelib.type_names,
133 			     (uint) from.typelib.count,word_end_chars)))
134     exit(1);
135   free_pointer_array(&from);
136   free_pointer_array(&to);
137   if (initialize_buffer())
138     return 1;
139 
140   error=0;
141   if (argc == 0)
142     error=convert_pipe(replace,stdin,stdout);
143   else
144   {
145     while (argc--)
146     {
147       error=convert_file(replace,*(argv++));
148     }
149   }
150   free_buffer();
151   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
152   exit(error ? 2 : 0);
153   return 0;					/* No compiler warning */
154 } /* main */
155 
156 
	/* Reads the command-line options. */
	/* Initiates DEBUG (via -#), but there is no debug output in this file. */
159 
static_get_options(argc,argv)160 static int static_get_options(argc,argv)
161 int *argc;
162 char **argv[];
163 {
164   int help,version;
165   char *pos;
166 
167   silent=verbose=help=0;
168 
169   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
170     while (*++pos)
171     {
172       version=0;
173       switch((*pos)) {
174       case 's':
175 	silent=1;
176 	break;
177       case 'v':
178 	verbose=1;
179 	break;
180       case '#':
181 	DBUG_PUSH (++pos);
182 	pos= (char*) " ";			/* Skip rest of arguments */
183 	break;
184       case 'V':
185 	version=1;
186         // Fall through.
187       case 'I':
188       case '?':
189 	help=1;					/* Help text written */
190 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
191 	       MACHINE_TYPE);
192 	if (version)
193 	  break;
194 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
195 	puts("This program replaces strings in files or from stdin to stdout.\n"
196 	     "It accepts a list of from-string/to-string pairs and replaces\n"
197 	     "each occurrence of a from-string with the corresponding to-string.\n"
198          "The first occurrence of a found string is matched. If there is\n"
199          "more than one possibility for the string to replace, longer\n"
200          "matches are preferred before shorter matches.\n\n"
201 	     "A from-string can contain these special characters:\n"
202 	     "  \\^      Match start of line.\n"
203 	     "  \\$      Match end of line.\n"
204 	     "  \\b      Match space-character, start of line or end of line.\n"
205 	     "          For a end \\b the next replace starts locking at the end\n"
206 	     "          space-character. A \\b alone in a string matches only a\n"
207 	     "          space-character.\n");
208 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
209 	puts("or");
210 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
211 	puts("");
212 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\"");
213 	break;
214       default:
215 	fprintf(stderr,"illegal option: -%c\n",*pos);
216 	break;
217       }
218     }
219   }
220   if (*argc == 0)
221   {
222     if (!help)
223       my_message(0,"No replace options given",MYF(0));
224     exit(0);					/* Don't use as pipe */
225   }
226   return(0);
227 } /* static_get_options */
228 
229 
get_replace_strings(argc,argv,from_array,to_array)230 static int get_replace_strings(argc,argv,from_array,to_array)
231 int *argc;
232 char **argv[];
233 POINTER_ARRAY *from_array,*to_array;
234 {
235   char *pos;
236 
237   memset(from_array, 0, sizeof(from_array[0]));
238   memset(to_array, 0, sizeof(to_array[0]));
239   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
240   {
241     insert_pointer_name(from_array,pos);
242     (*argc)--;
243     (*argv)++;
244     if (!*argc || !strcmp(**argv,"--"))
245     {
246       my_message(0,"No to-string for last from-string",MYF(0));
247       return 1;
248     }
249     insert_pointer_name(to_array,**argv);
250     (*argc)--;
251     (*argv)++;
252   }
253   if (*argc)
254   {					/* Skip "--" argument */
255     (*argc)--;
256     (*argv)++;
257   }
258   return 0;
259 }
260 
/*
  Append a copy of 'name' to the pointer array 'pa'.

  On the first insert the pointer table and the string buffer are allocated.
  Later inserts grow the string buffer in PS_MALLOC steps and the pointer
  table in PC_MALLOC steps. The flag bytes are kept directly behind the
  pointer table, so they are moved whenever the table grows.

  Returns 0 on success, non-zero if an allocation failed; on failure the
  bookkeeping in 'pa' is left describing the old (still valid) buffers.
*/
static int insert_pointer_name(POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    if (!(pa->typelib.type_names=(const char **)
          my_malloc(PSI_NOT_INSTRUMENTED,
                    ((PC_MALLOC-MALLOC_OVERHEAD)/
                     (sizeof(char *)+sizeof(*pa->flag))*
                     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
                                      (uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                      MYF(MY_WME))))
    {
      my_free((char**)pa->typelib.type_names);
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
                                               sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Round the needed size up to a whole number of PS_MALLOC chunks */
    uint new_max_length;
    char *name_pos;

    new_max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    new_max_length=new_max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc(PSI_NOT_INSTRUMENTED,
                                       (uchar*) pa->str,
                                       (uint) new_max_length,
                                       MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->max_length=new_max_length;
    /*
      The buffer may have moved. The strings are stored back to back, each
      with its terminating NUL, so the pointers can be rebuilt by walking
      the new buffer instead of doing arithmetic on the freed address.
    */
    name_pos= (char*) new_pos;
    for (i=0 ; i < pa->typelib.count ; i++)
    {
      pa->typelib.type_names[i]= name_pos;
      name_pos+= strlen(name_pos)+1;
    }
    pa->str=new_pos;
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    int len;
    len=(PC_MALLOC*(pa->array_allocs+1) - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc(PSI_NOT_INSTRUMENTED,
                                               (uchar*) pa->typelib.type_names,
                                               (uint) len/
                                         (sizeof(uchar*)+sizeof(*pa->flag))*
                                         (sizeof(uchar*)+sizeof(*pa->flag)),
                                         MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->array_allocs++;
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    /*
      Move the flags from behind the old table end to behind the new one.
      Source and destination overlap once old_count exceeds the distance
      between them, so memmove() is required here.
    */
    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
            old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) my_stpcpy((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
335 
336 
337 	/* free pointer array */
338 
/*
  Release the pointer table and the string buffer of 'pa'.
  Does nothing when the array holds no entries.
*/
static void free_pointer_array(POINTER_ARRAY *pa)
{
  if (!pa->typelib.count)
    return;                             /* Nothing was stored */

  my_free((char**)pa->typelib.type_names);
  pa->typelib.type_names=0;
  pa->typelib.count=0;
  my_free(pa->str);
} /* free_pointer_array */
350 
351 
	/* Code for replace routines */
353 
354 #define SET_MALLOC_HUNC 64
355 
/*
  One state under construction in init_replace(): a bit set over the
  positions of the follow[] array, plus the transition table built for it.
*/
typedef struct st_rep_set {
  uint  *bits;				/* Pointer to used sets */
  /*
    Transition per character code. Values >= 0 are set indexes; negative
    values are produced by find_found() or are -1 for "end, no replace".
  */
  short	next[LAST_CHAR_CODE];		/* Pointer to next sets */
  uint	found_len;			/* Best match to date */
  int	found_offset;
  uint  table_offset;                   /* From-string index; ~0 if none */
  uint  size_of_bits;			/* For convenience: words in 'bits' */
} REP_SET;
364 
/*
  Growable collection of REP_SETs. 'set' points past the hidden sets at the
  start of 'set_buffer'; all bit arrays are carved out of 'bit_buffer'.
*/
typedef struct st_rep_sets {
  uint		count;			/* Number of sets */
  uint		extra;			/* Extra sets in buffer */
  uint		invisible;		/* Sets not shown */
  uint		size_of_bits;           /* Words per set in bit_buffer */
  REP_SET	*set,*set_buffer;       /* First visible set / allocation */
  uint		*bit_buffer;            /* Storage for every set's bits */
} REP_SETS;
373 
/*
  A distinct (from-string index, match offset) pair recorded by find_found().
*/
typedef struct st_found_set {
  uint table_offset;            /* Index of the from-string */
  int found_offset;             /* Offset stored with the match */
} FOUND_SET;
378 
/*
  One position inside a from-string, as laid out by init_replace().
  Each from-string contributes one entry per pattern character followed by
  an end entry whose chr is 0.
*/
typedef struct st_follow {
  int chr;              /* Byte value, SPACE_CHAR/START_OF_LINE/END_OF_LINE, or 0 */
  uint table_offset;    /* Index of the from-string this entry belongs to */
  uint len;             /* Pattern characters consumed up to this entry */
} FOLLOWS;
384 
385 
386 static int init_sets(REP_SETS *sets,uint states);
387 static REP_SET *make_new_set(REP_SETS *sets);
388 static void make_sets_invisible(REP_SETS *sets);
389 static void free_last_set(REP_SETS *sets);
390 static void free_sets(REP_SETS *sets);
391 static void internal_set_bit(REP_SET *set, uint bit);
392 static void internal_clear_bit(REP_SET *set, uint bit);
393 static void or_bits(REP_SET *to,REP_SET *from);
394 static void copy_bits(REP_SET *to,REP_SET *from);
395 static int cmp_bits(REP_SET *set1,REP_SET *set2);
396 static int get_next_bit(REP_SET *set,uint lastpos);
397 static short find_set(REP_SETS *sets,REP_SET *find);
398 static short find_found(FOUND_SET *found_set,uint table_offset,
399                         int found_offset);
400 static uint start_at_word(char * pos);
401 static uint end_of_word(char * pos);
402 static uint replace_len(char * pos);
403 
404 static uint found_sets=0;
405 
406 
407 	/* Init a replace structure for further calls */
408 
/*
  Build the replace state machine for 'count' from/to string pairs.

  from            from-strings (may contain \^ \$ \b \r \t \v escapes)
  to              replacement strings, parallel to 'from'
  count           number of pairs
  word_end_chars  NUL-terminated list of characters that \b matches

  Returns a single my_malloc'ed block laid out as: sets.count REPLACE nodes,
  then found_sets+1 REPLACE_STRING entries, then 'count' pointers, then
  copies of the to-strings. Returns 0 if a from-string is empty or an
  allocation fails.
*/
static REPLACE *init_replace(char * *from, char * *to,uint count,
                             char * word_end_chars)
{
  uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
  int used_sets,chr;
  short default_state;
  char used_chars[LAST_CHAR_CODE],is_word_end[256];
  char * pos, *to_pos, **to_array;
  REP_SETS sets;
  REP_SET *set,*start_states,*word_states,*new_set;
  FOLLOWS *follow,*follow_ptr;
  REPLACE *replace;
  FOUND_SET *found_set;
  REPLACE_STRING *rep_str;
  DBUG_ENTER("init_replace");

  /* Count number of states */
  for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
  {
    len=replace_len(from[i]);
    if (!len)
    {
      /* An empty from-string cannot be matched */
      errno=EINVAL;
      my_message(0,"No to-string for last from-string",MYF(0));
      DBUG_RETURN(0);
    }
    states+=len+1;
    result_len+=(uint) strlen(to[i])+1;
    if (len > max_length)
      max_length=len;
  }
  /* Lookup table: is_word_end[c] is 1 for every character \b matches */
  memset(is_word_end, 0, sizeof(is_word_end));
  for (i=0 ; word_end_chars[i] ; i++)
    is_word_end[(uchar) word_end_chars[i]]=1;

  if (init_sets(&sets,states))
    DBUG_RETURN(0);
  found_sets=0;
  if (!(found_set= (FOUND_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
                                          sizeof(FOUND_SET)*max_length*count,
                                          MYF(MY_WME))))
  {
    free_sets(&sets);
    DBUG_RETURN(0);
  }
  /*
    The first set is hidden and used as scratch space: after
    make_sets_invisible() it is reachable as sets.set[-1], which is what
    used_sets == -1 indexes. word_states and start_states then become
    visible sets 0 and 1.
  */
  (void) make_new_set(&sets);			/* Set starting set */
  make_sets_invisible(&sets);			/* Hide previous sets */
  used_sets=-1;
  word_states=make_new_set(&sets);		/* Start of new word */
  start_states=make_new_set(&sets);		/* This is first state */
  if (!(follow=(FOLLOWS*) my_malloc(PSI_NOT_INSTRUMENTED,
                                    (states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
  {
    free_sets(&sets);
    my_free(found_set);
    DBUG_RETURN(0);
  }

	/* Init follow_ptr[] */
  /*
    follow[] entries start at index 1. 'states' is the follow[] index of
    the first entry of the current from-string. The bits set here select
    where matching of each from-string may begin.
  */
  for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
  {
    if (from[i][0] == '\\' && from[i][1] == '^')
    {
      /* Leading \^ : begin after the \^ entry, at start of line only */
      internal_set_bit(start_states,states+1);
      if (!from[i][2])
      {
	start_states->table_offset=i;
	start_states->found_offset=1;
      }
    }
    else if (from[i][0] == '\\' && from[i][1] == '$')
    {
      internal_set_bit(start_states,states);
      internal_set_bit(word_states,states);
      if (!from[i][2] && start_states->table_offset == (uint) ~0)
      {
	start_states->table_offset=i;
	start_states->found_offset=0;
      }
    }
    else
    {
      internal_set_bit(word_states,states);
      /* A leading \b followed by more text is skipped at start of line */
      if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
	internal_set_bit(start_states,states+1);
      else
	internal_set_bit(start_states,states);
    }
    /* One follow[] entry per pattern character, escapes decoded */
    for (pos=from[i], len=0; *pos ; pos++)
    {
      if (*pos == '\\' && *(pos+1))
      {
	pos++;
	switch (*pos) {
	case 'b':
	  follow_ptr->chr = SPACE_CHAR;
	  break;
	case '^':
	  follow_ptr->chr = START_OF_LINE;
	  break;
	case '$':
	  follow_ptr->chr = END_OF_LINE;
	  break;
	case 'r':
	  follow_ptr->chr = '\r';
	  break;
	case 't':
	  follow_ptr->chr = '\t';
	  break;
	case 'v':
	  follow_ptr->chr = '\v';
	  break;
	default:
	  follow_ptr->chr = (uchar) *pos;
	  break;
	}
      }
      else
	follow_ptr->chr= (uchar) *pos;
      follow_ptr->table_offset=i;
      follow_ptr->len= ++len;
      follow_ptr++;
    }
    /* End entry for this from-string: chr == 0 */
    follow_ptr->chr=0;
    follow_ptr->table_offset=i;
    follow_ptr->len=len;
    follow_ptr++;
    states+=(uint) len+1;
  }


  /*
    Process every set in turn. sets.count grows while the loop runs because
    make_new_set() adds the states reached from the current one.
  */
  for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
  {
    set=sets.set+set_nr;
    default_state= 0;				/* Start from beginning */

    /* If end of found-string not found or start-set with current set */

    for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
    {
      if (!follow[i].chr)
      {
	if (! default_state)
	  default_state= find_found(found_set,set->table_offset,
				    set->found_offset+1);
      }
    }
    copy_bits(sets.set+used_sets,set);		/* Save set for changes */
    if (!default_state)
      or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */

    /* Find all chars that follows current sets */
    memset(used_chars, 0, sizeof(used_chars));
    for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
    {
      used_chars[follow[i].chr]=1;
      if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
	used_chars[0]=1;
    }

    /* Mark word_chars used if \b is in state */
    if (used_chars[SPACE_CHAR])
      for (pos= word_end_chars ; *pos ; pos++)
	used_chars[(int) (uchar) *pos] = 1;

    /* Handle other used characters */
    for (chr= 0 ; chr < 256 ; chr++)
    {
      if (! used_chars[chr])
	set->next[chr]= (short) (chr ? default_state : -1);
      else
      {
	new_set=make_new_set(&sets);
	set=sets.set+set_nr;			/* if realloc */
	new_set->table_offset=set->table_offset;
	new_set->found_len=set->found_len;
	new_set->found_offset=set->found_offset+1;
	found_end=0;

	/* Collect the follow[] positions reachable on 'chr' */
	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
	{
	  if (!follow[i].chr || follow[i].chr == chr ||
	      (follow[i].chr == SPACE_CHAR &&
	       (is_word_end[chr] ||
		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
	      (follow[i].chr == END_OF_LINE && ! chr))
	  {
	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
		follow[i].len > found_end)
	      found_end=follow[i].len;
	    if (chr && follow[i].chr)
	      internal_set_bit(new_set,i+1);		/* To next set */
	    else
	      internal_set_bit(new_set,i);
	  }
	}
	if (found_end)
	{
	  /* A from-string ends here: keep only the longest candidates */
	  new_set->found_len=0;			/* Set for testing if first */
	  bits_set=0;
	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
	  {
	    if ((follow[i].chr == SPACE_CHAR ||
		 follow[i].chr == END_OF_LINE) && ! chr)
	      bit_nr=i+1;
	    else
	      bit_nr=i;
	    if (follow[bit_nr-1].len < found_end ||
		(new_set->found_len &&
		 (chr == 0 || !follow[bit_nr].chr)))
	      internal_clear_bit(new_set,i);
	    else
	    {
	      if (chr == 0 || !follow[bit_nr].chr)
	      {					/* best match  */
		new_set->table_offset=follow[bit_nr].table_offset;
		if (chr || (follow[i].chr == SPACE_CHAR ||
			    follow[i].chr == END_OF_LINE))
		  new_set->found_offset=found_end;	/* New match */
		new_set->found_len=found_end;
	      }
	      bits_set++;
	    }
	  }
	  if (bits_set == 1)
	  {
	    /* Only the match itself remains: go straight to a found entry */
	    set->next[chr] = find_found(found_set,
					new_set->table_offset,
					new_set->found_offset);
	    free_last_set(&sets);
	  }
	  else
	    set->next[chr] = find_set(&sets,new_set);
	}
	else
	  set->next[chr] = find_set(&sets,new_set);
      }
    }
  }

	/* Alloc replace structure for the replace-state-machine */

  if ((replace=(REPLACE*) my_malloc(PSI_NOT_INSTRUMENTED,
                                    sizeof(REPLACE)*(sets.count)+
				    sizeof(REPLACE_STRING)*(found_sets+1)+
				    sizeof(char *)*count+result_len,
				    MYF(MY_WME | MY_ZEROFILL))))
  {
    /* Carve the single allocation into its four consecutive regions */
    rep_str=(REPLACE_STRING*) (replace+sets.count);
    to_array=(char **) (rep_str+found_sets+1);
    to_pos=(char *) (to_array+count);
    for (i=0 ; i < count ; i++)
    {
      to_array[i]=to_pos;
      to_pos=my_stpcpy(to_pos,to[i])+1;
    }
    /* Entry 0 is the "end of input, nothing to replace" marker */
    rep_str[0].found=1;
    rep_str[0].replace_string=0;
    for (i=1 ; i <= found_sets ; i++)
    {
      pos=from[found_set[i-1].table_offset];
      /* found == 2 marks a from-string that is exactly "\^" */
      rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
      rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
      rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
      rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
	end_of_word(pos);
    }
    /*
      Translate set indexes into pointers: values >= 0 address a REPLACE
      node, negative value v addresses rep_str[-v-1].
    */
    for (i=0 ; i < sets.count ; i++)
    {
      for (j=0 ; j < 256 ; j++)
	if (sets.set[i].next[j] >= 0)
	  replace[i].next[j]=replace+sets.set[i].next[j];
	else
	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
    }
  }
  my_free(follow);
  free_sets(&sets);
  my_free(found_set);
  DBUG_PRINT("exit",("Replace table has %d states",sets.count));
  DBUG_RETURN(replace);
}
692 
693 
/*
  Initialise an empty set collection able to describe 'states' states.

  Zeroes *sets, computes the per-set bit array size and allocates the
  initial set and bit buffers.

  Returns 0 on success, 1 if an allocation failed (nothing is left
  allocated in that case).
*/
static int init_sets(REP_SETS *sets,uint states)
{
  memset(sets, 0, sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
                                              sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(PSI_NOT_INSTRUMENTED,
                                           sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /*
      Release the buffer allocated above. sets->set is still NULL at this
      point, so freeing it would leak set_buffer.
    */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
711 
	/* Make helper sets invisible for nicer coding */
713 
/*
  Hide all currently visible sets: 'set' is moved past them and the visible
  count restarts at zero.
*/
static void make_sets_invisible(REP_SETS *sets)
{
  uint hidden= sets->count;

  sets->set+= hidden;
  sets->invisible= hidden;
  sets->count= 0;
}
720 
make_new_set(REP_SETS * sets)721 static REP_SET *make_new_set(REP_SETS *sets)
722 {
723   uint i,count,*bit_buffer;
724   REP_SET *set;
725   if (sets->extra)
726   {
727     sets->extra--;
728     set=sets->set+ sets->count++;
729     memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
730     memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
731     set->found_offset=0;
732     set->found_len=0;
733     set->table_offset= (uint) ~0;
734     set->size_of_bits=sets->size_of_bits;
735     return set;
736   }
737   count=sets->count+sets->invisible+SET_MALLOC_HUNC;
738   if (!(set=(REP_SET*) my_realloc(PSI_NOT_INSTRUMENTED,
739                                   (uchar*) sets->set_buffer,
740 				   sizeof(REP_SET)*count,
741 				  MYF(MY_WME))))
742     return 0;
743   sets->set_buffer=set;
744   sets->set=set+sets->invisible;
745   if (!(bit_buffer=(uint*) my_realloc(PSI_NOT_INSTRUMENTED,
746                                       (uchar*) sets->bit_buffer,
747 				      (sizeof(uint)*sets->size_of_bits)*count,
748 				      MYF(MY_WME))))
749     return 0;
750   sets->bit_buffer=bit_buffer;
751   for (i=0 ; i < count ; i++)
752   {
753     sets->set_buffer[i].bits=bit_buffer;
754     bit_buffer+=sets->size_of_bits;
755   }
756   sets->extra=SET_MALLOC_HUNC;
757   return make_new_set(sets);
758 }
759 
/* Give the most recently handed-out set back to the spare pool. */
static void free_last_set(REP_SETS *sets)
{
  sets->extra+= 1;
  sets->count-= 1;
}
766 
/* Release both buffers owned by the set collection. */
static void free_sets(REP_SETS *sets)
{
  REP_SET *all_sets= sets->set_buffer;
  uint *all_bits= sets->bit_buffer;

  my_free(all_sets);
  my_free(all_bits);
}
773 
/*
  Set bit number 'bit' in the set's bit array.
  The mask is built from an unsigned 1 so that selecting the top bit of a
  word does not shift into the sign bit of an int.
*/
static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
  return;
}
779 
/*
  Clear bit number 'bit' in the set's bit array.
  The mask is built from an unsigned 1 so that selecting the top bit of a
  word does not shift into the sign bit of an int.
*/
static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~(1U << (bit % WORD_BIT));
  return;
}
785 
786 
/* OR every word of from's bit array into to's (to->size_of_bits words). */
static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *src= from->bits;
  uint *dst_end= dst + to->size_of_bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
794 
/* Overwrite to's bit array with from's (to->size_of_bits words). */
static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t nbytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits, (uchar*) from->bits, nbytes);
}
800 
/*
  Compare two bit arrays bytewise over set1->size_of_bits words.
  Returns 0 when they are equal, otherwise the memcmp() result.
*/
static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t nbytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, nbytes);
}
806 
807 
808 	/* Get next set bit from set. */
809 
/*
  Return the number of the first set bit after position 'lastpos', or 0 if
  there is none. Pass (uint) ~0 as 'lastpos' to start from the beginning.
  Since 0 means "no more bits", bit 0 itself can never be reported.
*/
static int get_next_bit(REP_SET *set,uint lastpos)
{
  uint pos,*start,*end,bits;

  start=set->bits+ ((lastpos+1) / WORD_BIT);
  end=set->bits + set->size_of_bits;
  if (start >= end)
    return 0;                           /* Search begins past the last word */
  /* Mask off the bits at or below lastpos; unsigned 1 avoids sign-bit shifts */
  bits=start[0] & ~((1U << ((lastpos+1) % WORD_BIT)) -1);

  while (! bits && ++start < end)
    bits=start[0];
  if (!bits)
    return 0;
  pos=(uint) (start-set->bits)*WORD_BIT;
  while (! (bits & 1))
  {
    bits>>=1;
    pos++;
  }
  return pos;
}
830 
	/* Find out whether an identical set already exists in sets. If so,
	   use it and free the given set; otherwise keep the given set in
	   sets and return its position. */
834 
/*
  Look for an earlier set with the same bits as 'find', which is expected
  to be the last set in 'sets'. Returns the index of the duplicate (and
  releases the last set), or the index of 'find' itself.
*/
static short find_set(REP_SETS *sets,REP_SET *find)
{
  uint idx= 0;
  uint last= sets->count-1;

  while (idx < last)
  {
    if (cmp_bits(sets->set+idx,find) == 0)
    {
      free_last_set(sets);
      return (short) idx;
    }
    idx++;
  }
  return (short) idx;			/* return new position */
}
848 
849 
/*
  Find out whether there is a found_set entry with the same table_offset and
  found_offset. If there is, return its position; otherwise add a new entry
  and return the position of that.
  The position returned is -offset-2 in the found_set array, because it is
  saved in set->next[]: values >= 0 there point to the next set, and
  set->next[] == -1 is reserved for end without replaces.
*/
857 
/*
  Return the encoded position (-index-2) of the entry matching
  (table_offset, found_offset), appending a new entry when none exists.
*/
static short find_found(FOUND_SET *found_set,uint table_offset,
                        int found_offset)
{
  uint idx;

  for (idx= 0 ; idx < found_sets ; idx++)
  {
    if (found_set[idx].table_offset == table_offset &&
        found_set[idx].found_offset == found_offset)
      break;
  }
  if (idx == found_sets)
  {
    /* Not seen before: append it */
    found_set[idx].table_offset= table_offset;
    found_set[idx].found_offset= found_offset;
    found_sets++;
  }
  return (short) (-(int) idx - 2);
}
871 
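	/* start_at_word: return 1 if the pattern starts with \b followed by
	   more text, or starts with \^.
	   end_of_word: return 1 if the pattern ends with \b (and is longer
	   than just \b) or ends with \$. */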
873 
/*
  Return 1 if 'pos' begins with "\b" followed by at least one more
  character, or begins with "\^"; otherwise 0.
*/
static uint start_at_word(char * pos)
{
  if (memcmp(pos,"\\b",2) == 0 && pos[2])
    return 1;
  if (memcmp(pos,"\\^",2) == 0)
    return 1;
  return 0;
}
878 
end_of_word(char * pos)879 static uint end_of_word(char * pos)
880 {
881   char * end=strend(pos);
882   return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
883 	  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
884 	    1 : 0;
885 }
886 
887 
/*
  Count the pattern characters in 'str'. A backslash followed by another
  character counts as one; a trailing lone backslash counts as one too.
*/
static uint replace_len(char * str)
{
  uint count;

  for (count= 0 ; *str ; count++)
  {
    if (*str == '\\' && str[1] != '\0')
      str+= 2;                          /* Escape sequence: two bytes */
    else
      str+= 1;
  }
  return count;
}
900 
901 
902 	/* The actual loop */
903 
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)904 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
905                             char *from)
906 {
907   REPLACE *rep_pos;
908   REPLACE_STRING *rep_str;
909   char *to, *end, *pos, *new;
910 
911   end=(to= *start) + *max_length-1;
912   rep_pos=rep+1;
913   for(;;)
914   {
915     while (!rep_pos->found)
916     {
917       rep_pos= rep_pos->next[(uchar) *from];
918       if (to == end)
919       {
920 	(*max_length)+=8192;
921 	if (!(new=my_realloc(PSI_NOT_INSTRUMENTED,
922                              *start,*max_length,MYF(MY_WME))))
923 	  return (uint) -1;
924 	to=new+(to - *start);
925 	end=(*start=new)+ *max_length-1;
926       }
927       *to++= *from++;
928     }
929     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
930       return (uint) (to - *start)-1;
931     updated=1;			/* Some char * is replaced */
932     to-=rep_str->to_offset;
933     for (pos=rep_str->replace_string; *pos ; pos++)
934     {
935       if (to == end)
936       {
937 	(*max_length)*=2;
938 	if (!(new=my_realloc(PSI_NOT_INSTRUMENTED,
939                              *start,*max_length,MYF(MY_WME))))
940 	  return (uint) -1;
941 	to=new+(to - *start);
942 	end=(*start=new)+ *max_length-1;
943       }
944       *to++= *pos;
945     }
946     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
947       return (uint) (to - *start);
948     rep_pos=rep;
949   }
950 }
951 
/* State of the input buffer filled by fill_buffer_retaining(). */
static char *buffer;		/* The buffer itself, grown as needed. */
static int bufbytes;		/* Number of bytes in the buffer. */
/*
  bufread: number of bytes to get with each read().
  my_eof:  set once a newline has been appended after the last read
           returned no data.
*/
static int bufread,my_eof;
static uint bufalloc;           /* Allocated size of buffer, less one byte */
/* Output buffer and its size; allocated in initialize_buffer(). */
static char *out_buff;
static uint out_length;
958 
initialize_buffer()959 static int initialize_buffer()
960 {
961   bufread = 8192;
962   bufalloc = bufread + bufread / 2;
963   if (!(buffer = my_malloc(PSI_NOT_INSTRUMENTED,
964                            bufalloc+1,MYF(MY_WME))))
965     return 1;
966   bufbytes=my_eof=0;
967   out_length=bufread;
968   if (!(out_buff=my_malloc(PSI_NOT_INSTRUMENTED,
969                            out_length,MYF(MY_WME))))
970     return(1);
971   return 0;
972 }
973 
reset_buffer()974 static void reset_buffer()
975 {
976   bufbytes=my_eof=0;
977 }
978 
free_buffer()979 static void free_buffer()
980 {
981   my_free(buffer);
982   my_free(out_buff);
983 }
984 
985 
986 /*
987   Fill the buffer retaining the last n bytes at the beginning of the
988   newly filled buffer (for backward context).  Returns the number of new
989   bytes read from disk.
990 */
991 
fill_buffer_retaining(fd,n)992 static int fill_buffer_retaining(fd,n)
993 File fd;
994 int n;
995 {
996   int i;
997 
998   /* See if we need to grow the buffer. */
999   if ((int) bufalloc - n <= bufread)
1000   {
1001     while ((int) bufalloc - n <= bufread)
1002     {
1003       bufalloc *= 2;
1004       bufread *= 2;
1005     }
1006     buffer = my_realloc(PSI_NOT_INSTRUMENTED,
1007                         buffer, bufalloc+1, MYF(MY_WME));
1008     if (! buffer)
1009       return(-1);
1010   }
1011 
1012   /* Shift stuff down. */
1013   memmove(buffer, buffer+bufbytes-n, (uint) n);
1014   bufbytes = n;
1015 
1016   if (my_eof)
1017     return 0;
1018 
1019   /* Read in new stuff. */
1020   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1021                        (size_t) bufread, MYF(MY_WME))) < 0)
1022     return -1;
1023 
1024   /* Kludge to pretend every nonempty file ends with a newline. */
1025   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1026   {
1027     my_eof = i = 1;
1028     buffer[bufbytes] = '\n';
1029   }
1030 
1031   bufbytes += i;
1032   return i;
1033 }
1034 
1035 	/* Return 0 if convert is ok */
1036 	/* Global variable update is set if something was changed */
1037 
convert_pipe(rep,in,out)1038 static int convert_pipe(rep,in,out)
1039 REPLACE *rep;
1040 FILE *in,*out;
1041 {
1042   int retain,error;
1043   uint length;
1044   char save_char,*end_of_line,*start_of_line;
1045   DBUG_ENTER("convert_pipe");
1046 
1047   updated=retain=0;
1048   reset_buffer();
1049 
1050   while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1051   {
1052     end_of_line=buffer ;
1053     buffer[bufbytes]=0;			/* Sentinel  */
1054     for (;;)
1055     {
1056       start_of_line=end_of_line;
1057       while (end_of_line[0] != '\n' && end_of_line[0])
1058 	end_of_line++;
1059       if (end_of_line == buffer+bufbytes)
1060       {
1061 	retain= (int) (end_of_line - start_of_line);
1062 	break;				/* No end of line, read more */
1063       }
1064       save_char=end_of_line[0];
1065       end_of_line[0]=0;
1066       end_of_line++;
1067       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1068 	  (uint) -1)
1069 	DBUG_RETURN(1);
1070       if (!my_eof)
1071 	out_buff[length++]=save_char;	/* Don't write added newline */
1072       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1073 	DBUG_RETURN(1);
1074     }
1075   }
1076   DBUG_RETURN(error);
1077 }
1078 
1079 
/*
  Run the replacements over the file 'name' and replace the file with the
  result.

  SYNOPSIS
    convert_file()
    rep		Replace state machine handed to convert_pipe()
    name	File to convert

  NOTES
    The converted text is written to a temporary file created in the
    directory of the original.  The original is replaced only if something
    was changed and every step, including closing the temporary file,
    succeeded; otherwise the temporary file is removed.

  RETURN
    0	ok
    != 0	error
*/

static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  /*
    NOTE(review): link_name is used exactly as my_readlink() returned it;
    whether a relative link target needs resolving against the directory
    of 'name' cannot be told from here - verify.
  */
  org_name= (my_enable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    my_fclose(in,MYF(0));
    /*
      Best effort: don't leave the temporary file behind.
      NOTE(review): the descriptor temp_file is still open on this path;
      closing it needs my_close(), which this function does not use yet.
    */
    my_delete(tempname,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /*
    Buffered output may reach the disk only when the stream is closed.
    A failure here means the temporary file is incomplete, so it must not
    replace the original.
  */
  if (my_fclose(out,MYF(MY_WME)) && !error)
    error= 1;

  if (updated && ! error)
  {
    /*
      The temporary file is deliberately not deleted if this fails: it may
      be the only complete copy left.
    */
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error= 1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1128