1 /*
2    Copyright (c) 2000, 2014, Oracle and/or its affiliates
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17    02110-1335  USA */
18 
19 /*
20   Replace strings in textfile
21 
22   This program replaces strings in files or from stdin to stdout.
23   It accepts a list of from-string/to-string pairs and replaces
24   each occurrence of a from-string with the corresponding to-string.
25   The first occurrence of a found string is matched. If there is more
26   than one possibility for the string to replace, longer matches
27   are preferred before shorter matches.
28 
29   Special characters in from string:
30   \^    Match start of line.
31   \$	Match end of line.
32   \b	Match space-character, start of line or end of line.
        For an end \b the next replace starts looking at the end space-character.
34         An \b alone or in a string matches only a space-character.
35   \r, \t, \v as in C.
  The program builds a DFA state machine from the strings, so the speed does
  not depend on the number of replace-strings (only on the number of replaces).
  A line is assumed to end with \n or \0.
  There is no limit except memory on the length of strings.
40 
41   Written by Monty.
42   fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44 
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50 
51 #define PC_MALLOC		256	/* Bytes for pointers */
52 #define PS_MALLOC		512	/* Bytes for data */
53 
54 typedef struct st_pointer_array {		/* when using array-strings */
55   TYPELIB typelib;				/* Pointer to strings */
56   uchar *str;					/* Strings is here */
57   uint8	*flag;					/* Flag about each var. */
58   uint  array_allocs,max_count,length,max_length;
59 } POINTER_ARRAY;
60 
61 #define SPACE_CHAR	256
62 #define START_OF_LINE	257
63 #define END_OF_LINE	258
64 #define LAST_CHAR_CODE	259
65 
66 typedef struct st_replace {
67   uint8 found;
68   struct st_replace *next[256];
69 } REPLACE;
70 
71 typedef struct st_replace_found {
72   my_bool found;
73   char *replace_string;
74   uint to_offset;
75   int from_offset;
76 } REPLACE_STRING;
77 
78 #ifndef WORD_BIT
79 #define WORD_BIT (8*sizeof(uint))
80 #endif
81 
82 	/* functions defined in this file */
83 
84 static int static_get_options(int *argc,char * * *argv);
85 static int get_replace_strings(int *argc,char * * *argv,
86 				   POINTER_ARRAY *from_array,
87 				   POINTER_ARRAY *to_array);
88 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
89 static void free_pointer_array(POINTER_ARRAY *pa);
90 static int convert_pipe(REPLACE *,FILE *,FILE *);
91 static int convert_file(REPLACE *, char *);
92 static REPLACE *init_replace(char * *from, char * *to,uint count,
93                              char * word_end_chars);
94 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
95                             char * from);
96 static int initialize_buffer(void);
97 static void reset_buffer(void);
98 static void free_buffer(void);
99 
100 static int silent=0,verbose=0,updated=0;
101 
102 	/* The main program */
103 
main(int argc,char * argv[])104 int main(int argc, char *argv[])
105 {
106   int i,error;
107   char word_end_chars[256],*pos;
108   POINTER_ARRAY from,to;
109   REPLACE *replace;
110   MY_INIT(argv[0]);
111 
112   if (static_get_options(&argc,&argv))
113     exit(1);
114   if (get_replace_strings(&argc,&argv,&from,&to))
115     exit(1);
116 
117   for (i=1,pos=word_end_chars ; i < 256 ; i++)
118     if (my_isspace(&my_charset_latin1,i))
119       *pos++= (char) i;
120   *pos=0;
121   if (!(replace=init_replace((char**) from.typelib.type_names,
122 			     (char**) to.typelib.type_names,
123 			     (uint) from.typelib.count,word_end_chars)))
124     exit(1);
125   free_pointer_array(&from);
126   free_pointer_array(&to);
127   if (initialize_buffer())
128     return 1;
129 
130   error=0;
131   if (argc == 0)
132     error=convert_pipe(replace,stdin,stdout);
133   else
134   {
135     while (argc--)
136     {
137       error=convert_file(replace,*(argv++));
138     }
139   }
140   free_buffer();
141   my_free(replace);
142   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
143   exit(error ? 2 : 0);
144   return 0;					/* No compiler warning */
145 } /* main */
146 
147 
148 	/* reads options */
149 	/* Initiates DEBUG - but no debugging here ! */
150 
static_get_options(argc,argv)151 static int static_get_options(argc,argv)
152 register int *argc;
153 register char **argv[];
154 {
155   int help,version;
156   char *pos;
157 
158   silent=verbose=help=0;
159 
160   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
161     while (*++pos)
162     {
163       version=0;
164       switch((*pos)) {
165       case 's':
166 	silent=1;
167 	break;
168       case 'v':
169 	verbose=1;
170 	break;
171       case '#':
172 	DBUG_PUSH (++pos);
173 	pos= (char*) " ";			/* Skip rest of arguments */
174 	break;
175       case 'V':
176 	version=1;
177         /* fall through */
178       case 'I':
179       case '?':
180 	help=1;					/* Help text written */
181 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
182 	       MACHINE_TYPE);
183 	if (version)
184 	  break;
185 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
186 	puts("This program replaces strings in files or from stdin to stdout.\n"
187 	     "It accepts a list of from-string/to-string pairs and replaces\n"
188 	     "each occurrence of a from-string with the corresponding to-string.\n"
189          "The first occurrence of a found string is matched. If there is\n"
190          "more than one possibility for the string to replace, longer\n"
191          "matches are preferred before shorter matches.\n\n"
192 	     "A from-string can contain these special characters:\n"
193 	     "  \\^      Match start of line.\n"
194 	     "  \\$      Match end of line.\n"
195 	     "  \\b      Match space-character, start of line or end of line.\n"
196 	     "          For a end \\b the next replace starts locking at the end\n"
197 	     "          space-character. A \\b alone in a string matches only a\n"
198 	     "          space-character.\n");
199 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
200 	puts("or");
201 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
202 	puts("");
203 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\"");
204 	break;
205       default:
206 	fprintf(stderr,"illegal option: -%c\n",*pos);
207 	break;
208       }
209     }
210   }
211   if (*argc == 0)
212   {
213     if (!help)
214       my_message(0,"No replace options given",MYF(ME_BELL));
215     exit(0);					/* Don't use as pipe */
216   }
217   return(0);
218 } /* static_get_options */
219 
220 
get_replace_strings(argc,argv,from_array,to_array)221 static int get_replace_strings(argc,argv,from_array,to_array)
222 register int *argc;
223 register char **argv[];
224 POINTER_ARRAY *from_array,*to_array;
225 {
226   char *pos;
227 
228   bzero((char*) from_array,sizeof(from_array[0]));
229   bzero((char*) to_array,sizeof(to_array[0]));
230   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
231   {
232     insert_pointer_name(from_array,pos);
233     (*argc)--;
234     (*argv)++;
235     if (!*argc || !strcmp(**argv,"--"))
236     {
237       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
238       return 1;
239     }
240     insert_pointer_name(to_array,**argv);
241     (*argc)--;
242     (*argv)++;
243   }
244   if (*argc)
245   {					/* Skip "--" argument */
246     (*argc)--;
247     (*argv)++;
248   }
249   return 0;
250 }
251 
/*
  Append a copy of 'name' to the pointer array 'pa'.

  The first call (typelib.count == 0) allocates the pointer/flag block and
  the string buffer.  Later calls grow either of them in PS_MALLOC /
  PC_MALLOC steps when needed.  type_names[] is kept terminated with NullS.

  Returns 0 on success, non-zero (-1 or 1) if an allocation failed.  After
  a failed growth the array still describes its old, valid contents.
*/
static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    if (!(pa->typelib.type_names=(const char **)
          my_malloc(PSI_NOT_INSTRUMENTED, ((PC_MALLOC-MALLOC_OVERHEAD)/
                     (sizeof(char *)+sizeof(*pa->flag))*
                     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
                                      PS_MALLOC-MALLOC_OVERHEAD, MYF(MY_WME))))
    {
      my_free((void*) pa->typelib.type_names);
      pa->typelib.type_names= 0;		/* Don't leave it dangling */
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
                                               sizeof(*pa->flag));
    /* The flag bytes share the allocation, right after the pointers */
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Compute the new size locally; commit it only if realloc succeeds */
    uint new_max_length;
    new_max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    new_max_length=new_max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc(PSI_NOT_INSTRUMENTED, (uchar*) pa->str,
                                      (uint) new_max_length, MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->max_length=new_max_length;
    if (new_pos != pa->str)
    {
      /* The buffer moved: shift every stored string pointer accordingly */
      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
      for (i=0 ; i < pa->typelib.count ; i++)
        pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
                                              char*);
      pa->str=new_pos;
    }
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    int len;
    uint new_allocs=pa->array_allocs+1;
    len=(PC_MALLOC*new_allocs - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc(PSI_NOT_INSTRUMENTED, (void*)(pa->typelib.type_names),
                                               (uint) len/
                                         (sizeof(uchar*)+sizeof(*pa->flag))*
                                         (sizeof(uchar*)+sizeof(*pa->flag)),
                                         MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->array_allocs=new_allocs;
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    /*
      Move the flag bytes from behind the old pointer area to behind the
      new one.  Both ranges are in the same block and overlap once
      old_count exceeds the distance between them, hence memmove().
    */
    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
            old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
321 
322 
323 	/* free pointer array */
324 
free_pointer_array(reg1 POINTER_ARRAY * pa)325 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
326 {
327   if (pa->typelib.count)
328   {
329     pa->typelib.count=0;
330     my_free((void*) pa->typelib.type_names);
331     pa->typelib.type_names=0;
332     my_free(pa->str);
333   }
334   return;
335 } /* free_pointer_array */
336 
337 
338 	/* Code for replace rutines */
339 
340 #define SET_MALLOC_HUNC 64
341 
342 typedef struct st_rep_set {
343   uint  *bits;				/* Pointer to used sets */
344   short	next[LAST_CHAR_CODE];		/* Pointer to next sets */
345   uint	found_len;			/* Best match to date */
346   int	found_offset;
347   uint  table_offset;
348   uint  size_of_bits;			/* For convinience */
349 } REP_SET;
350 
351 typedef struct st_rep_sets {
352   uint		count;			/* Number of sets */
353   uint		extra;			/* Extra sets in buffer */
354   uint		invisible;		/* Sets not chown */
355   uint		size_of_bits;
356   REP_SET	*set,*set_buffer;
357   uint		*bit_buffer;
358 } REP_SETS;
359 
360 typedef struct st_found_set {
361   uint table_offset;
362   int found_offset;
363 } FOUND_SET;
364 
365 typedef struct st_follow {
366   int chr;
367   uint table_offset;
368   uint len;
369 } FOLLOWS;
370 
371 
372 static int init_sets(REP_SETS *sets,uint states);
373 static REP_SET *make_new_set(REP_SETS *sets);
374 static void make_sets_invisible(REP_SETS *sets);
375 static void free_last_set(REP_SETS *sets);
376 static void free_sets(REP_SETS *sets);
377 static void internal_set_bit(REP_SET *set, uint bit);
378 static void internal_clear_bit(REP_SET *set, uint bit);
379 static void or_bits(REP_SET *to,REP_SET *from);
380 static void copy_bits(REP_SET *to,REP_SET *from);
381 static int cmp_bits(REP_SET *set1,REP_SET *set2);
382 static int get_next_bit(REP_SET *set,uint lastpos);
383 static short find_set(REP_SETS *sets,REP_SET *find);
384 static short find_found(FOUND_SET *found_set,uint table_offset,
385                         int found_offset);
386 static uint start_at_word(char * pos);
387 static uint end_of_word(char * pos);
388 static uint replace_len(char * pos);
389 
390 static uint found_sets=0;
391 
392 
393 	/* Init a replace structure for further calls */
394 
init_replace(char ** from,char ** to,uint count,char * word_end_chars)395 static REPLACE *init_replace(char * *from, char * *to,uint count,
396                              char * word_end_chars)
397 {
398   uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
399   int used_sets,chr;
400   short default_state;
401   char used_chars[LAST_CHAR_CODE],is_word_end[256];
402   char * pos, *to_pos, **to_array;
403   REP_SETS sets;
404   REP_SET *set,*start_states,*word_states,*new_set;
405   FOLLOWS *follow,*follow_ptr;
406   REPLACE *replace;
407   FOUND_SET *found_set;
408   REPLACE_STRING *rep_str;
409   DBUG_ENTER("init_replace");
410 
411   /* Count number of states */
412   for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
413   {
414     len=replace_len(from[i]);
415     if (!len)
416     {
417       errno=EINVAL;
418       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
419       DBUG_RETURN(0);
420     }
421     states+=len+1;
422     result_len+=(uint) strlen(to[i])+1;
423     if (len > max_length)
424       max_length=len;
425   }
426   bzero((char*) is_word_end,sizeof(is_word_end));
427   for (i=0 ; word_end_chars[i] ; i++)
428     is_word_end[(uchar) word_end_chars[i]]=1;
429 
430   if (init_sets(&sets,states))
431     DBUG_RETURN(0);
432   found_sets=0;
433   if (!(found_set= (FOUND_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
434                                           sizeof(FOUND_SET)*max_length*count,
435 					  MYF(MY_WME))))
436   {
437     free_sets(&sets);
438     DBUG_RETURN(0);
439   }
440   (void) make_new_set(&sets);			/* Set starting set */
441   make_sets_invisible(&sets);			/* Hide previus sets */
442   used_sets=-1;
443   word_states=make_new_set(&sets);		/* Start of new word */
444   start_states=make_new_set(&sets);		/* This is first state */
445   if (!(follow=(FOLLOWS*) my_malloc(PSI_NOT_INSTRUMENTED,
446                                     (states+2)*sizeof(FOLLOWS), MYF(MY_WME))))
447   {
448     free_sets(&sets);
449     my_free(found_set);
450     DBUG_RETURN(0);
451   }
452 
453 	/* Init follow_ptr[] */
454   for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
455   {
456     if (from[i][0] == '\\' && from[i][1] == '^')
457     {
458       internal_set_bit(start_states,states+1);
459       if (!from[i][2])
460       {
461 	start_states->table_offset=i;
462 	start_states->found_offset=1;
463       }
464     }
465     else if (from[i][0] == '\\' && from[i][1] == '$')
466     {
467       internal_set_bit(start_states,states);
468       internal_set_bit(word_states,states);
469       if (!from[i][2] && start_states->table_offset == (uint) ~0)
470       {
471 	start_states->table_offset=i;
472 	start_states->found_offset=0;
473       }
474     }
475     else
476     {
477       internal_set_bit(word_states,states);
478       if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
479 	internal_set_bit(start_states,states+1);
480       else
481 	internal_set_bit(start_states,states);
482     }
483     for (pos=from[i], len=0; *pos ; pos++)
484     {
485       if (*pos == '\\' && *(pos+1))
486       {
487 	pos++;
488 	switch (*pos) {
489 	case 'b':
490 	  follow_ptr->chr = SPACE_CHAR;
491 	  break;
492 	case '^':
493 	  follow_ptr->chr = START_OF_LINE;
494 	  break;
495 	case '$':
496 	  follow_ptr->chr = END_OF_LINE;
497 	  break;
498 	case 'r':
499 	  follow_ptr->chr = '\r';
500 	  break;
501 	case 't':
502 	  follow_ptr->chr = '\t';
503 	  break;
504 	case 'v':
505 	  follow_ptr->chr = '\v';
506 	  break;
507 	default:
508 	  follow_ptr->chr = (uchar) *pos;
509 	  break;
510 	}
511       }
512       else
513 	follow_ptr->chr= (uchar) *pos;
514       follow_ptr->table_offset=i;
515       follow_ptr->len= ++len;
516       follow_ptr++;
517     }
518     follow_ptr->chr=0;
519     follow_ptr->table_offset=i;
520     follow_ptr->len=len;
521     follow_ptr++;
522     states+=(uint) len+1;
523   }
524 
525 
526   for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
527   {
528     set=sets.set+set_nr;
529     default_state= 0;				/* Start from beginning */
530 
531     /* If end of found-string not found or start-set with current set */
532 
533     for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
534     {
535       if (!follow[i].chr)
536       {
537 	if (! default_state)
538 	  default_state= find_found(found_set,set->table_offset,
539 				    set->found_offset+1);
540       }
541     }
542     copy_bits(sets.set+used_sets,set);		/* Save set for changes */
543     if (!default_state)
544       or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */
545 
546     /* Find all chars that follows current sets */
547     bzero((char*) used_chars,sizeof(used_chars));
548     for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
549     {
550       used_chars[follow[i].chr]=1;
551       if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
552 	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
553 	used_chars[0]=1;
554     }
555 
556     /* Mark word_chars used if \b is in state */
557     if (used_chars[SPACE_CHAR])
558       for (pos= word_end_chars ; *pos ; pos++)
559 	used_chars[(int) (uchar) *pos] = 1;
560 
561     /* Handle other used characters */
562     for (chr= 0 ; chr < 256 ; chr++)
563     {
564       if (! used_chars[chr])
565 	set->next[chr]= (short) (chr ? default_state : -1);
566       else
567       {
568 	new_set=make_new_set(&sets);
569 	set=sets.set+set_nr;			/* if realloc */
570 	new_set->table_offset=set->table_offset;
571 	new_set->found_len=set->found_len;
572 	new_set->found_offset=set->found_offset+1;
573 	found_end=0;
574 
575 	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
576 	{
577 	  if (!follow[i].chr || follow[i].chr == chr ||
578 	      (follow[i].chr == SPACE_CHAR &&
579 	       (is_word_end[chr] ||
580 		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
581 	      (follow[i].chr == END_OF_LINE && ! chr))
582 	  {
583 	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
584 		follow[i].len > found_end)
585 	      found_end=follow[i].len;
586 	    if (chr && follow[i].chr)
587 	      internal_set_bit(new_set,i+1);		/* To next set */
588 	    else
589 	      internal_set_bit(new_set,i);
590 	  }
591 	}
592 	if (found_end)
593 	{
594 	  new_set->found_len=0;			/* Set for testing if first */
595 	  bits_set=0;
596 	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
597 	  {
598 	    if ((follow[i].chr == SPACE_CHAR ||
599 		 follow[i].chr == END_OF_LINE) && ! chr)
600 	      bit_nr=i+1;
601 	    else
602 	      bit_nr=i;
603 	    if (follow[bit_nr-1].len < found_end ||
604 		(new_set->found_len &&
605 		 (chr == 0 || !follow[bit_nr].chr)))
606 	      internal_clear_bit(new_set,i);
607 	    else
608 	    {
609 	      if (chr == 0 || !follow[bit_nr].chr)
610 	      {					/* best match  */
611 		new_set->table_offset=follow[bit_nr].table_offset;
612 		if (chr || (follow[i].chr == SPACE_CHAR ||
613 			    follow[i].chr == END_OF_LINE))
614 		  new_set->found_offset=found_end;	/* New match */
615 		new_set->found_len=found_end;
616 	      }
617 	      bits_set++;
618 	    }
619 	  }
620 	  if (bits_set == 1)
621 	  {
622 	    set->next[chr] = find_found(found_set,
623 					new_set->table_offset,
624 					new_set->found_offset);
625 	    free_last_set(&sets);
626 	  }
627 	  else
628 	    set->next[chr] = find_set(&sets,new_set);
629 	}
630 	else
631 	  set->next[chr] = find_set(&sets,new_set);
632       }
633     }
634   }
635 
636 	/* Alloc replace structure for the replace-state-machine */
637 
638   if ((replace=(REPLACE*) my_malloc(PSI_NOT_INSTRUMENTED,
639                                     sizeof(REPLACE)*(sets.count)+
640 				    sizeof(REPLACE_STRING)*(found_sets+1)+
641 				    sizeof(char *)*count+result_len,
642 				    MYF(MY_WME | MY_ZEROFILL))))
643   {
644     rep_str=(REPLACE_STRING*) (replace+sets.count);
645     to_array=(char **) (rep_str+found_sets+1);
646     to_pos=(char *) (to_array+count);
647     for (i=0 ; i < count ; i++)
648     {
649       to_array[i]=to_pos;
650       to_pos=strmov(to_pos,to[i])+1;
651     }
652     rep_str[0].found=1;
653     rep_str[0].replace_string=0;
654     for (i=1 ; i <= found_sets ; i++)
655     {
656       pos=from[found_set[i-1].table_offset];
657       /*
658         Test if we are matching start of string (\^)
659         We can't use bcmp() here as pos may be only 1 character and
660         that would confuse MSAN.
661       */
662       rep_str[i].found= (uint8) ((pos[0] == '\\' && pos[1] == '^' &&
663                                   pos[2] == 0) ? 2 : 1);
664       rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
665       rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
666       rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
667 	end_of_word(pos);
668     }
669     for (i=0 ; i < sets.count ; i++)
670     {
671       for (j=0 ; j < 256 ; j++)
672 	if (sets.set[i].next[j] >= 0)
673 	  replace[i].next[j]=replace+sets.set[i].next[j];
674 	else
675 	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
676     }
677   }
678   my_free(follow);
679   free_sets(&sets);
680   my_free(found_set);
681   DBUG_PRINT("exit",("Replace table has %d states",sets.count));
682   DBUG_RETURN(replace);
683 }
684 
685 
/*
  Prepare an empty set pool.

  Zeroes *sets, derives size_of_bits from 'states' and allocates the two
  buffers.  Returns 0 on success and 1 if an allocation failed, in which
  case nothing is left allocated.
*/
static int init_sets(REP_SETS *sets,uint states)
{
  bzero((char*) sets,sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
                                              sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(PSI_NOT_INSTRUMENTED,
                                           sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* set_buffer is the allocation made above; sets->set is still 0 here */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
703 
704 	/* Make help sets invisible for nicer codeing */
705 
make_sets_invisible(REP_SETS * sets)706 static void make_sets_invisible(REP_SETS *sets)
707 {
708   sets->invisible=sets->count;
709   sets->set+=sets->count;
710   sets->count=0;
711 }
712 
make_new_set(REP_SETS * sets)713 static REP_SET *make_new_set(REP_SETS *sets)
714 {
715   uint i,count,*bit_buffer;
716   REP_SET *set;
717   if (sets->extra)
718   {
719     sets->extra--;
720     set=sets->set+ sets->count++;
721     bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
722     bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
723     set->found_offset=0;
724     set->found_len=0;
725     set->table_offset= (uint) ~0;
726     set->size_of_bits=sets->size_of_bits;
727     return set;
728   }
729   count=sets->count+sets->invisible+SET_MALLOC_HUNC;
730   if (!(set=(REP_SET*) my_realloc(PSI_NOT_INSTRUMENTED, sets->set_buffer,
731                                   sizeof(REP_SET)*count, MYF(MY_WME))))
732     return 0;
733   sets->set_buffer=set;
734   sets->set=set+sets->invisible;
735   if (!(bit_buffer=(uint*) my_realloc(PSI_NOT_INSTRUMENTED, sets->bit_buffer,
736                                       (sizeof(uint)*sets->size_of_bits)*count,
737                                       MYF(MY_WME))))
738     return 0;
739   sets->bit_buffer=bit_buffer;
740   for (i=0 ; i < count ; i++)
741   {
742     sets->set_buffer[i].bits=bit_buffer;
743     bit_buffer+=sets->size_of_bits;
744   }
745   sets->extra=SET_MALLOC_HUNC;
746   return make_new_set(sets);
747 }
748 
/* Give the most recently handed out set back to the pool. */
static void free_last_set(REP_SETS *sets)
{
  sets->count-= 1;
  sets->extra+= 1;
}
755 
/* Release both buffers of the set pool. */
static void free_sets(REP_SETS *sets)
{
  REP_SET *set_storage= sets->set_buffer;
  uint *bit_storage= sets->bit_buffer;

  my_free(set_storage);
  my_free(bit_storage);
}
762 
/* Set bit number 'bit' in the bitmap of 'set'. */
static void internal_set_bit(REP_SET *set, uint bit)
{
  /* 1U: shifting a signed 1 into the top bit would be undefined */
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
}
768 
/* Clear bit number 'bit' in the bitmap of 'set'. */
static void internal_clear_bit(REP_SET *set, uint bit)
{
  /* 1U: shifting a signed 1 into the top bit would be undefined */
  set->bits[bit / WORD_BIT] &= ~ (1U << (bit % WORD_BIT));
}
774 
775 
/* OR the bitmap of 'from' into the bitmap of 'to'. */
static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *dst_end= dst + to->size_of_bits;
  const uint *src= from->bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
783 
/* Overwrite the bitmap of 'to' with the bitmap of 'from'. */
static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t nbytes= sizeof(uint) * to->size_of_bits;

  memcpy(to->bits, from->bits, nbytes);
}
789 
/* Compare two bitmaps; the result is 0 when they are equal. */
static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t nbytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, nbytes);
}
795 
796 
797 	/* Get next set bit from set. */
798 
get_next_bit(REP_SET * set,uint lastpos)799 static int get_next_bit(REP_SET *set,uint lastpos)
800 {
801   uint pos,*start,*end,bits;
802 
803   start=set->bits+ ((lastpos+1) / WORD_BIT);
804   end=set->bits + set->size_of_bits;
805   bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
806 
807   while (! bits && ++start < end)
808     bits=start[0];
809   if (!bits)
810     return 0;
811   pos=(uint) (start-set->bits)*WORD_BIT;
812   while (! (bits & 1))
813   {
814     bits>>=1;
815     pos++;
816   }
817   return pos;
818 }
819 
820 	/* find if there is a same set in sets. If there is, use it and
821 	   free given set, else put in given set in sets and return it's
822 	   position */
823 
find_set(REP_SETS * sets,REP_SET * find)824 static short find_set(REP_SETS *sets,REP_SET *find)
825 {
826   uint i;
827   for (i=0 ; i < sets->count-1 ; i++)
828   {
829     if (!cmp_bits(sets->set+i,find))
830     {
831       free_last_set(sets);
832       return (short) i;
833     }
834   }
835   return (short) i;			/* return new position */
836 }
837 
838 
839 /*
840   find if there is a found_set with same table_offset & found_offset
841   If there is return offset to it, else add new offset and return pos.
842   Pos returned is -offset-2 in found_set_structure because it's is
843   saved in set->next and set->next[] >= 0 points to next set and
844   set->next[] == -1 is reserved for end without replaces.
845 */
846 
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)847 static short find_found(FOUND_SET *found_set,uint table_offset,
848                         int found_offset)
849 {
850   int i;
851   for (i=0 ; (uint) i < found_sets ; i++)
852     if (found_set[i].table_offset == table_offset &&
853 	found_set[i].found_offset == found_offset)
854       return (short) (-i-2);
855   found_set[i].table_offset=table_offset;
856   found_set[i].found_offset=found_offset;
857   found_sets++;
858   return (short) (-i-2);			/* return new position */
859 }
860 
/*
  Return 1 if the from-string starts with \^, or with \b that is followed
  by at least one more character; otherwise 0.
*/
static uint start_at_word(char * pos)
{
  if (memcmp(pos, "\\^", 2) == 0)
    return 1;
  if (memcmp(pos, "\\b", 2) == 0 && pos[2] != '\0')
    return 1;
  return 0;
}
867 
/*
  Return 1 if the from-string ends with \$, or ends with \b and is longer
  than those two characters; otherwise 0.
*/
static uint end_of_word(char * pos)
{
  size_t length= strlen(pos);
  const char *tail;

  if (length < 2)
    return 0;
  tail= pos + length - 2;
  if (length > 2 && memcmp(tail, "\\b", 2) == 0)
    return 1;
  if (memcmp(tail, "\\$", 2) == 0)
    return 1;
  return 0;
}
875 
876 
/*
  Count the pattern elements of a from-string: a backslash together with
  the character after it counts as one element, a backslash at the very
  end counts on its own.
*/
static uint replace_len(char * str)
{
  uint elements= 0;
  const char *p;

  for (p= str ; *p != '\0' ; p++, elements++)
  {
    if (p[0] == '\\' && p[1] != '\0')
      p++;					/* Skip the escaped character */
  }
  return elements;
}
889 
890 
891 	/* The actual loop */
892 
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)893 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
894                             char *from)
895 {
896   reg1 REPLACE *rep_pos;
897   reg2 REPLACE_STRING *rep_str;
898   char *to, *end, *pos, *new;
899 
900   end=(to= *start) + *max_length-1;
901   rep_pos=rep+1;
902   for(;;)
903   {
904     while (!rep_pos->found)
905     {
906       rep_pos= rep_pos->next[(uchar) *from];
907       if (to == end)
908       {
909 	(*max_length)+=8192;
910 	if (!(new=my_realloc(PSI_NOT_INSTRUMENTED, *start,*max_length,MYF(MY_WME))))
911 	  return (uint) -1;
912 	to=new+(to - *start);
913 	end=(*start=new)+ *max_length-1;
914       }
915       *to++= *from++;
916     }
917     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
918       return (uint) (to - *start)-1;
919     updated=1;			/* Some char * is replaced */
920     to-=rep_str->to_offset;
921     for (pos=rep_str->replace_string; *pos ; pos++)
922     {
923       if (to == end)
924       {
925 	(*max_length)*=2;
926 	if (!(new=my_realloc(PSI_NOT_INSTRUMENTED, *start,*max_length,MYF(MY_WME))))
927 	  return (uint) -1;
928 	to=new+(to - *start);
929 	end=(*start=new)+ *max_length-1;
930       }
931       *to++= *pos;
932     }
933     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
934       return (uint) (to - *start);
935     rep_pos=rep;
936   }
937 }
938 
939 static char *buffer;		/* The buffer itself, grown as needed. */
940 static int bufbytes;		/* Number of bytes in the buffer. */
941 static int bufread,my_eof;		/* Number of bytes to get with each read(). */
942 static uint bufalloc;
943 static char *out_buff;
944 static uint out_length;
945 
initialize_buffer()946 static int initialize_buffer()
947 {
948   bufread = 8192;
949   bufalloc = bufread + bufread / 2;
950   if (!(buffer = my_malloc(PSI_NOT_INSTRUMENTED, bufalloc+1, MYF(MY_WME))))
951     return 1;
952   bufbytes=my_eof=0;
953   out_length=bufread;
954   if (!(out_buff=my_malloc(PSI_NOT_INSTRUMENTED, out_length, MYF(MY_WME))))
955     return(1);
956   return 0;
957 }
958 
reset_buffer()959 static void reset_buffer()
960 {
961   bufbytes=my_eof=0;
962 }
963 
free_buffer()964 static void free_buffer()
965 {
966   my_free(buffer);
967   my_free(out_buff);
968 }
969 
970 
971 /*
972   Fill the buffer retaining the last n bytes at the beginning of the
973   newly filled buffer (for backward context).  Returns the number of new
974   bytes read from disk.
975 */
976 
fill_buffer_retaining(fd,n)977 static int fill_buffer_retaining(fd,n)
978 File fd;
979 int n;
980 {
981   int i;
982 
983   /* See if we need to grow the buffer. */
984   if ((int) bufalloc - n <= bufread)
985   {
986     while ((int) bufalloc - n <= bufread)
987     {
988       bufalloc *= 2;
989       bufread *= 2;
990     }
991     buffer = my_realloc(PSI_NOT_INSTRUMENTED, buffer, bufalloc+1, MYF(MY_WME));
992     if (! buffer)
993       return(-1);
994   }
995 
996   /* Shift stuff down. */
997   bmove(buffer,buffer+bufbytes-n,(uint) n);
998   bufbytes = n;
999 
1000   if (my_eof)
1001     return 0;
1002 
1003   /* Read in new stuff. */
1004   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1005                        (size_t) bufread, MYF(MY_WME))) < 0)
1006     return -1;
1007 
1008   /* Kludge to pretend every nonempty file ends with a newline. */
1009   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1010   {
1011     my_eof = i = 1;
1012     buffer[bufbytes] = '\n';
1013   }
1014 
1015   bufbytes += i;
1016   return i;
1017 }
1018 
1019 	/* Return 0 if convert is ok */
1020 	/* Global variable update is set if something was changed */
1021 
convert_pipe(rep,in,out)1022 static int convert_pipe(rep,in,out)
1023 REPLACE *rep;
1024 FILE *in,*out;
1025 {
1026   int retain,error;
1027   uint length;
1028   char save_char,*end_of_line,*start_of_line;
1029   DBUG_ENTER("convert_pipe");
1030 
1031   updated=retain=0;
1032   reset_buffer();
1033 
1034   while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1035   {
1036     end_of_line=buffer ;
1037     buffer[bufbytes]=0;			/* Sentinel  */
1038     for (;;)
1039     {
1040       start_of_line=end_of_line;
1041       while (end_of_line[0] != '\n' && end_of_line[0])
1042 	end_of_line++;
1043       if (end_of_line == buffer+bufbytes)
1044       {
1045 	retain= (int) (end_of_line - start_of_line);
1046 	break;				/* No end of line, read more */
1047       }
1048       save_char=end_of_line[0];
1049       end_of_line[0]=0;
1050       end_of_line++;
1051       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1052 	  (uint) -1)
1053 	return 1;
1054       if (!my_eof)
1055 	out_buff[length++]=save_char;	/* Don't write added newline */
1056       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1057 	DBUG_RETURN(1);
1058     }
1059   }
1060   DBUG_RETURN(error);
1061 }
1062 
1063 
/*
  Convert one file in place.

  The result is written to a temporary file created in the directory of
  the original (of the link target when 'name' is a symlink and symlinks
  are not disabled).  The temporary file replaces the original only if
  something was replaced and every step succeeded; otherwise it is deleted.

  Returns 0 on success and non-zero on failure.
*/
static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", 0,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    my_fclose(in,MYF(0));
    /*
      Don't leave the temporary file behind.
      NOTE(review): temp_file itself is not closed on this path unless
      my_fdopen() does so on failure - confirm and close it if needed.
    */
    my_delete(tempname,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /*
    Closing flushes the output; a failure here means the temporary file
    is incomplete and must not replace the original.
  */
  if (my_fclose(out,MYF(MY_WME)) && ! error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name, tempname, 0, MYF(MY_WME | MY_LINK_WARNING)))
      error=1;				/* Original was not replaced */
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1112