1 /*
2    Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23    02110-1301  USA */
24 
25 /*
26   Replace strings in textfile
27 
28   This program replaces strings in files or from stdin to stdout.
29   It accepts a list of from-string/to-string pairs and replaces
30   each occurrence of a from-string with the corresponding to-string.
31   The first occurrence of a found string is matched. If there is more
32   than one possibility for the string to replace, longer matches
33   are preferred before shorter matches.
34 
35   Special characters in from string:
36   \^    Match start of line.
37   \$	Match end of line.
38   \b	Match space-character, start of line or end of line.
        For a trailing \b the next replace starts looking at the end
        space-character. A \b alone in a string matches only a space-character.
41   \r, \t, \v as in C.
  The program builds a DFA state machine from the strings, so the speed does
  not depend on the number of replace-strings (only on the number of replaces).
  A line is assumed to end with \n or \0.
  There is no limit except memory on the length of strings.
46 
47   Written by Monty.
48   fill_buffer_retaining() is taken from gnu-grep and modified.
49 */
50 
51 #include <my_global.h>
52 #include <m_ctype.h>
53 #include <my_sys.h>
54 #include <m_string.h>
55 #include <errno.h>
56 
57 #define PC_MALLOC		256	/* Bytes for pointers */
58 #define PS_MALLOC		512	/* Bytes for data */
59 
60 typedef struct st_pointer_array {		/* when using array-strings */
61   TYPELIB typelib;				/* Pointer to strings */
62   uchar *str;					/* Strings is here */
63   uint8	*flag;					/* Flag about each var. */
64   uint  array_allocs,max_count,length,max_length;
65 } POINTER_ARRAY;
66 
67 #define SPACE_CHAR	256
68 #define START_OF_LINE	257
69 #define END_OF_LINE	258
70 #define LAST_CHAR_CODE	259
71 
72 typedef struct st_replace {
73   my_bool   found;
74   struct st_replace *next[256];
75 } REPLACE;
76 
77 typedef struct st_replace_found {
78   my_bool found;
79   char *replace_string;
80   uint to_offset;
81   int from_offset;
82 } REPLACE_STRING;
83 
84 #ifndef WORD_BIT
85 #define WORD_BIT (8*sizeof(uint))
86 #endif
87 
88 	/* functions defined in this file */
89 
90 static int static_get_options(int *argc,char * * *argv);
91 static int get_replace_strings(int *argc,char * * *argv,
92 				   POINTER_ARRAY *from_array,
93 				   POINTER_ARRAY *to_array);
94 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
95 static void free_pointer_array(POINTER_ARRAY *pa);
96 static int convert_pipe(REPLACE *,FILE *,FILE *);
97 static int convert_file(REPLACE *, char *);
98 static REPLACE *init_replace(char * *from, char * *to,uint count,
99                              char * word_end_chars);
100 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
101                             char * from);
102 static int initialize_buffer(void);
103 static void reset_buffer(void);
104 static void free_buffer(void);
105 
106 static int silent=0,verbose=0,updated=0;
107 
108 	/* The main program */
109 
main(int argc,char * argv[])110 int main(int argc, char *argv[])
111 {
112   int i,error;
113   char word_end_chars[256],*pos;
114   POINTER_ARRAY from,to;
115   REPLACE *replace;
116   MY_INIT(argv[0]);
117 
118   if (static_get_options(&argc,&argv))
119     exit(1);
120   if (get_replace_strings(&argc,&argv,&from,&to))
121     exit(1);
122 
123   for (i=1,pos=word_end_chars ; i < 256 ; i++)
124     if (my_isspace(&my_charset_latin1,i))
125       *pos++= (char) i;
126   *pos=0;
127   if (!(replace=init_replace((char**) from.typelib.type_names,
128 			     (char**) to.typelib.type_names,
129 			     (uint) from.typelib.count,word_end_chars)))
130     exit(1);
131   free_pointer_array(&from);
132   free_pointer_array(&to);
133   if (initialize_buffer())
134     return 1;
135 
136   error=0;
137   if (argc == 0)
138     error=convert_pipe(replace,stdin,stdout);
139   else
140   {
141     while (argc--)
142     {
143       error=convert_file(replace,*(argv++));
144     }
145   }
146   free_buffer();
147   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
148   exit(error ? 2 : 0);
149   return 0;					/* No compiler warning */
150 } /* main */
151 
152 
153 	/* reads options */
154 	/* Initiates DEBUG - but no debugging here ! */
155 
static_get_options(argc,argv)156 static int static_get_options(argc,argv)
157 register int *argc;
158 register char **argv[];
159 {
160   int help,version;
161   char *pos;
162 
163   silent=verbose=help=0;
164 
165   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
166     while (*++pos)
167     {
168       version=0;
169       switch((*pos)) {
170       case 's':
171 	silent=1;
172 	break;
173       case 'v':
174 	verbose=1;
175 	break;
176       case '#':
177 	DBUG_PUSH (++pos);
178 	pos= (char*) " ";			/* Skip rest of arguments */
179 	break;
180       case 'V':
181 	version=1;
182       case 'I':
183       case '?':
184 	help=1;					/* Help text written */
185 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
186 	       MACHINE_TYPE);
187 	if (version)
188 	  break;
189 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
190 	puts("This program replaces strings in files or from stdin to stdout.\n"
191 	     "It accepts a list of from-string/to-string pairs and replaces\n"
192 	     "each occurrence of a from-string with the corresponding to-string.\n"
193          "The first occurrence of a found string is matched. If there is\n"
194          "more than one possibility for the string to replace, longer\n"
195          "matches are preferred before shorter matches.\n\n"
196 	     "A from-string can contain these special characters:\n"
197 	     "  \\^      Match start of line.\n"
198 	     "  \\$      Match end of line.\n"
199 	     "  \\b      Match space-character, start of line or end of line.\n"
200 	     "          For a end \\b the next replace starts locking at the end\n"
201 	     "          space-character. A \\b alone in a string matches only a\n"
202 	     "          space-character.\n");
203 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
204 	puts("or");
205 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
206 	puts("");
207 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\"");
208 	break;
209       default:
210 	fprintf(stderr,"illegal option: -%c\n",*pos);
211 	break;
212       }
213     }
214   }
215   if (*argc == 0)
216   {
217     if (!help)
218       my_message(0,"No replace options given",MYF(ME_BELL));
219     exit(0);					/* Don't use as pipe */
220   }
221   return(0);
222 } /* static_get_options */
223 
224 
get_replace_strings(argc,argv,from_array,to_array)225 static int get_replace_strings(argc,argv,from_array,to_array)
226 register int *argc;
227 register char **argv[];
228 POINTER_ARRAY *from_array,*to_array;
229 {
230   char *pos;
231 
232   memset(from_array, 0, sizeof(from_array[0]));
233   memset(to_array, 0, sizeof(to_array[0]));
234   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
235   {
236     insert_pointer_name(from_array,pos);
237     (*argc)--;
238     (*argv)++;
239     if (!*argc || !strcmp(**argv,"--"))
240     {
241       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
242       return 1;
243     }
244     insert_pointer_name(to_array,**argv);
245     (*argc)--;
246     (*argv)++;
247   }
248   if (*argc)
249   {					/* Skip "--" argument */
250     (*argc)--;
251     (*argv)++;
252   }
253   return 0;
254 }
255 
/*
  Append a copy of 'name' to the pointer array.

  The first call allocates the pointer table (with the flag bytes stored
  behind it) and the string buffer.  Later calls grow either one when it
  is about to run full; if the string buffer moves, the stored pointers
  are adjusted by the distance it moved.

  Returns 0 on success and non-zero (-1 or 1) if an allocation failed.
*/
static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
{
  uint idx,name_size,prev_max;
  uchar *moved_str;
  const char **grown_names;
  const size_t slot_size= sizeof(char *)+sizeof(*pa->flag);
  DBUG_ENTER("insert_pointer_name");

  if (pa->typelib.count == 0)
  {
    /* First string: set up both buffers */
    pa->typelib.type_names= (const char **)
      my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/slot_size*slot_size),
                MYF(MY_WME));
    if (!pa->typelib.type_names)
      DBUG_RETURN(-1);
    pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                MYF(MY_WME));
    if (!pa->str)
    {
      my_free(pa->typelib.type_names);
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/slot_size;
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }

  name_size=(uint) strlen(name)+1;		/* Include end \0 */
  if (pa->length+name_size >= pa->max_length)
  {
    /* Round the needed size up to whole PS_MALLOC chunks */
    uint chunks= (pa->length+name_size+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    pa->max_length= chunks*PS_MALLOC-MALLOC_OVERHEAD;
    moved_str= (uchar*) my_realloc((uchar*) pa->str,
                                   (uint) pa->max_length,
                                   MYF(MY_WME));
    if (!moved_str)
      DBUG_RETURN(1);
    if (moved_str != pa->str)
    {
      /* Buffer moved; shift every stored pointer by the same distance */
      my_ptrdiff_t shift=PTR_BYTE_DIFF(moved_str,pa->str);
      for (idx=0 ; idx < pa->typelib.count ; idx++)
        pa->typelib.type_names[idx]=
          ADD_TO_PTR(pa->typelib.type_names[idx],shift,char*);
      pa->str=moved_str;
    }
  }

  if (pa->typelib.count >= pa->max_count-1)
  {
    /* Pointer table is full (one slot is kept for the end-mark) */
    int table_bytes;
    pa->array_allocs++;
    table_bytes=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
    grown_names= (const char **)
      my_realloc((uchar*) pa->typelib.type_names,
                 (uint) table_bytes/slot_size*slot_size,
                 MYF(MY_WME));
    if (!grown_names)
      DBUG_RETURN(1);
    pa->typelib.type_names=grown_names;
    prev_max=pa->max_count;
    pa->max_count=table_bytes/slot_size;
    /* The flags live behind the pointers; move them to their new place */
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+prev_max),
           prev_max*sizeof(*pa->flag));
  }

  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=name_size;
  DBUG_RETURN(0);
} /* insert_pointer_name */
326 
327 
328 	/* free pointer array */
329 
free_pointer_array(reg1 POINTER_ARRAY * pa)330 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
331 {
332   if (pa->typelib.count)
333   {
334     pa->typelib.count=0;
335     my_free(pa->typelib.type_names);
336     pa->typelib.type_names=0;
337     my_free(pa->str);
338   }
339   return;
340 } /* free_pointer_array */
341 
342 
343 	/* Code for replace rutines */
344 
345 #define SET_MALLOC_HUNC 64
346 
347 typedef struct st_rep_set {
348   uint  *bits;				/* Pointer to used sets */
349   short	next[LAST_CHAR_CODE];		/* Pointer to next sets */
350   uint	found_len;			/* Best match to date */
351   int	found_offset;
352   uint  table_offset;
353   uint  size_of_bits;			/* For convinience */
354 } REP_SET;
355 
356 typedef struct st_rep_sets {
357   uint		count;			/* Number of sets */
358   uint		extra;			/* Extra sets in buffer */
359   uint		invisible;		/* Sets not chown */
360   uint		size_of_bits;
361   REP_SET	*set,*set_buffer;
362   uint		*bit_buffer;
363 } REP_SETS;
364 
365 typedef struct st_found_set {
366   uint table_offset;
367   int found_offset;
368 } FOUND_SET;
369 
370 typedef struct st_follow {
371   int chr;
372   uint table_offset;
373   uint len;
374 } FOLLOWS;
375 
376 
377 static int init_sets(REP_SETS *sets,uint states);
378 static REP_SET *make_new_set(REP_SETS *sets);
379 static void make_sets_invisible(REP_SETS *sets);
380 static void free_last_set(REP_SETS *sets);
381 static void free_sets(REP_SETS *sets);
382 static void internal_set_bit(REP_SET *set, uint bit);
383 static void internal_clear_bit(REP_SET *set, uint bit);
384 static void or_bits(REP_SET *to,REP_SET *from);
385 static void copy_bits(REP_SET *to,REP_SET *from);
386 static int cmp_bits(REP_SET *set1,REP_SET *set2);
387 static int get_next_bit(REP_SET *set,uint lastpos);
388 static short find_set(REP_SETS *sets,REP_SET *find);
389 static short find_found(FOUND_SET *found_set,uint table_offset,
390                         int found_offset);
391 static uint start_at_word(char * pos);
392 static uint end_of_word(char * pos);
393 static uint replace_len(char * pos);
394 
395 static uint found_sets=0;
396 
397 
398 	/* Init a replace structure for further calls */
399 
init_replace(char ** from,char ** to,uint count,char * word_end_chars)400 static REPLACE *init_replace(char * *from, char * *to,uint count,
401                              char * word_end_chars)
402 {
403   uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
404   int used_sets,chr;
405   short default_state;
406   char used_chars[LAST_CHAR_CODE],is_word_end[256];
407   char * pos, *to_pos, **to_array;
408   REP_SETS sets;
409   REP_SET *set,*start_states,*word_states,*new_set;
410   FOLLOWS *follow,*follow_ptr;
411   REPLACE *replace;
412   FOUND_SET *found_set;
413   REPLACE_STRING *rep_str;
414   DBUG_ENTER("init_replace");
415 
416   /* Count number of states */
417   for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
418   {
419     len=replace_len(from[i]);
420     if (!len)
421     {
422       errno=EINVAL;
423       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
424       DBUG_RETURN(0);
425     }
426     states+=len+1;
427     result_len+=(uint) strlen(to[i])+1;
428     if (len > max_length)
429       max_length=len;
430   }
431   memset(is_word_end, 0, sizeof(is_word_end));
432   for (i=0 ; word_end_chars[i] ; i++)
433     is_word_end[(uchar) word_end_chars[i]]=1;
434 
435   if (init_sets(&sets,states))
436     DBUG_RETURN(0);
437   found_sets=0;
438   if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
439 					  MYF(MY_WME))))
440   {
441     free_sets(&sets);
442     DBUG_RETURN(0);
443   }
444   (void) make_new_set(&sets);			/* Set starting set */
445   make_sets_invisible(&sets);			/* Hide previus sets */
446   used_sets=-1;
447   word_states=make_new_set(&sets);		/* Start of new word */
448   start_states=make_new_set(&sets);		/* This is first state */
449   if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
450   {
451     free_sets(&sets);
452     my_free(found_set);
453     DBUG_RETURN(0);
454   }
455 
456 	/* Init follow_ptr[] */
457   for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
458   {
459     if (from[i][0] == '\\' && from[i][1] == '^')
460     {
461       internal_set_bit(start_states,states+1);
462       if (!from[i][2])
463       {
464 	start_states->table_offset=i;
465 	start_states->found_offset=1;
466       }
467     }
468     else if (from[i][0] == '\\' && from[i][1] == '$')
469     {
470       internal_set_bit(start_states,states);
471       internal_set_bit(word_states,states);
472       if (!from[i][2] && start_states->table_offset == (uint) ~0)
473       {
474 	start_states->table_offset=i;
475 	start_states->found_offset=0;
476       }
477     }
478     else
479     {
480       internal_set_bit(word_states,states);
481       if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
482 	internal_set_bit(start_states,states+1);
483       else
484 	internal_set_bit(start_states,states);
485     }
486     for (pos=from[i], len=0; *pos ; pos++)
487     {
488       if (*pos == '\\' && *(pos+1))
489       {
490 	pos++;
491 	switch (*pos) {
492 	case 'b':
493 	  follow_ptr->chr = SPACE_CHAR;
494 	  break;
495 	case '^':
496 	  follow_ptr->chr = START_OF_LINE;
497 	  break;
498 	case '$':
499 	  follow_ptr->chr = END_OF_LINE;
500 	  break;
501 	case 'r':
502 	  follow_ptr->chr = '\r';
503 	  break;
504 	case 't':
505 	  follow_ptr->chr = '\t';
506 	  break;
507 	case 'v':
508 	  follow_ptr->chr = '\v';
509 	  break;
510 	default:
511 	  follow_ptr->chr = (uchar) *pos;
512 	  break;
513 	}
514       }
515       else
516 	follow_ptr->chr= (uchar) *pos;
517       follow_ptr->table_offset=i;
518       follow_ptr->len= ++len;
519       follow_ptr++;
520     }
521     follow_ptr->chr=0;
522     follow_ptr->table_offset=i;
523     follow_ptr->len=len;
524     follow_ptr++;
525     states+=(uint) len+1;
526   }
527 
528 
529   for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
530   {
531     set=sets.set+set_nr;
532     default_state= 0;				/* Start from beginning */
533 
534     /* If end of found-string not found or start-set with current set */
535 
536     for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
537     {
538       if (!follow[i].chr)
539       {
540 	if (! default_state)
541 	  default_state= find_found(found_set,set->table_offset,
542 				    set->found_offset+1);
543       }
544     }
545     copy_bits(sets.set+used_sets,set);		/* Save set for changes */
546     if (!default_state)
547       or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */
548 
549     /* Find all chars that follows current sets */
550     memset(used_chars, 0, sizeof(used_chars));
551     for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
552     {
553       used_chars[follow[i].chr]=1;
554       if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
555 	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
556 	used_chars[0]=1;
557     }
558 
559     /* Mark word_chars used if \b is in state */
560     if (used_chars[SPACE_CHAR])
561       for (pos= word_end_chars ; *pos ; pos++)
562 	used_chars[(int) (uchar) *pos] = 1;
563 
564     /* Handle other used characters */
565     for (chr= 0 ; chr < 256 ; chr++)
566     {
567       if (! used_chars[chr])
568 	set->next[chr]= (short) (chr ? default_state : -1);
569       else
570       {
571 	new_set=make_new_set(&sets);
572 	set=sets.set+set_nr;			/* if realloc */
573 	new_set->table_offset=set->table_offset;
574 	new_set->found_len=set->found_len;
575 	new_set->found_offset=set->found_offset+1;
576 	found_end=0;
577 
578 	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
579 	{
580 	  if (!follow[i].chr || follow[i].chr == chr ||
581 	      (follow[i].chr == SPACE_CHAR &&
582 	       (is_word_end[chr] ||
583 		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
584 	      (follow[i].chr == END_OF_LINE && ! chr))
585 	  {
586 	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
587 		follow[i].len > found_end)
588 	      found_end=follow[i].len;
589 	    if (chr && follow[i].chr)
590 	      internal_set_bit(new_set,i+1);		/* To next set */
591 	    else
592 	      internal_set_bit(new_set,i);
593 	  }
594 	}
595 	if (found_end)
596 	{
597 	  new_set->found_len=0;			/* Set for testing if first */
598 	  bits_set=0;
599 	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
600 	  {
601 	    if ((follow[i].chr == SPACE_CHAR ||
602 		 follow[i].chr == END_OF_LINE) && ! chr)
603 	      bit_nr=i+1;
604 	    else
605 	      bit_nr=i;
606 	    if (follow[bit_nr-1].len < found_end ||
607 		(new_set->found_len &&
608 		 (chr == 0 || !follow[bit_nr].chr)))
609 	      internal_clear_bit(new_set,i);
610 	    else
611 	    {
612 	      if (chr == 0 || !follow[bit_nr].chr)
613 	      {					/* best match  */
614 		new_set->table_offset=follow[bit_nr].table_offset;
615 		if (chr || (follow[i].chr == SPACE_CHAR ||
616 			    follow[i].chr == END_OF_LINE))
617 		  new_set->found_offset=found_end;	/* New match */
618 		new_set->found_len=found_end;
619 	      }
620 	      bits_set++;
621 	    }
622 	  }
623 	  if (bits_set == 1)
624 	  {
625 	    set->next[chr] = find_found(found_set,
626 					new_set->table_offset,
627 					new_set->found_offset);
628 	    free_last_set(&sets);
629 	  }
630 	  else
631 	    set->next[chr] = find_set(&sets,new_set);
632 	}
633 	else
634 	  set->next[chr] = find_set(&sets,new_set);
635       }
636     }
637   }
638 
639 	/* Alloc replace structure for the replace-state-machine */
640 
641   if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
642 				    sizeof(REPLACE_STRING)*(found_sets+1)+
643 				    sizeof(char *)*count+result_len,
644 				    MYF(MY_WME | MY_ZEROFILL))))
645   {
646     rep_str=(REPLACE_STRING*) (replace+sets.count);
647     to_array=(char **) (rep_str+found_sets+1);
648     to_pos=(char *) (to_array+count);
649     for (i=0 ; i < count ; i++)
650     {
651       to_array[i]=to_pos;
652       to_pos=strmov(to_pos,to[i])+1;
653     }
654     rep_str[0].found=1;
655     rep_str[0].replace_string=0;
656     for (i=1 ; i <= found_sets ; i++)
657     {
658       pos=from[found_set[i-1].table_offset];
659       rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
660       rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
661       rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
662       rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
663 	end_of_word(pos);
664     }
665     for (i=0 ; i < sets.count ; i++)
666     {
667       for (j=0 ; j < 256 ; j++)
668 	if (sets.set[i].next[j] >= 0)
669 	  replace[i].next[j]=replace+sets.set[i].next[j];
670 	else
671 	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
672     }
673   }
674   my_free(follow);
675   free_sets(&sets);
676   my_free(found_set);
677   DBUG_PRINT("exit",("Replace table has %d states",sets.count));
678   DBUG_RETURN(replace);
679 }
680 
681 
/*
  Prepare an empty REP_SETS for bitmaps that can hold 'states' bits.

  size_of_bits is computed as (states+7)/8 and is then used as the number
  of uint words per bitmap.

  Both buffers are allocated here; the sets themselves are handed out by
  make_new_set().

  Returns 0 on success, 1 if out of memory (nothing stays allocated).
*/
static int init_sets(REP_SETS *sets,uint states)
{
  memset(sets, 0, sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* Release the buffer allocated above; sets->set is still unset here */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
697 
698 	/* Make help sets invisible for nicer codeing */
699 
make_sets_invisible(REP_SETS * sets)700 static void make_sets_invisible(REP_SETS *sets)
701 {
702   sets->invisible=sets->count;
703   sets->set+=sets->count;
704   sets->count=0;
705 }
706 
make_new_set(REP_SETS * sets)707 static REP_SET *make_new_set(REP_SETS *sets)
708 {
709   uint i,count,*bit_buffer;
710   REP_SET *set;
711   if (sets->extra)
712   {
713     sets->extra--;
714     set=sets->set+ sets->count++;
715     memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
716     memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
717     set->found_offset=0;
718     set->found_len=0;
719     set->table_offset= (uint) ~0;
720     set->size_of_bits=sets->size_of_bits;
721     return set;
722   }
723   count=sets->count+sets->invisible+SET_MALLOC_HUNC;
724   if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
725 				   sizeof(REP_SET)*count,
726 				  MYF(MY_WME))))
727     return 0;
728   sets->set_buffer=set;
729   sets->set=set+sets->invisible;
730   if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
731 				      (sizeof(uint)*sets->size_of_bits)*count,
732 				      MYF(MY_WME))))
733     return 0;
734   sets->bit_buffer=bit_buffer;
735   for (i=0 ; i < count ; i++)
736   {
737     sets->set_buffer[i].bits=bit_buffer;
738     bit_buffer+=sets->size_of_bits;
739   }
740   sets->extra=SET_MALLOC_HUNC;
741   return make_new_set(sets);
742 }
743 
/* Give the most recently made set back so that it can be handed out again */
static void free_last_set(REP_SETS *sets)
{
  sets->extra++;
  sets->count--;
}
750 
/* Release the set array and the bitmap buffer */
static void free_sets(REP_SETS *sets)
{
  my_free(sets->set_buffer);
  my_free(sets->bit_buffer);
}
757 
/*
  Set bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned 1 so that setting the top bit of a
  word does not shift into the sign bit of an int.
*/
static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
  return;
}
763 
/*
  Clear bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned 1 so that clearing the top bit of a
  word does not shift into the sign bit of an int.
*/
static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~ (1U << (bit % WORD_BIT));
  return;
}
769 
770 
/* Add every bit set in 'from' to 'to' (to |= from), word by word */
static void or_bits(REP_SET *to,REP_SET *from)
{
  reg1 uint word;

  for (word= 0 ; word < to->size_of_bits ; word++)
    to->bits[word]= to->bits[word] | from->bits[word];
}
778 
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */
static void copy_bits(REP_SET *to,REP_SET *from)
{
  const size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits,(uchar*) from->bits, bytes);
}
784 
/* Compare two bitmaps; returns 0 if they are equal (memcmp() result) */
static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  const size_t bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bytes);
}
790 
791 
792 	/* Get next set bit from set. */
793 
get_next_bit(REP_SET * set,uint lastpos)794 static int get_next_bit(REP_SET *set,uint lastpos)
795 {
796   uint pos,*start,*end,bits;
797 
798   start=set->bits+ ((lastpos+1) / WORD_BIT);
799   end=set->bits + set->size_of_bits;
800   bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
801 
802   while (! bits && ++start < end)
803     bits=start[0];
804   if (!bits)
805     return 0;
806   pos=(uint) (start-set->bits)*WORD_BIT;
807   while (! (bits & 1))
808   {
809     bits>>=1;
810     pos++;
811   }
812   return pos;
813 }
814 
815 	/* find if there is a same set in sets. If there is, use it and
816 	   free given set, else put in given set in sets and return it's
817 	   position */
818 
find_set(REP_SETS * sets,REP_SET * find)819 static short find_set(REP_SETS *sets,REP_SET *find)
820 {
821   uint i;
822   for (i=0 ; i < sets->count-1 ; i++)
823   {
824     if (!cmp_bits(sets->set+i,find))
825     {
826       free_last_set(sets);
827       return (short) i;
828     }
829   }
830   return (short) i;			/* return new position */
831 }
832 
833 
834 /*
835   find if there is a found_set with same table_offset & found_offset
836   If there is return offset to it, else add new offset and return pos.
837   Pos returned is -offset-2 in found_set_structure because it's is
838   saved in set->next and set->next[] >= 0 points to next set and
839   set->next[] == -1 is reserved for end without replaces.
840 */
841 
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)842 static short find_found(FOUND_SET *found_set,uint table_offset,
843                         int found_offset)
844 {
845   int i;
846   for (i=0 ; (uint) i < found_sets ; i++)
847     if (found_set[i].table_offset == table_offset &&
848 	found_set[i].found_offset == found_offset)
849       return (short) (-i-2);
850   found_set[i].table_offset=table_offset;
851   found_set[i].found_offset=found_offset;
852   found_sets++;
853   return (short) (-i-2);			/* return new position */
854 }
855 
856 	/* Return 1 if regexp starts with \b or ends with \b*/
857 
start_at_word(char * pos)858 static uint start_at_word(char * pos)
859 {
860   return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
861 }
862 
end_of_word(char * pos)863 static uint end_of_word(char * pos)
864 {
865   char * end=strend(pos);
866   return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
867 	  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
868 	    1 : 0;
869 }
870 
871 
replace_len(char * str)872 static uint replace_len(char * str)
873 {
874   uint len=0;
875   while (*str)
876   {
877     if (str[0] == '\\' && str[1])
878       str++;
879     str++;
880     len++;
881   }
882   return len;
883 }
884 
885 
886 	/* The actual loop */
887 
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)888 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
889                             char *from)
890 {
891   reg1 REPLACE *rep_pos;
892   reg2 REPLACE_STRING *rep_str;
893   char *to, *end, *pos, *new;
894 
895   end=(to= *start) + *max_length-1;
896   rep_pos=rep+1;
897   for(;;)
898   {
899     while (!rep_pos->found)
900     {
901       rep_pos= rep_pos->next[(uchar) *from];
902       if (to == end)
903       {
904 	(*max_length)+=8192;
905 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
906 	  return (uint) -1;
907 	to=new+(to - *start);
908 	end=(*start=new)+ *max_length-1;
909       }
910       *to++= *from++;
911     }
912     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
913       return (uint) (to - *start)-1;
914     updated=1;			/* Some char * is replaced */
915     to-=rep_str->to_offset;
916     for (pos=rep_str->replace_string; *pos ; pos++)
917     {
918       if (to == end)
919       {
920 	(*max_length)*=2;
921 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
922 	  return (uint) -1;
923 	to=new+(to - *start);
924 	end=(*start=new)+ *max_length-1;
925       }
926       *to++= *pos;
927     }
928     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
929       return (uint) (to - *start);
930     rep_pos=rep;
931   }
932 }
933 
934 static char *buffer;		/* The buffer itself, grown as needed. */
935 static int bufbytes;		/* Number of bytes in the buffer. */
936 static int bufread,my_eof;		/* Number of bytes to get with each read(). */
937 static uint bufalloc;
938 static char *out_buff;
939 static uint out_length;
940 
initialize_buffer()941 static int initialize_buffer()
942 {
943   bufread = 8192;
944   bufalloc = bufread + bufread / 2;
945   if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
946     return 1;
947   bufbytes=my_eof=0;
948   out_length=bufread;
949   if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
950     return(1);
951   return 0;
952 }
953 
reset_buffer()954 static void reset_buffer()
955 {
956   bufbytes=my_eof=0;
957 }
958 
free_buffer()959 static void free_buffer()
960 {
961   my_free(buffer);
962   my_free(out_buff);
963 }
964 
965 
966 /*
967   Fill the buffer retaining the last n bytes at the beginning of the
968   newly filled buffer (for backward context).  Returns the number of new
969   bytes read from disk.
970 */
971 
fill_buffer_retaining(fd,n)972 static int fill_buffer_retaining(fd,n)
973 File fd;
974 int n;
975 {
976   int i;
977 
978   /* See if we need to grow the buffer. */
979   if ((int) bufalloc - n <= bufread)
980   {
981     while ((int) bufalloc - n <= bufread)
982     {
983       bufalloc *= 2;
984       bufread *= 2;
985     }
986     buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
987     if (! buffer)
988       return(-1);
989   }
990 
991   /* Shift stuff down. */
992   bmove(buffer,buffer+bufbytes-n,(uint) n);
993   bufbytes = n;
994 
995   if (my_eof)
996     return 0;
997 
998   /* Read in new stuff. */
999   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1000                        (size_t) bufread, MYF(MY_WME))) < 0)
1001     return -1;
1002 
1003   /* Kludge to pretend every nonempty file ends with a newline. */
1004   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1005   {
1006     my_eof = i = 1;
1007     buffer[bufbytes] = '\n';
1008   }
1009 
1010   bufbytes += i;
1011   return i;
1012 }
1013 
1014 	/* Return 0 if convert is ok */
1015 	/* Global variable update is set if something was changed */
1016 
convert_pipe(rep,in,out)1017 static int convert_pipe(rep,in,out)
1018 REPLACE *rep;
1019 FILE *in,*out;
1020 {
1021   int retain,error;
1022   uint length;
1023   char save_char,*end_of_line,*start_of_line;
1024   DBUG_ENTER("convert_pipe");
1025 
1026   updated=retain=0;
1027   reset_buffer();
1028 
1029   while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1030   {
1031     end_of_line=buffer ;
1032     buffer[bufbytes]=0;			/* Sentinel  */
1033     for (;;)
1034     {
1035       start_of_line=end_of_line;
1036       while (end_of_line[0] != '\n' && end_of_line[0])
1037 	end_of_line++;
1038       if (end_of_line == buffer+bufbytes)
1039       {
1040 	retain= (int) (end_of_line - start_of_line);
1041 	break;				/* No end of line, read more */
1042       }
1043       save_char=end_of_line[0];
1044       end_of_line[0]=0;
1045       end_of_line++;
1046       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1047 	  (uint) -1)
1048 	return 1;
1049       if (!my_eof)
1050 	out_buff[length++]=save_char;	/* Don't write added newline */
1051       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1052 	DBUG_RETURN(1);
1053     }
1054   }
1055   DBUG_RETURN(error);
1056 }
1057 
1058 
/*
  Convert one file in place.

  The converted text is written to a temporary file created in the
  directory of the original.  If something was replaced and no error
  occurred, the temporary file takes the place of the original through
  my_redel(); otherwise it is deleted.  If 'name' is a symbolic link
  (and symlink handling is enabled) the file it points to is converted.

  A failure to close the output or to put the new file in place counts
  as an error, so that a half-written result is not reported as
  "converted".

  Returns 0 on success, non-zero on error.
*/
static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* No stream owns the descriptor; close it and drop the temp file */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* Closing flushes buffered output, so a failure here means lost data */
  if (my_fclose(out,MYF(MY_WME)) && ! error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1107