1 /*
2    Copyright (c) 2000, 2014, Oracle and/or its affiliates
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; version 2 of
7    the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17    02110-1335  USA */
18 
19 /*
20   Replace strings in textfile
21 
22   This program replaces strings in files or from stdin to stdout.
23   It accepts a list of from-string/to-string pairs and replaces
24   each occurrence of a from-string with the corresponding to-string.
25   The first occurrence of a found string is matched. If there is more
26   than one possibility for the string to replace, longer matches
27   are preferred before shorter matches.
28 
29   Special characters in from string:
30   \^    Match start of line.
31   \$	Match end of line.
32   \b	Match space-character, start of line or end of line.
33         For end \b the next replace starts locking at the end space-character.
34         An \b alone or in a string matches only a space-character.
35   \r, \t, \v as in C.
36   The programs make a DFA-state-machine of the strings and the speed isn't
37   dependent on the count of replace-strings (only of the number of replaces).
38   A line is assumed ending with \n or \0.
39   There are no limit except memory on length of strings.
40 
41   Written by Monty.
42   fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44 
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50 
51 #define PC_MALLOC		256	/* Bytes for pointers */
52 #define PS_MALLOC		512	/* Bytes for data */
53 
54 typedef struct st_pointer_array {		/* when using array-strings */
55   TYPELIB typelib;				/* Pointer to strings */
56   uchar *str;					/* Strings is here */
57   uint8	*flag;					/* Flag about each var. */
58   uint  array_allocs,max_count,length,max_length;
59 } POINTER_ARRAY;
60 
61 #define SPACE_CHAR	256
62 #define START_OF_LINE	257
63 #define END_OF_LINE	258
64 #define LAST_CHAR_CODE	259
65 
66 typedef struct st_replace {
67   my_bool   found;
68   struct st_replace *next[256];
69 } REPLACE;
70 
71 typedef struct st_replace_found {
72   my_bool found;
73   char *replace_string;
74   uint to_offset;
75   int from_offset;
76 } REPLACE_STRING;
77 
78 #ifndef WORD_BIT
79 #define WORD_BIT (8*sizeof(uint))
80 #endif
81 
82 	/* functions defined in this file */
83 
84 static int static_get_options(int *argc,char * * *argv);
85 static int get_replace_strings(int *argc,char * * *argv,
86 				   POINTER_ARRAY *from_array,
87 				   POINTER_ARRAY *to_array);
88 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
89 static void free_pointer_array(POINTER_ARRAY *pa);
90 static int convert_pipe(REPLACE *,FILE *,FILE *);
91 static int convert_file(REPLACE *, char *);
92 static REPLACE *init_replace(char * *from, char * *to,uint count,
93                              char * word_end_chars);
94 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
95                             char * from);
96 static int initialize_buffer(void);
97 static void reset_buffer(void);
98 static void free_buffer(void);
99 
100 static int silent=0,verbose=0,updated=0;
101 
102 	/* The main program */
103 
main(int argc,char * argv[])104 int main(int argc, char *argv[])
105 {
106   int i,error;
107   char word_end_chars[256],*pos;
108   POINTER_ARRAY from,to;
109   REPLACE *replace;
110   MY_INIT(argv[0]);
111 
112   if (static_get_options(&argc,&argv))
113     exit(1);
114   if (get_replace_strings(&argc,&argv,&from,&to))
115     exit(1);
116 
117   for (i=1,pos=word_end_chars ; i < 256 ; i++)
118     if (my_isspace(&my_charset_latin1,i))
119       *pos++= (char) i;
120   *pos=0;
121   if (!(replace=init_replace((char**) from.typelib.type_names,
122 			     (char**) to.typelib.type_names,
123 			     (uint) from.typelib.count,word_end_chars)))
124     exit(1);
125   free_pointer_array(&from);
126   free_pointer_array(&to);
127   if (initialize_buffer())
128     return 1;
129 
130   error=0;
131   if (argc == 0)
132     error=convert_pipe(replace,stdin,stdout);
133   else
134   {
135     while (argc--)
136     {
137       error=convert_file(replace,*(argv++));
138     }
139   }
140   free_buffer();
141   my_free(replace);
142   my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
143   exit(error ? 2 : 0);
144   return 0;					/* No compiler warning */
145 } /* main */
146 
147 
148 	/* reads options */
149 	/* Initiates DEBUG - but no debugging here ! */
150 
static_get_options(argc,argv)151 static int static_get_options(argc,argv)
152 register int *argc;
153 register char **argv[];
154 {
155   int help,version;
156   char *pos;
157 
158   silent=verbose=help=0;
159 
160   while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
161     while (*++pos)
162     {
163       version=0;
164       switch((*pos)) {
165       case 's':
166 	silent=1;
167 	break;
168       case 'v':
169 	verbose=1;
170 	break;
171       case '#':
172 	DBUG_PUSH (++pos);
173 	pos= (char*) " ";			/* Skip rest of arguments */
174 	break;
175       case 'V':
176 	version=1;
177         /* fall through */
178       case 'I':
179       case '?':
180 	help=1;					/* Help text written */
181 	printf("%s  Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
182 	       MACHINE_TYPE);
183 	if (version)
184 	  break;
185 	puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
186 	puts("This program replaces strings in files or from stdin to stdout.\n"
187 	     "It accepts a list of from-string/to-string pairs and replaces\n"
188 	     "each occurrence of a from-string with the corresponding to-string.\n"
189          "The first occurrence of a found string is matched. If there is\n"
190          "more than one possibility for the string to replace, longer\n"
191          "matches are preferred before shorter matches.\n\n"
192 	     "A from-string can contain these special characters:\n"
193 	     "  \\^      Match start of line.\n"
194 	     "  \\$      Match end of line.\n"
195 	     "  \\b      Match space-character, start of line or end of line.\n"
196 	     "          For a end \\b the next replace starts locking at the end\n"
197 	     "          space-character. A \\b alone in a string matches only a\n"
198 	     "          space-character.\n");
199 	  printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
200 	puts("or");
201 	  printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
202 	puts("");
203 	puts("Options: -? or -I \"Info\"  -s \"silent\"      -v \"verbose\"");
204 	break;
205       default:
206 	fprintf(stderr,"illegal option: -%c\n",*pos);
207 	break;
208       }
209     }
210   }
211   if (*argc == 0)
212   {
213     if (!help)
214       my_message(0,"No replace options given",MYF(ME_BELL));
215     exit(0);					/* Don't use as pipe */
216   }
217   return(0);
218 } /* static_get_options */
219 
220 
get_replace_strings(argc,argv,from_array,to_array)221 static int get_replace_strings(argc,argv,from_array,to_array)
222 register int *argc;
223 register char **argv[];
224 POINTER_ARRAY *from_array,*to_array;
225 {
226   char *pos;
227 
228   bzero((char*) from_array,sizeof(from_array[0]));
229   bzero((char*) to_array,sizeof(to_array[0]));
230   while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
231   {
232     insert_pointer_name(from_array,pos);
233     (*argc)--;
234     (*argv)++;
235     if (!*argc || !strcmp(**argv,"--"))
236     {
237       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
238       return 1;
239     }
240     insert_pointer_name(to_array,**argv);
241     (*argc)--;
242     (*argv)++;
243   }
244   if (*argc)
245   {					/* Skip "--" argument */
246     (*argc)--;
247     (*argv)++;
248   }
249   return 0;
250 }
251 
insert_pointer_name(reg1 POINTER_ARRAY * pa,char * name)252 static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
253 {
254   uint i,length,old_count;
255   uchar *new_pos;
256   const char **new_array;
257   DBUG_ENTER("insert_pointer_name");
258 
259   if (! pa->typelib.count)
260   {
261     if (!(pa->typelib.type_names=(const char **)
262 	  my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
263 		     (sizeof(char *)+sizeof(*pa->flag))*
264 		     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
265       DBUG_RETURN(-1);
266     if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
267 				     MYF(MY_WME))))
268     {
269       my_free((void*) pa->typelib.type_names);
270       DBUG_RETURN (-1);
271     }
272     pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
273 					       sizeof(*pa->flag));
274     pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
275     pa->length=0;
276     pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
277     pa->array_allocs=1;
278   }
279   length=(uint) strlen(name)+1;
280   if (pa->length+length >= pa->max_length)
281   {
282     pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
283     pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
284     if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
285 				      (uint) pa->max_length,
286 				      MYF(MY_WME))))
287       DBUG_RETURN(1);
288     if (new_pos != pa->str)
289     {
290       my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
291       for (i=0 ; i < pa->typelib.count ; i++)
292 	pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
293 					      char*);
294       pa->str=new_pos;
295     }
296   }
297   if (pa->typelib.count >= pa->max_count-1)
298   {
299     int len;
300     pa->array_allocs++;
301     len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
302     if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
303 					       (uint) len/
304 					 (sizeof(uchar*)+sizeof(*pa->flag))*
305 					 (sizeof(uchar*)+sizeof(*pa->flag)),
306 					 MYF(MY_WME))))
307       DBUG_RETURN(1);
308     pa->typelib.type_names=new_array;
309     old_count=pa->max_count;
310     pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
311     pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
312     memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
313 	   old_count*sizeof(*pa->flag));
314   }
315   pa->flag[pa->typelib.count]=0;			/* Reset flag */
316   pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
317   pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
318   (void) strmov((char*) pa->str + pa->length, name);
319   pa->length+=length;
320   DBUG_RETURN(0);
321 } /* insert_pointer_name */
322 
323 
324 	/* free pointer array */
325 
free_pointer_array(reg1 POINTER_ARRAY * pa)326 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
327 {
328   if (pa->typelib.count)
329   {
330     pa->typelib.count=0;
331     my_free((void*) pa->typelib.type_names);
332     pa->typelib.type_names=0;
333     my_free(pa->str);
334   }
335   return;
336 } /* free_pointer_array */
337 
338 
339 	/* Code for replace rutines */
340 
341 #define SET_MALLOC_HUNC 64
342 
343 typedef struct st_rep_set {
344   uint  *bits;				/* Pointer to used sets */
345   short	next[LAST_CHAR_CODE];		/* Pointer to next sets */
346   uint	found_len;			/* Best match to date */
347   int	found_offset;
348   uint  table_offset;
349   uint  size_of_bits;			/* For convinience */
350 } REP_SET;
351 
352 typedef struct st_rep_sets {
353   uint		count;			/* Number of sets */
354   uint		extra;			/* Extra sets in buffer */
355   uint		invisible;		/* Sets not chown */
356   uint		size_of_bits;
357   REP_SET	*set,*set_buffer;
358   uint		*bit_buffer;
359 } REP_SETS;
360 
361 typedef struct st_found_set {
362   uint table_offset;
363   int found_offset;
364 } FOUND_SET;
365 
366 typedef struct st_follow {
367   int chr;
368   uint table_offset;
369   uint len;
370 } FOLLOWS;
371 
372 
373 static int init_sets(REP_SETS *sets,uint states);
374 static REP_SET *make_new_set(REP_SETS *sets);
375 static void make_sets_invisible(REP_SETS *sets);
376 static void free_last_set(REP_SETS *sets);
377 static void free_sets(REP_SETS *sets);
378 static void internal_set_bit(REP_SET *set, uint bit);
379 static void internal_clear_bit(REP_SET *set, uint bit);
380 static void or_bits(REP_SET *to,REP_SET *from);
381 static void copy_bits(REP_SET *to,REP_SET *from);
382 static int cmp_bits(REP_SET *set1,REP_SET *set2);
383 static int get_next_bit(REP_SET *set,uint lastpos);
384 static short find_set(REP_SETS *sets,REP_SET *find);
385 static short find_found(FOUND_SET *found_set,uint table_offset,
386                         int found_offset);
387 static uint start_at_word(char * pos);
388 static uint end_of_word(char * pos);
389 static uint replace_len(char * pos);
390 
391 static uint found_sets=0;
392 
393 
394 	/* Init a replace structure for further calls */
395 
init_replace(char ** from,char ** to,uint count,char * word_end_chars)396 static REPLACE *init_replace(char * *from, char * *to,uint count,
397                              char * word_end_chars)
398 {
399   uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
400   int used_sets,chr;
401   short default_state;
402   char used_chars[LAST_CHAR_CODE],is_word_end[256];
403   char * pos, *to_pos, **to_array;
404   REP_SETS sets;
405   REP_SET *set,*start_states,*word_states,*new_set;
406   FOLLOWS *follow,*follow_ptr;
407   REPLACE *replace;
408   FOUND_SET *found_set;
409   REPLACE_STRING *rep_str;
410   DBUG_ENTER("init_replace");
411 
412   /* Count number of states */
413   for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
414   {
415     len=replace_len(from[i]);
416     if (!len)
417     {
418       errno=EINVAL;
419       my_message(0,"No to-string for last from-string",MYF(ME_BELL));
420       DBUG_RETURN(0);
421     }
422     states+=len+1;
423     result_len+=(uint) strlen(to[i])+1;
424     if (len > max_length)
425       max_length=len;
426   }
427   bzero((char*) is_word_end,sizeof(is_word_end));
428   for (i=0 ; word_end_chars[i] ; i++)
429     is_word_end[(uchar) word_end_chars[i]]=1;
430 
431   if (init_sets(&sets,states))
432     DBUG_RETURN(0);
433   found_sets=0;
434   if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
435 					  MYF(MY_WME))))
436   {
437     free_sets(&sets);
438     DBUG_RETURN(0);
439   }
440   (void) make_new_set(&sets);			/* Set starting set */
441   make_sets_invisible(&sets);			/* Hide previus sets */
442   used_sets=-1;
443   word_states=make_new_set(&sets);		/* Start of new word */
444   start_states=make_new_set(&sets);		/* This is first state */
445   if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
446   {
447     free_sets(&sets);
448     my_free(found_set);
449     DBUG_RETURN(0);
450   }
451 
452 	/* Init follow_ptr[] */
453   for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
454   {
455     if (from[i][0] == '\\' && from[i][1] == '^')
456     {
457       internal_set_bit(start_states,states+1);
458       if (!from[i][2])
459       {
460 	start_states->table_offset=i;
461 	start_states->found_offset=1;
462       }
463     }
464     else if (from[i][0] == '\\' && from[i][1] == '$')
465     {
466       internal_set_bit(start_states,states);
467       internal_set_bit(word_states,states);
468       if (!from[i][2] && start_states->table_offset == (uint) ~0)
469       {
470 	start_states->table_offset=i;
471 	start_states->found_offset=0;
472       }
473     }
474     else
475     {
476       internal_set_bit(word_states,states);
477       if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
478 	internal_set_bit(start_states,states+1);
479       else
480 	internal_set_bit(start_states,states);
481     }
482     for (pos=from[i], len=0; *pos ; pos++)
483     {
484       if (*pos == '\\' && *(pos+1))
485       {
486 	pos++;
487 	switch (*pos) {
488 	case 'b':
489 	  follow_ptr->chr = SPACE_CHAR;
490 	  break;
491 	case '^':
492 	  follow_ptr->chr = START_OF_LINE;
493 	  break;
494 	case '$':
495 	  follow_ptr->chr = END_OF_LINE;
496 	  break;
497 	case 'r':
498 	  follow_ptr->chr = '\r';
499 	  break;
500 	case 't':
501 	  follow_ptr->chr = '\t';
502 	  break;
503 	case 'v':
504 	  follow_ptr->chr = '\v';
505 	  break;
506 	default:
507 	  follow_ptr->chr = (uchar) *pos;
508 	  break;
509 	}
510       }
511       else
512 	follow_ptr->chr= (uchar) *pos;
513       follow_ptr->table_offset=i;
514       follow_ptr->len= ++len;
515       follow_ptr++;
516     }
517     follow_ptr->chr=0;
518     follow_ptr->table_offset=i;
519     follow_ptr->len=len;
520     follow_ptr++;
521     states+=(uint) len+1;
522   }
523 
524 
525   for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
526   {
527     set=sets.set+set_nr;
528     default_state= 0;				/* Start from beginning */
529 
530     /* If end of found-string not found or start-set with current set */
531 
532     for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
533     {
534       if (!follow[i].chr)
535       {
536 	if (! default_state)
537 	  default_state= find_found(found_set,set->table_offset,
538 				    set->found_offset+1);
539       }
540     }
541     copy_bits(sets.set+used_sets,set);		/* Save set for changes */
542     if (!default_state)
543       or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */
544 
545     /* Find all chars that follows current sets */
546     bzero((char*) used_chars,sizeof(used_chars));
547     for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
548     {
549       used_chars[follow[i].chr]=1;
550       if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
551 	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
552 	used_chars[0]=1;
553     }
554 
555     /* Mark word_chars used if \b is in state */
556     if (used_chars[SPACE_CHAR])
557       for (pos= word_end_chars ; *pos ; pos++)
558 	used_chars[(int) (uchar) *pos] = 1;
559 
560     /* Handle other used characters */
561     for (chr= 0 ; chr < 256 ; chr++)
562     {
563       if (! used_chars[chr])
564 	set->next[chr]= (short) (chr ? default_state : -1);
565       else
566       {
567 	new_set=make_new_set(&sets);
568 	set=sets.set+set_nr;			/* if realloc */
569 	new_set->table_offset=set->table_offset;
570 	new_set->found_len=set->found_len;
571 	new_set->found_offset=set->found_offset+1;
572 	found_end=0;
573 
574 	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
575 	{
576 	  if (!follow[i].chr || follow[i].chr == chr ||
577 	      (follow[i].chr == SPACE_CHAR &&
578 	       (is_word_end[chr] ||
579 		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
580 	      (follow[i].chr == END_OF_LINE && ! chr))
581 	  {
582 	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
583 		follow[i].len > found_end)
584 	      found_end=follow[i].len;
585 	    if (chr && follow[i].chr)
586 	      internal_set_bit(new_set,i+1);		/* To next set */
587 	    else
588 	      internal_set_bit(new_set,i);
589 	  }
590 	}
591 	if (found_end)
592 	{
593 	  new_set->found_len=0;			/* Set for testing if first */
594 	  bits_set=0;
595 	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
596 	  {
597 	    if ((follow[i].chr == SPACE_CHAR ||
598 		 follow[i].chr == END_OF_LINE) && ! chr)
599 	      bit_nr=i+1;
600 	    else
601 	      bit_nr=i;
602 	    if (follow[bit_nr-1].len < found_end ||
603 		(new_set->found_len &&
604 		 (chr == 0 || !follow[bit_nr].chr)))
605 	      internal_clear_bit(new_set,i);
606 	    else
607 	    {
608 	      if (chr == 0 || !follow[bit_nr].chr)
609 	      {					/* best match  */
610 		new_set->table_offset=follow[bit_nr].table_offset;
611 		if (chr || (follow[i].chr == SPACE_CHAR ||
612 			    follow[i].chr == END_OF_LINE))
613 		  new_set->found_offset=found_end;	/* New match */
614 		new_set->found_len=found_end;
615 	      }
616 	      bits_set++;
617 	    }
618 	  }
619 	  if (bits_set == 1)
620 	  {
621 	    set->next[chr] = find_found(found_set,
622 					new_set->table_offset,
623 					new_set->found_offset);
624 	    free_last_set(&sets);
625 	  }
626 	  else
627 	    set->next[chr] = find_set(&sets,new_set);
628 	}
629 	else
630 	  set->next[chr] = find_set(&sets,new_set);
631       }
632     }
633   }
634 
635 	/* Alloc replace structure for the replace-state-machine */
636 
637   if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
638 				    sizeof(REPLACE_STRING)*(found_sets+1)+
639 				    sizeof(char *)*count+result_len,
640 				    MYF(MY_WME | MY_ZEROFILL))))
641   {
642     rep_str=(REPLACE_STRING*) (replace+sets.count);
643     to_array=(char **) (rep_str+found_sets+1);
644     to_pos=(char *) (to_array+count);
645     for (i=0 ; i < count ; i++)
646     {
647       to_array[i]=to_pos;
648       to_pos=strmov(to_pos,to[i])+1;
649     }
650     rep_str[0].found=1;
651     rep_str[0].replace_string=0;
652     for (i=1 ; i <= found_sets ; i++)
653     {
654       pos=from[found_set[i-1].table_offset];
655       rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
656       rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
657       rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
658       rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
659 	end_of_word(pos);
660     }
661     for (i=0 ; i < sets.count ; i++)
662     {
663       for (j=0 ; j < 256 ; j++)
664 	if (sets.set[i].next[j] >= 0)
665 	  replace[i].next[j]=replace+sets.set[i].next[j];
666 	else
667 	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
668     }
669   }
670   my_free(follow);
671   free_sets(&sets);
672   my_free(found_set);
673   DBUG_PRINT("exit",("Replace table has %d states",sets.count));
674   DBUG_RETURN(replace);
675 }
676 
677 
init_sets(REP_SETS * sets,uint states)678 static int init_sets(REP_SETS *sets,uint states)
679 {
680   bzero((char*) sets,sizeof(*sets));
681   sets->size_of_bits=((states+7)/8);
682   if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
683 					      MYF(MY_WME))))
684     return 1;
685   if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
686 					   SET_MALLOC_HUNC,MYF(MY_WME))))
687   {
688     my_free(sets->set);
689     return 1;
690   }
691   return 0;
692 }
693 
694 	/* Make help sets invisible for nicer codeing */
695 
make_sets_invisible(REP_SETS * sets)696 static void make_sets_invisible(REP_SETS *sets)
697 {
698   sets->invisible=sets->count;
699   sets->set+=sets->count;
700   sets->count=0;
701 }
702 
make_new_set(REP_SETS * sets)703 static REP_SET *make_new_set(REP_SETS *sets)
704 {
705   uint i,count,*bit_buffer;
706   REP_SET *set;
707   if (sets->extra)
708   {
709     sets->extra--;
710     set=sets->set+ sets->count++;
711     bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
712     bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
713     set->found_offset=0;
714     set->found_len=0;
715     set->table_offset= (uint) ~0;
716     set->size_of_bits=sets->size_of_bits;
717     return set;
718   }
719   count=sets->count+sets->invisible+SET_MALLOC_HUNC;
720   if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
721 				   sizeof(REP_SET)*count,
722 				  MYF(MY_WME))))
723     return 0;
724   sets->set_buffer=set;
725   sets->set=set+sets->invisible;
726   if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
727 				      (sizeof(uint)*sets->size_of_bits)*count,
728 				      MYF(MY_WME))))
729     return 0;
730   sets->bit_buffer=bit_buffer;
731   for (i=0 ; i < count ; i++)
732   {
733     sets->set_buffer[i].bits=bit_buffer;
734     bit_buffer+=sets->size_of_bits;
735   }
736   sets->extra=SET_MALLOC_HUNC;
737   return make_new_set(sets);
738 }
739 
free_last_set(REP_SETS * sets)740 static void free_last_set(REP_SETS *sets)
741 {
742   sets->count--;
743   sets->extra++;
744   return;
745 }
746 
free_sets(REP_SETS * sets)747 static void free_sets(REP_SETS *sets)
748 {
749   my_free(sets->set_buffer);
750   my_free(sets->bit_buffer);
751   return;
752 }
753 
internal_set_bit(REP_SET * set,uint bit)754 static void internal_set_bit(REP_SET *set, uint bit)
755 {
756   set->bits[bit / WORD_BIT] |= 1 << (bit % WORD_BIT);
757   return;
758 }
759 
internal_clear_bit(REP_SET * set,uint bit)760 static void internal_clear_bit(REP_SET *set, uint bit)
761 {
762   set->bits[bit / WORD_BIT] &= ~ (1 << (bit % WORD_BIT));
763   return;
764 }
765 
766 
or_bits(REP_SET * to,REP_SET * from)767 static void or_bits(REP_SET *to,REP_SET *from)
768 {
769   reg1 uint i;
770   for (i=0 ; i < to->size_of_bits ; i++)
771     to->bits[i]|=from->bits[i];
772   return;
773 }
774 
copy_bits(REP_SET * to,REP_SET * from)775 static void copy_bits(REP_SET *to,REP_SET *from)
776 {
777   memcpy((uchar*) to->bits,(uchar*) from->bits,
778 	 (size_t) (sizeof(uint) * to->size_of_bits));
779 }
780 
cmp_bits(REP_SET * set1,REP_SET * set2)781 static int cmp_bits(REP_SET *set1,REP_SET *set2)
782 {
783   return memcmp(set1->bits, set2->bits,
784                 sizeof(uint) * set1->size_of_bits);
785 }
786 
787 
788 	/* Get next set bit from set. */
789 
get_next_bit(REP_SET * set,uint lastpos)790 static int get_next_bit(REP_SET *set,uint lastpos)
791 {
792   uint pos,*start,*end,bits;
793 
794   start=set->bits+ ((lastpos+1) / WORD_BIT);
795   end=set->bits + set->size_of_bits;
796   bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
797 
798   while (! bits && ++start < end)
799     bits=start[0];
800   if (!bits)
801     return 0;
802   pos=(uint) (start-set->bits)*WORD_BIT;
803   while (! (bits & 1))
804   {
805     bits>>=1;
806     pos++;
807   }
808   return pos;
809 }
810 
811 	/* find if there is a same set in sets. If there is, use it and
812 	   free given set, else put in given set in sets and return it's
813 	   position */
814 
find_set(REP_SETS * sets,REP_SET * find)815 static short find_set(REP_SETS *sets,REP_SET *find)
816 {
817   uint i;
818   for (i=0 ; i < sets->count-1 ; i++)
819   {
820     if (!cmp_bits(sets->set+i,find))
821     {
822       free_last_set(sets);
823       return (short) i;
824     }
825   }
826   return (short) i;			/* return new position */
827 }
828 
829 
830 /*
831   find if there is a found_set with same table_offset & found_offset
832   If there is return offset to it, else add new offset and return pos.
833   Pos returned is -offset-2 in found_set_structure because it's is
834   saved in set->next and set->next[] >= 0 points to next set and
835   set->next[] == -1 is reserved for end without replaces.
836 */
837 
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)838 static short find_found(FOUND_SET *found_set,uint table_offset,
839                         int found_offset)
840 {
841   int i;
842   for (i=0 ; (uint) i < found_sets ; i++)
843     if (found_set[i].table_offset == table_offset &&
844 	found_set[i].found_offset == found_offset)
845       return (short) (-i-2);
846   found_set[i].table_offset=table_offset;
847   found_set[i].found_offset=found_offset;
848   found_sets++;
849   return (short) (-i-2);			/* return new position */
850 }
851 
852 	/* Return 1 if regexp starts with \b or ends with \b*/
853 
start_at_word(char * pos)854 static uint start_at_word(char * pos)
855 {
856   return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
857 }
858 
end_of_word(char * pos)859 static uint end_of_word(char * pos)
860 {
861   char * end=strend(pos);
862   return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
863 	  (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
864 	    1 : 0;
865 }
866 
867 
replace_len(char * str)868 static uint replace_len(char * str)
869 {
870   uint len=0;
871   while (*str)
872   {
873     if (str[0] == '\\' && str[1])
874       str++;
875     str++;
876     len++;
877   }
878   return len;
879 }
880 
881 
882 	/* The actual loop */
883 
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)884 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
885                             char *from)
886 {
887   reg1 REPLACE *rep_pos;
888   reg2 REPLACE_STRING *rep_str;
889   char *to, *end, *pos, *new;
890 
891   end=(to= *start) + *max_length-1;
892   rep_pos=rep+1;
893   for(;;)
894   {
895     while (!rep_pos->found)
896     {
897       rep_pos= rep_pos->next[(uchar) *from];
898       if (to == end)
899       {
900 	(*max_length)+=8192;
901 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
902 	  return (uint) -1;
903 	to=new+(to - *start);
904 	end=(*start=new)+ *max_length-1;
905       }
906       *to++= *from++;
907     }
908     if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
909       return (uint) (to - *start)-1;
910     updated=1;			/* Some char * is replaced */
911     to-=rep_str->to_offset;
912     for (pos=rep_str->replace_string; *pos ; pos++)
913     {
914       if (to == end)
915       {
916 	(*max_length)*=2;
917 	if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
918 	  return (uint) -1;
919 	to=new+(to - *start);
920 	end=(*start=new)+ *max_length-1;
921       }
922       *to++= *pos;
923     }
924     if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
925       return (uint) (to - *start);
926     rep_pos=rep;
927   }
928 }
929 
930 static char *buffer;		/* The buffer itself, grown as needed. */
931 static int bufbytes;		/* Number of bytes in the buffer. */
932 static int bufread,my_eof;		/* Number of bytes to get with each read(). */
933 static uint bufalloc;
934 static char *out_buff;
935 static uint out_length;
936 
initialize_buffer()937 static int initialize_buffer()
938 {
939   bufread = 8192;
940   bufalloc = bufread + bufread / 2;
941   if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
942     return 1;
943   bufbytes=my_eof=0;
944   out_length=bufread;
945   if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
946     return(1);
947   return 0;
948 }
949 
reset_buffer()950 static void reset_buffer()
951 {
952   bufbytes=my_eof=0;
953 }
954 
free_buffer()955 static void free_buffer()
956 {
957   my_free(buffer);
958   my_free(out_buff);
959 }
960 
961 
962 /*
963   Fill the buffer retaining the last n bytes at the beginning of the
964   newly filled buffer (for backward context).  Returns the number of new
965   bytes read from disk.
966 */
967 
fill_buffer_retaining(fd,n)968 static int fill_buffer_retaining(fd,n)
969 File fd;
970 int n;
971 {
972   int i;
973 
974   /* See if we need to grow the buffer. */
975   if ((int) bufalloc - n <= bufread)
976   {
977     while ((int) bufalloc - n <= bufread)
978     {
979       bufalloc *= 2;
980       bufread *= 2;
981     }
982     buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
983     if (! buffer)
984       return(-1);
985   }
986 
987   /* Shift stuff down. */
988   bmove(buffer,buffer+bufbytes-n,(uint) n);
989   bufbytes = n;
990 
991   if (my_eof)
992     return 0;
993 
994   /* Read in new stuff. */
995   if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
996                        (size_t) bufread, MYF(MY_WME))) < 0)
997     return -1;
998 
999   /* Kludge to pretend every nonempty file ends with a newline. */
1000   if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1001   {
1002     my_eof = i = 1;
1003     buffer[bufbytes] = '\n';
1004   }
1005 
1006   bufbytes += i;
1007   return i;
1008 }
1009 
1010 	/* Return 0 if convert is ok */
1011 	/* Global variable update is set if something was changed */
1012 
convert_pipe(rep,in,out)1013 static int convert_pipe(rep,in,out)
1014 REPLACE *rep;
1015 FILE *in,*out;
1016 {
1017   int retain,error;
1018   uint length;
1019   char save_char,*end_of_line,*start_of_line;
1020   DBUG_ENTER("convert_pipe");
1021 
1022   updated=retain=0;
1023   reset_buffer();
1024 
1025   while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1026   {
1027     end_of_line=buffer ;
1028     buffer[bufbytes]=0;			/* Sentinel  */
1029     for (;;)
1030     {
1031       start_of_line=end_of_line;
1032       while (end_of_line[0] != '\n' && end_of_line[0])
1033 	end_of_line++;
1034       if (end_of_line == buffer+bufbytes)
1035       {
1036 	retain= (int) (end_of_line - start_of_line);
1037 	break;				/* No end of line, read more */
1038       }
1039       save_char=end_of_line[0];
1040       end_of_line[0]=0;
1041       end_of_line++;
1042       if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1043 	  (uint) -1)
1044 	return 1;
1045       if (!my_eof)
1046 	out_buff[length++]=save_char;	/* Don't write added newline */
1047       if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1048 	DBUG_RETURN(1);
1049     }
1050   }
1051   DBUG_RETURN(error);
1052 }
1053 
1054 
convert_file(REPLACE * rep,char * name)1055 static int convert_file(REPLACE *rep, char * name)
1056 {
1057   int error;
1058   FILE *in,*out;
1059   char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
1060 #ifdef HAVE_READLINK
1061   char link_name[FN_REFLEN];
1062 #endif
1063   File temp_file;
1064   size_t dir_buff_length;
1065   DBUG_ENTER("convert_file");
1066 
1067   /* check if name is a symlink */
1068 #ifdef HAVE_READLINK
1069   org_name= (!my_disable_symlinks &&
1070              !my_readlink(link_name, name, MYF(0))) ? link_name : name;
1071 #endif
1072   if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
1073     DBUG_RETURN(1);
1074   dirname_part(dir_buff, org_name, &dir_buff_length);
1075   if ((temp_file= create_temp_file(tempname, dir_buff, "PR", 0,
1076                                    MYF(MY_WME))) < 0)
1077   {
1078     my_fclose(in,MYF(0));
1079     DBUG_RETURN(1);
1080   }
1081   if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
1082   {
1083     my_fclose(in,MYF(0));
1084     DBUG_RETURN(1);
1085   }
1086 
1087   error=convert_pipe(rep,in,out);
1088   my_fclose(in,MYF(0)); my_fclose(out,MYF(0));
1089 
1090   if (updated && ! error)
1091     my_redel(org_name, tempname, 0, MYF(MY_WME | MY_LINK_WARNING));
1092   else
1093     my_delete(tempname,MYF(MY_WME));
1094   if (!silent && ! error)
1095   {
1096     if (updated)
1097       printf("%s converted\n",name);
1098     else if (verbose)
1099       printf("%s left unchanged\n",name);
1100   }
1101   DBUG_RETURN(error);
1102 }
1103