1 /*
2 Copyright (c) 2000, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23 02110-1301 USA */
24
25 /*
26 Replace strings in textfile
27
28 This program replaces strings in files or from stdin to stdout.
29 It accepts a list of from-string/to-string pairs and replaces
30 each occurrence of a from-string with the corresponding to-string.
31 The first occurrence of a found string is matched. If there is more
32 than one possibility for the string to replace, longer matches
33 are preferred before shorter matches.
34
35 Special characters in from string:
36 \^ Match start of line.
37 \$ Match end of line.
38 \b Match space-character, start of line or end of line.
       For an end \b the next replace starts looking at the end space-character.
40 An \b alone or in a string matches only a space-character.
41 \r, \t, \v as in C.
42 The programs make a DFA-state-machine of the strings and the speed isn't
43 dependent on the count of replace-strings (only of the number of replaces).
44 A line is assumed ending with \n or \0.
  There is no limit except memory on the length of strings.
46
47 Written by Monty.
48 fill_buffer_retaining() is taken from gnu-grep and modified.
49 */
50
51 #include <my_global.h>
52 #include <m_ctype.h>
53 #include <my_sys.h>
54 #include <m_string.h>
55 #include <errno.h>
56
57 #define PC_MALLOC 256 /* Bytes for pointers */
58 #define PS_MALLOC 512 /* Bytes for data */
59
60 typedef struct st_pointer_array { /* when using array-strings */
61 TYPELIB typelib; /* Pointer to strings */
62 uchar *str; /* Strings is here */
63 uint8 *flag; /* Flag about each var. */
64 uint array_allocs,max_count,length,max_length;
65 } POINTER_ARRAY;
66
67 #define SPACE_CHAR 256
68 #define START_OF_LINE 257
69 #define END_OF_LINE 258
70 #define LAST_CHAR_CODE 259
71
72 typedef struct st_replace {
73 my_bool found;
74 struct st_replace *next[256];
75 } REPLACE;
76
77 typedef struct st_replace_found {
78 my_bool found;
79 char *replace_string;
80 uint to_offset;
81 int from_offset;
82 } REPLACE_STRING;
83
/* Number of bits in one word (uint) of a REP_SET bitmap */
#if !defined(WORD_BIT)
#define WORD_BIT (8*sizeof(uint))
#endif
87
88 /* functions defined in this file */
89
90 static int static_get_options(int *argc,char * * *argv);
91 static int get_replace_strings(int *argc,char * * *argv,
92 POINTER_ARRAY *from_array,
93 POINTER_ARRAY *to_array);
94 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
95 static void free_pointer_array(POINTER_ARRAY *pa);
96 static int convert_pipe(REPLACE *,FILE *,FILE *);
97 static int convert_file(REPLACE *, char *);
98 static REPLACE *init_replace(char * *from, char * *to,uint count,
99 char * word_end_chars);
100 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
101 char * from);
102 static int initialize_buffer(void);
103 static void reset_buffer(void);
104 static void free_buffer(void);
105
106 static int silent=0,verbose=0,updated=0;
107
108 /* The main program */
109
main(int argc,char * argv[])110 int main(int argc, char *argv[])
111 {
112 int i,error;
113 char word_end_chars[256],*pos;
114 POINTER_ARRAY from,to;
115 REPLACE *replace;
116
117 fprintf(stderr, "Warning: replace is deprecated and will be removed in a "
118 "future version.\n");
119
120 MY_INIT(argv[0]);
121
122 if (static_get_options(&argc,&argv))
123 exit(1);
124 if (get_replace_strings(&argc,&argv,&from,&to))
125 exit(1);
126
127 for (i=1,pos=word_end_chars ; i < 256 ; i++)
128 if (my_isspace(&my_charset_latin1,i))
129 *pos++= (char) i;
130 *pos=0;
131 if (!(replace=init_replace((char**) from.typelib.type_names,
132 (char**) to.typelib.type_names,
133 (uint) from.typelib.count,word_end_chars)))
134 exit(1);
135 free_pointer_array(&from);
136 free_pointer_array(&to);
137 if (initialize_buffer())
138 return 1;
139
140 error=0;
141 if (argc == 0)
142 error=convert_pipe(replace,stdin,stdout);
143 else
144 {
145 while (argc--)
146 {
147 error=convert_file(replace,*(argv++));
148 }
149 }
150 free_buffer();
151 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
152 exit(error ? 2 : 0);
153 return 0; /* No compiler warning */
154 } /* main */
155
156
157 /* reads options */
158 /* Initiates DEBUG - but no debugging here ! */
159
static_get_options(argc,argv)160 static int static_get_options(argc,argv)
161 int *argc;
162 char **argv[];
163 {
164 int help,version;
165 char *pos;
166
167 silent=verbose=help=0;
168
169 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
170 while (*++pos)
171 {
172 version=0;
173 switch((*pos)) {
174 case 's':
175 silent=1;
176 break;
177 case 'v':
178 verbose=1;
179 break;
180 case '#':
181 DBUG_PUSH (++pos);
182 pos= (char*) " "; /* Skip rest of arguments */
183 break;
184 case 'V':
185 version=1;
186 // Fall through.
187 case 'I':
188 case '?':
189 help=1; /* Help text written */
190 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
191 MACHINE_TYPE);
192 if (version)
193 break;
194 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
195 puts("This program replaces strings in files or from stdin to stdout.\n"
196 "It accepts a list of from-string/to-string pairs and replaces\n"
197 "each occurrence of a from-string with the corresponding to-string.\n"
198 "The first occurrence of a found string is matched. If there is\n"
199 "more than one possibility for the string to replace, longer\n"
200 "matches are preferred before shorter matches.\n\n"
201 "A from-string can contain these special characters:\n"
202 " \\^ Match start of line.\n"
203 " \\$ Match end of line.\n"
204 " \\b Match space-character, start of line or end of line.\n"
205 " For a end \\b the next replace starts locking at the end\n"
206 " space-character. A \\b alone in a string matches only a\n"
207 " space-character.\n");
208 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
209 puts("or");
210 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
211 puts("");
212 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
213 break;
214 default:
215 fprintf(stderr,"illegal option: -%c\n",*pos);
216 break;
217 }
218 }
219 }
220 if (*argc == 0)
221 {
222 if (!help)
223 my_message(0,"No replace options given",MYF(0));
224 exit(0); /* Don't use as pipe */
225 }
226 return(0);
227 } /* static_get_options */
228
229
get_replace_strings(argc,argv,from_array,to_array)230 static int get_replace_strings(argc,argv,from_array,to_array)
231 int *argc;
232 char **argv[];
233 POINTER_ARRAY *from_array,*to_array;
234 {
235 char *pos;
236
237 memset(from_array, 0, sizeof(from_array[0]));
238 memset(to_array, 0, sizeof(to_array[0]));
239 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
240 {
241 insert_pointer_name(from_array,pos);
242 (*argc)--;
243 (*argv)++;
244 if (!*argc || !strcmp(**argv,"--"))
245 {
246 my_message(0,"No to-string for last from-string",MYF(0));
247 return 1;
248 }
249 insert_pointer_name(to_array,**argv);
250 (*argc)--;
251 (*argv)++;
252 }
253 if (*argc)
254 { /* Skip "--" argument */
255 (*argc)--;
256 (*argv)++;
257 }
258 return 0;
259 }
260
/*
  Append a copy of 'name' to the pointer array 'pa'.

  The first call allocates the pointer/flag block and the string
  buffer; later calls grow them when needed.  The pointer list is kept
  terminated with a NullS end-mark.

  Returns 0 on success, -1 if an initial allocation fails and 1 if
  growing one of the buffers fails.
*/

static int insert_pointer_name(POINTER_ARRAY *pa,char * name)
{
  /* Each slot costs one string pointer plus one flag byte */
  const size_t slot_size= sizeof(char *) + sizeof(*pa->flag);
  uint idx, name_size, prev_max;
  uchar *moved_str;
  const char **grown_names;
  DBUG_ENTER("insert_pointer_name");

  if (!pa->typelib.count)
  {
    /* First string: set up both buffers */
    pa->typelib.type_names= (const char **)
      my_malloc(PSI_NOT_INSTRUMENTED,
                ((PC_MALLOC-MALLOC_OVERHEAD) / slot_size * slot_size),
                MYF(MY_WME));
    if (!pa->typelib.type_names)
      DBUG_RETURN(-1);
    pa->str= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
                                (uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                MYF(MY_WME));
    if (!pa->str)
    {
      my_free((char**) pa->typelib.type_names);
      DBUG_RETURN (-1);
    }
    pa->max_count= (PC_MALLOC-MALLOC_OVERHEAD) / slot_size;
    /* The flag bytes live directly after the pointer slots */
    pa->flag= (uint8*) (pa->typelib.type_names + pa->max_count);
    pa->length= 0;
    pa->max_length= PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs= 1;
  }

  name_size= (uint) strlen(name) + 1;
  if (pa->length + name_size >= pa->max_length)
  {
    /* Grow the string buffer to the next multiple of PS_MALLOC */
    pa->max_length= (pa->length + name_size + MALLOC_OVERHEAD + PS_MALLOC - 1) /
                    PS_MALLOC;
    pa->max_length= pa->max_length * PS_MALLOC - MALLOC_OVERHEAD;
    moved_str= (uchar*) my_realloc(PSI_NOT_INSTRUMENTED,
                                   (uchar*) pa->str,
                                   (uint) pa->max_length,
                                   MYF(MY_WME));
    if (!moved_str)
      DBUG_RETURN(1);
    if (moved_str != pa->str)
    {
      /* The buffer moved: shift every stored pointer by the same amount */
      my_ptrdiff_t diff= PTR_BYTE_DIFF(moved_str, pa->str);
      for (idx= 0 ; idx < pa->typelib.count ; idx++)
        pa->typelib.type_names[idx]=
          ADD_TO_PTR(pa->typelib.type_names[idx], diff, char*);
      pa->str= moved_str;
    }
  }

  if (pa->typelib.count >= pa->max_count - 1)
  {
    /* Grow the pointer/flag block by one more PC_MALLOC chunk */
    int len;
    pa->array_allocs++;
    len= (PC_MALLOC * pa->array_allocs - MALLOC_OVERHEAD);
    grown_names= (const char **) my_realloc(PSI_NOT_INSTRUMENTED,
                                            (uchar*) pa->typelib.type_names,
                                            (uint) len / slot_size * slot_size,
                                            MYF(MY_WME));
    if (!grown_names)
      DBUG_RETURN(1);
    pa->typelib.type_names= grown_names;
    prev_max= pa->max_count;
    pa->max_count= len / slot_size;
    /* Move the flag bytes to their new place behind the larger slot area */
    pa->flag= (uint8*) (pa->typelib.type_names + pa->max_count);
    memcpy((uchar*) pa->flag, (char *) (pa->typelib.type_names + prev_max),
           prev_max * sizeof(*pa->flag));
  }

  pa->flag[pa->typelib.count]= 0;                       /* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str + pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;     /* Put end-mark */
  (void) my_stpcpy((char*) pa->str + pa->length, name);
  pa->length+= name_size;
  DBUG_RETURN(0);
} /* insert_pointer_name */
335
336
337 /* free pointer array */
338
free_pointer_array(POINTER_ARRAY * pa)339 static void free_pointer_array(POINTER_ARRAY *pa)
340 {
341 if (pa->typelib.count)
342 {
343 pa->typelib.count=0;
344 my_free((char**)pa->typelib.type_names);
345 pa->typelib.type_names=0;
346 my_free(pa->str);
347 }
348 return;
349 } /* free_pointer_array */
350
351
/* Code for replace routines */
353
354 #define SET_MALLOC_HUNC 64
355
356 typedef struct st_rep_set {
357 uint *bits; /* Pointer to used sets */
358 short next[LAST_CHAR_CODE]; /* Pointer to next sets */
359 uint found_len; /* Best match to date */
360 int found_offset;
361 uint table_offset;
362 uint size_of_bits; /* For convinience */
363 } REP_SET;
364
365 typedef struct st_rep_sets {
366 uint count; /* Number of sets */
367 uint extra; /* Extra sets in buffer */
368 uint invisible; /* Sets not chown */
369 uint size_of_bits;
370 REP_SET *set,*set_buffer;
371 uint *bit_buffer;
372 } REP_SETS;
373
374 typedef struct st_found_set {
375 uint table_offset;
376 int found_offset;
377 } FOUND_SET;
378
379 typedef struct st_follow {
380 int chr;
381 uint table_offset;
382 uint len;
383 } FOLLOWS;
384
385
386 static int init_sets(REP_SETS *sets,uint states);
387 static REP_SET *make_new_set(REP_SETS *sets);
388 static void make_sets_invisible(REP_SETS *sets);
389 static void free_last_set(REP_SETS *sets);
390 static void free_sets(REP_SETS *sets);
391 static void internal_set_bit(REP_SET *set, uint bit);
392 static void internal_clear_bit(REP_SET *set, uint bit);
393 static void or_bits(REP_SET *to,REP_SET *from);
394 static void copy_bits(REP_SET *to,REP_SET *from);
395 static int cmp_bits(REP_SET *set1,REP_SET *set2);
396 static int get_next_bit(REP_SET *set,uint lastpos);
397 static short find_set(REP_SETS *sets,REP_SET *find);
398 static short find_found(FOUND_SET *found_set,uint table_offset,
399 int found_offset);
400 static uint start_at_word(char * pos);
401 static uint end_of_word(char * pos);
402 static uint replace_len(char * pos);
403
404 static uint found_sets=0;
405
406
407 /* Init a replace structure for further calls */
408
init_replace(char ** from,char ** to,uint count,char * word_end_chars)409 static REPLACE *init_replace(char * *from, char * *to,uint count,
410 char * word_end_chars)
411 {
412 uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
413 int used_sets,chr;
414 short default_state;
415 char used_chars[LAST_CHAR_CODE],is_word_end[256];
416 char * pos, *to_pos, **to_array;
417 REP_SETS sets;
418 REP_SET *set,*start_states,*word_states,*new_set;
419 FOLLOWS *follow,*follow_ptr;
420 REPLACE *replace;
421 FOUND_SET *found_set;
422 REPLACE_STRING *rep_str;
423 DBUG_ENTER("init_replace");
424
425 /* Count number of states */
426 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
427 {
428 len=replace_len(from[i]);
429 if (!len)
430 {
431 errno=EINVAL;
432 my_message(0,"No to-string for last from-string",MYF(0));
433 DBUG_RETURN(0);
434 }
435 states+=len+1;
436 result_len+=(uint) strlen(to[i])+1;
437 if (len > max_length)
438 max_length=len;
439 }
440 memset(is_word_end, 0, sizeof(is_word_end));
441 for (i=0 ; word_end_chars[i] ; i++)
442 is_word_end[(uchar) word_end_chars[i]]=1;
443
444 if (init_sets(&sets,states))
445 DBUG_RETURN(0);
446 found_sets=0;
447 if (!(found_set= (FOUND_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
448 sizeof(FOUND_SET)*max_length*count,
449 MYF(MY_WME))))
450 {
451 free_sets(&sets);
452 DBUG_RETURN(0);
453 }
454 (void) make_new_set(&sets); /* Set starting set */
455 make_sets_invisible(&sets); /* Hide previus sets */
456 used_sets=-1;
457 word_states=make_new_set(&sets); /* Start of new word */
458 start_states=make_new_set(&sets); /* This is first state */
459 if (!(follow=(FOLLOWS*) my_malloc(PSI_NOT_INSTRUMENTED,
460 (states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
461 {
462 free_sets(&sets);
463 my_free(found_set);
464 DBUG_RETURN(0);
465 }
466
467 /* Init follow_ptr[] */
468 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
469 {
470 if (from[i][0] == '\\' && from[i][1] == '^')
471 {
472 internal_set_bit(start_states,states+1);
473 if (!from[i][2])
474 {
475 start_states->table_offset=i;
476 start_states->found_offset=1;
477 }
478 }
479 else if (from[i][0] == '\\' && from[i][1] == '$')
480 {
481 internal_set_bit(start_states,states);
482 internal_set_bit(word_states,states);
483 if (!from[i][2] && start_states->table_offset == (uint) ~0)
484 {
485 start_states->table_offset=i;
486 start_states->found_offset=0;
487 }
488 }
489 else
490 {
491 internal_set_bit(word_states,states);
492 if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
493 internal_set_bit(start_states,states+1);
494 else
495 internal_set_bit(start_states,states);
496 }
497 for (pos=from[i], len=0; *pos ; pos++)
498 {
499 if (*pos == '\\' && *(pos+1))
500 {
501 pos++;
502 switch (*pos) {
503 case 'b':
504 follow_ptr->chr = SPACE_CHAR;
505 break;
506 case '^':
507 follow_ptr->chr = START_OF_LINE;
508 break;
509 case '$':
510 follow_ptr->chr = END_OF_LINE;
511 break;
512 case 'r':
513 follow_ptr->chr = '\r';
514 break;
515 case 't':
516 follow_ptr->chr = '\t';
517 break;
518 case 'v':
519 follow_ptr->chr = '\v';
520 break;
521 default:
522 follow_ptr->chr = (uchar) *pos;
523 break;
524 }
525 }
526 else
527 follow_ptr->chr= (uchar) *pos;
528 follow_ptr->table_offset=i;
529 follow_ptr->len= ++len;
530 follow_ptr++;
531 }
532 follow_ptr->chr=0;
533 follow_ptr->table_offset=i;
534 follow_ptr->len=len;
535 follow_ptr++;
536 states+=(uint) len+1;
537 }
538
539
540 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
541 {
542 set=sets.set+set_nr;
543 default_state= 0; /* Start from beginning */
544
545 /* If end of found-string not found or start-set with current set */
546
547 for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
548 {
549 if (!follow[i].chr)
550 {
551 if (! default_state)
552 default_state= find_found(found_set,set->table_offset,
553 set->found_offset+1);
554 }
555 }
556 copy_bits(sets.set+used_sets,set); /* Save set for changes */
557 if (!default_state)
558 or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
559
560 /* Find all chars that follows current sets */
561 memset(used_chars, 0, sizeof(used_chars));
562 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
563 {
564 used_chars[follow[i].chr]=1;
565 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
566 follow[i].len > 1) || follow[i].chr == END_OF_LINE)
567 used_chars[0]=1;
568 }
569
570 /* Mark word_chars used if \b is in state */
571 if (used_chars[SPACE_CHAR])
572 for (pos= word_end_chars ; *pos ; pos++)
573 used_chars[(int) (uchar) *pos] = 1;
574
575 /* Handle other used characters */
576 for (chr= 0 ; chr < 256 ; chr++)
577 {
578 if (! used_chars[chr])
579 set->next[chr]= (short) (chr ? default_state : -1);
580 else
581 {
582 new_set=make_new_set(&sets);
583 set=sets.set+set_nr; /* if realloc */
584 new_set->table_offset=set->table_offset;
585 new_set->found_len=set->found_len;
586 new_set->found_offset=set->found_offset+1;
587 found_end=0;
588
589 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
590 {
591 if (!follow[i].chr || follow[i].chr == chr ||
592 (follow[i].chr == SPACE_CHAR &&
593 (is_word_end[chr] ||
594 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
595 (follow[i].chr == END_OF_LINE && ! chr))
596 {
597 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
598 follow[i].len > found_end)
599 found_end=follow[i].len;
600 if (chr && follow[i].chr)
601 internal_set_bit(new_set,i+1); /* To next set */
602 else
603 internal_set_bit(new_set,i);
604 }
605 }
606 if (found_end)
607 {
608 new_set->found_len=0; /* Set for testing if first */
609 bits_set=0;
610 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
611 {
612 if ((follow[i].chr == SPACE_CHAR ||
613 follow[i].chr == END_OF_LINE) && ! chr)
614 bit_nr=i+1;
615 else
616 bit_nr=i;
617 if (follow[bit_nr-1].len < found_end ||
618 (new_set->found_len &&
619 (chr == 0 || !follow[bit_nr].chr)))
620 internal_clear_bit(new_set,i);
621 else
622 {
623 if (chr == 0 || !follow[bit_nr].chr)
624 { /* best match */
625 new_set->table_offset=follow[bit_nr].table_offset;
626 if (chr || (follow[i].chr == SPACE_CHAR ||
627 follow[i].chr == END_OF_LINE))
628 new_set->found_offset=found_end; /* New match */
629 new_set->found_len=found_end;
630 }
631 bits_set++;
632 }
633 }
634 if (bits_set == 1)
635 {
636 set->next[chr] = find_found(found_set,
637 new_set->table_offset,
638 new_set->found_offset);
639 free_last_set(&sets);
640 }
641 else
642 set->next[chr] = find_set(&sets,new_set);
643 }
644 else
645 set->next[chr] = find_set(&sets,new_set);
646 }
647 }
648 }
649
650 /* Alloc replace structure for the replace-state-machine */
651
652 if ((replace=(REPLACE*) my_malloc(PSI_NOT_INSTRUMENTED,
653 sizeof(REPLACE)*(sets.count)+
654 sizeof(REPLACE_STRING)*(found_sets+1)+
655 sizeof(char *)*count+result_len,
656 MYF(MY_WME | MY_ZEROFILL))))
657 {
658 rep_str=(REPLACE_STRING*) (replace+sets.count);
659 to_array=(char **) (rep_str+found_sets+1);
660 to_pos=(char *) (to_array+count);
661 for (i=0 ; i < count ; i++)
662 {
663 to_array[i]=to_pos;
664 to_pos=my_stpcpy(to_pos,to[i])+1;
665 }
666 rep_str[0].found=1;
667 rep_str[0].replace_string=0;
668 for (i=1 ; i <= found_sets ; i++)
669 {
670 pos=from[found_set[i-1].table_offset];
671 rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
672 rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
673 rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
674 rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
675 end_of_word(pos);
676 }
677 for (i=0 ; i < sets.count ; i++)
678 {
679 for (j=0 ; j < 256 ; j++)
680 if (sets.set[i].next[j] >= 0)
681 replace[i].next[j]=replace+sets.set[i].next[j];
682 else
683 replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
684 }
685 }
686 my_free(follow);
687 free_sets(&sets);
688 my_free(found_set);
689 DBUG_PRINT("exit",("Replace table has %d states",sets.count));
690 DBUG_RETURN(replace);
691 }
692
693
/*
  Prepare 'sets' for use and allocate the first chunk of its buffers.

  sets    Structure to initialise (zeroed first)
  states  Number of states the bitmaps must be able to hold

  Returns 0 on success, 1 if out of memory.  On failure nothing stays
  allocated.
*/

static int init_sets(REP_SETS *sets,uint states)
{
  memset(sets, 0, sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
                                              sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(PSI_NOT_INSTRUMENTED,
                                           sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* sets->set is still 0 here; the allocation is held by set_buffer */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
711
712 /* Make help sets invisible for nicer codeing */
713
make_sets_invisible(REP_SETS * sets)714 static void make_sets_invisible(REP_SETS *sets)
715 {
716 sets->invisible=sets->count;
717 sets->set+=sets->count;
718 sets->count=0;
719 }
720
make_new_set(REP_SETS * sets)721 static REP_SET *make_new_set(REP_SETS *sets)
722 {
723 uint i,count,*bit_buffer;
724 REP_SET *set;
725 if (sets->extra)
726 {
727 sets->extra--;
728 set=sets->set+ sets->count++;
729 memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
730 memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
731 set->found_offset=0;
732 set->found_len=0;
733 set->table_offset= (uint) ~0;
734 set->size_of_bits=sets->size_of_bits;
735 return set;
736 }
737 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
738 if (!(set=(REP_SET*) my_realloc(PSI_NOT_INSTRUMENTED,
739 (uchar*) sets->set_buffer,
740 sizeof(REP_SET)*count,
741 MYF(MY_WME))))
742 return 0;
743 sets->set_buffer=set;
744 sets->set=set+sets->invisible;
745 if (!(bit_buffer=(uint*) my_realloc(PSI_NOT_INSTRUMENTED,
746 (uchar*) sets->bit_buffer,
747 (sizeof(uint)*sets->size_of_bits)*count,
748 MYF(MY_WME))))
749 return 0;
750 sets->bit_buffer=bit_buffer;
751 for (i=0 ; i < count ; i++)
752 {
753 sets->set_buffer[i].bits=bit_buffer;
754 bit_buffer+=sets->size_of_bits;
755 }
756 sets->extra=SET_MALLOC_HUNC;
757 return make_new_set(sets);
758 }
759
/* Give the most recently created set back to the pool of spare sets */

static void free_last_set(REP_SETS *sets)
{
  sets->extra++;
  sets->count--;
}
766
/* Release the set array and the bitmap storage of 'sets' */

static void free_sets(REP_SETS *sets)
{
  my_free(sets->set_buffer);
  my_free(sets->bit_buffer);
}
773
/*
  Set bit number 'bit' in the bitmap of 'set'.

  The mask is built from an unsigned 1: shifting a signed 1 into the
  top bit of a word is undefined behaviour.
*/

static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
  return;
}
779
/*
  Clear bit number 'bit' in the bitmap of 'set'.

  The mask is built from an unsigned 1: shifting a signed 1 into the
  top bit of a word is undefined behaviour.
*/

static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~ (1U << (bit % WORD_BIT));
  return;
}
785
786
/* Add every bit that is set in 'from' to 'to' */

static void or_bits(REP_SET *to,REP_SET *from)
{
  uint word;

  for (word= 0 ; word < to->size_of_bits ; word++)
    to->bits[word]= to->bits[word] | from->bits[word];
}
794
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */

static void copy_bits(REP_SET *to,REP_SET *from)
{
  const size_t nbytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits, (uchar*) from->bits, nbytes);
}
800
/* Compare two bitmaps; returns 0 if they are identical */

static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  const size_t nbytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, nbytes);
}
806
807
808 /* Get next set bit from set. */
809
get_next_bit(REP_SET * set,uint lastpos)810 static int get_next_bit(REP_SET *set,uint lastpos)
811 {
812 uint pos,*start,*end,bits;
813
814 start=set->bits+ ((lastpos+1) / WORD_BIT);
815 end=set->bits + set->size_of_bits;
816 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
817
818 while (! bits && ++start < end)
819 bits=start[0];
820 if (!bits)
821 return 0;
822 pos=(uint) (start-set->bits)*WORD_BIT;
823 while (! (bits & 1))
824 {
825 bits>>=1;
826 pos++;
827 }
828 return pos;
829 }
830
831 /* find if there is a same set in sets. If there is, use it and
832 free given set, else put in given set in sets and return it's
833 position */
834
find_set(REP_SETS * sets,REP_SET * find)835 static short find_set(REP_SETS *sets,REP_SET *find)
836 {
837 uint i;
838 for (i=0 ; i < sets->count-1 ; i++)
839 {
840 if (!cmp_bits(sets->set+i,find))
841 {
842 free_last_set(sets);
843 return (short) i;
844 }
845 }
846 return (short) i; /* return new position */
847 }
848
849
850 /*
851 find if there is a found_set with same table_offset & found_offset
852 If there is return offset to it, else add new offset and return pos.
853 Pos returned is -offset-2 in found_set_structure because it's is
854 saved in set->next and set->next[] >= 0 points to next set and
855 set->next[] == -1 is reserved for end without replaces.
856 */
857
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)858 static short find_found(FOUND_SET *found_set,uint table_offset,
859 int found_offset)
860 {
861 int i;
862 for (i=0 ; (uint) i < found_sets ; i++)
863 if (found_set[i].table_offset == table_offset &&
864 found_set[i].found_offset == found_offset)
865 return (short) (-i-2);
866 found_set[i].table_offset=table_offset;
867 found_set[i].found_offset=found_offset;
868 found_sets++;
869 return (short) (-i-2); /* return new position */
870 }
871
872 /* Return 1 if regexp starts with \b or ends with \b*/
873
start_at_word(char * pos)874 static uint start_at_word(char * pos)
875 {
876 return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
877 }
878
end_of_word(char * pos)879 static uint end_of_word(char * pos)
880 {
881 char * end=strend(pos);
882 return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
883 (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
884 1 : 0;
885 }
886
887
replace_len(char * str)888 static uint replace_len(char * str)
889 {
890 uint len=0;
891 while (*str)
892 {
893 if (str[0] == '\\' && str[1])
894 str++;
895 str++;
896 len++;
897 }
898 return len;
899 }
900
901
902 /* The actual loop */
903
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)904 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
905 char *from)
906 {
907 REPLACE *rep_pos;
908 REPLACE_STRING *rep_str;
909 char *to, *end, *pos, *new;
910
911 end=(to= *start) + *max_length-1;
912 rep_pos=rep+1;
913 for(;;)
914 {
915 while (!rep_pos->found)
916 {
917 rep_pos= rep_pos->next[(uchar) *from];
918 if (to == end)
919 {
920 (*max_length)+=8192;
921 if (!(new=my_realloc(PSI_NOT_INSTRUMENTED,
922 *start,*max_length,MYF(MY_WME))))
923 return (uint) -1;
924 to=new+(to - *start);
925 end=(*start=new)+ *max_length-1;
926 }
927 *to++= *from++;
928 }
929 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
930 return (uint) (to - *start)-1;
931 updated=1; /* Some char * is replaced */
932 to-=rep_str->to_offset;
933 for (pos=rep_str->replace_string; *pos ; pos++)
934 {
935 if (to == end)
936 {
937 (*max_length)*=2;
938 if (!(new=my_realloc(PSI_NOT_INSTRUMENTED,
939 *start,*max_length,MYF(MY_WME))))
940 return (uint) -1;
941 to=new+(to - *start);
942 end=(*start=new)+ *max_length-1;
943 }
944 *to++= *pos;
945 }
946 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
947 return (uint) (to - *start);
948 rep_pos=rep;
949 }
950 }
951
952 static char *buffer; /* The buffer itself, grown as needed. */
953 static int bufbytes; /* Number of bytes in the buffer. */
954 static int bufread,my_eof; /* Number of bytes to get with each read(). */
955 static uint bufalloc;
956 static char *out_buff;
957 static uint out_length;
958
initialize_buffer()959 static int initialize_buffer()
960 {
961 bufread = 8192;
962 bufalloc = bufread + bufread / 2;
963 if (!(buffer = my_malloc(PSI_NOT_INSTRUMENTED,
964 bufalloc+1,MYF(MY_WME))))
965 return 1;
966 bufbytes=my_eof=0;
967 out_length=bufread;
968 if (!(out_buff=my_malloc(PSI_NOT_INSTRUMENTED,
969 out_length,MYF(MY_WME))))
970 return(1);
971 return 0;
972 }
973
reset_buffer()974 static void reset_buffer()
975 {
976 bufbytes=my_eof=0;
977 }
978
free_buffer()979 static void free_buffer()
980 {
981 my_free(buffer);
982 my_free(out_buff);
983 }
984
985
986 /*
987 Fill the buffer retaining the last n bytes at the beginning of the
988 newly filled buffer (for backward context). Returns the number of new
989 bytes read from disk.
990 */
991
fill_buffer_retaining(fd,n)992 static int fill_buffer_retaining(fd,n)
993 File fd;
994 int n;
995 {
996 int i;
997
998 /* See if we need to grow the buffer. */
999 if ((int) bufalloc - n <= bufread)
1000 {
1001 while ((int) bufalloc - n <= bufread)
1002 {
1003 bufalloc *= 2;
1004 bufread *= 2;
1005 }
1006 buffer = my_realloc(PSI_NOT_INSTRUMENTED,
1007 buffer, bufalloc+1, MYF(MY_WME));
1008 if (! buffer)
1009 return(-1);
1010 }
1011
1012 /* Shift stuff down. */
1013 memmove(buffer, buffer+bufbytes-n, (uint) n);
1014 bufbytes = n;
1015
1016 if (my_eof)
1017 return 0;
1018
1019 /* Read in new stuff. */
1020 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1021 (size_t) bufread, MYF(MY_WME))) < 0)
1022 return -1;
1023
1024 /* Kludge to pretend every nonempty file ends with a newline. */
1025 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1026 {
1027 my_eof = i = 1;
1028 buffer[bufbytes] = '\n';
1029 }
1030
1031 bufbytes += i;
1032 return i;
1033 }
1034
1035 /* Return 0 if convert is ok */
1036 /* Global variable update is set if something was changed */
1037
convert_pipe(rep,in,out)1038 static int convert_pipe(rep,in,out)
1039 REPLACE *rep;
1040 FILE *in,*out;
1041 {
1042 int retain,error;
1043 uint length;
1044 char save_char,*end_of_line,*start_of_line;
1045 DBUG_ENTER("convert_pipe");
1046
1047 updated=retain=0;
1048 reset_buffer();
1049
1050 while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1051 {
1052 end_of_line=buffer ;
1053 buffer[bufbytes]=0; /* Sentinel */
1054 for (;;)
1055 {
1056 start_of_line=end_of_line;
1057 while (end_of_line[0] != '\n' && end_of_line[0])
1058 end_of_line++;
1059 if (end_of_line == buffer+bufbytes)
1060 {
1061 retain= (int) (end_of_line - start_of_line);
1062 break; /* No end of line, read more */
1063 }
1064 save_char=end_of_line[0];
1065 end_of_line[0]=0;
1066 end_of_line++;
1067 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1068 (uint) -1)
1069 DBUG_RETURN(1);
1070 if (!my_eof)
1071 out_buff[length++]=save_char; /* Don't write added newline */
1072 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1073 DBUG_RETURN(1);
1074 }
1075 }
1076 DBUG_RETURN(error);
1077 }
1078
1079
/*
  Convert one file in place.

  The result is written to a temporary file in the directory of the
  original.  It replaces the original only if something was changed and
  everything, including the final flush of the temporary file, went
  well; otherwise the temporary file is deleted.

  rep   State machine from init_replace()
  name  File to convert; if it is a symlink (and symlinks are enabled)
        the file it points at is converted

  Returns 0 on success, non-zero on error.
*/

static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (my_enable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* No stream owns the descriptor: close it and drop the temp file */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* A failed close means lost output; never install such a file */
  if (my_fclose(out,MYF(MY_WME)) && !error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1128