1 /*
2 Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23 02110-1301 USA */
24
25 /*
26 Replace strings in textfile
27
28 This program replaces strings in files or from stdin to stdout.
29 It accepts a list of from-string/to-string pairs and replaces
30 each occurrence of a from-string with the corresponding to-string.
31 The first occurrence of a found string is matched. If there is more
32 than one possibility for the string to replace, longer matches
33 are preferred before shorter matches.
34
35 Special characters in from string:
36 \^ Match start of line.
37 \$ Match end of line.
38 \b Match space-character, start of line or end of line.
      For an end \b the next replace starts looking at the end space-character.
40 An \b alone or in a string matches only a space-character.
41 \r, \t, \v as in C.
  The program builds a DFA state machine from the strings, so the speed isn't
  dependent on the number of replace-strings (only on the number of replaces).
  A line is assumed to end with \n or \0.
  There is no limit except memory on the length of strings.
46
47 Written by Monty.
48 fill_buffer_retaining() is taken from gnu-grep and modified.
49 */
50
51 #include <my_global.h>
52 #include <m_ctype.h>
53 #include <my_sys.h>
54 #include <m_string.h>
55 #include <errno.h>
56
57 #define PC_MALLOC 256 /* Bytes for pointers */
58 #define PS_MALLOC 512 /* Bytes for data */
59
/*
  Growable array of strings.  The string bytes are packed back to back in
  'str'; typelib.type_names[] points at the start of each string and is
  terminated by a NULL entry.  The per-string 'flag' bytes live in the same
  allocation as type_names[], directly after the max_count pointer slots.
*/
typedef struct st_pointer_array {		/* when using array-strings */
  TYPELIB typelib;				/* Pointer to strings */
  uchar	*str;					/* Strings is here */
  uint8 *flag;					/* Flag about each var. */
  /*
    array_allocs: number of PC_MALLOC-sized chunks used for the pointer array
    max_count:    number of pointer slots currently allocated
    length:       bytes of 'str' in use
    max_length:   bytes allocated for 'str'
  */
  uint	array_allocs,max_count,length,max_length;
} POINTER_ARRAY;
66
67 #define SPACE_CHAR 256
68 #define START_OF_LINE 257
69 #define END_OF_LINE 258
70 #define LAST_CHAR_CODE 259
71
/*
  One state of the replace state machine.  'found' is zero for an ordinary
  state and next[] gives the successor for every input byte.  A successor
  may really be a REPLACE_STRING cast to REPLACE*; its first member is also
  'found' (non-zero), which is how replace_strings() notices a match.
*/
typedef struct st_replace {
  my_bool   found;
  struct st_replace *next[256];
} REPLACE;
76
/*
  Terminal entry of the state machine, reached through REPLACE.next[].
  Must start with 'found' so that it can be inspected through a REPLACE*.
*/
typedef struct st_replace_found {
  my_bool found;		/* 1 normally, 2 if from-string is exactly "\^" */
  char *replace_string;		/* to-string; NULL means end of input line */
  uint to_offset;		/* Output bytes to back up before replacing */
  int from_offset;		/* Input bytes to step back after replacing */
} REPLACE_STRING;
83
84 #ifndef WORD_BIT
85 #define WORD_BIT (8*sizeof(uint))
86 #endif
87
/* functions defined in this file */

/* Command line handling */
static int static_get_options(int *argc,char * * *argv);
static int get_replace_strings(int *argc,char * * *argv,
			       POINTER_ARRAY *from_array,
			       POINTER_ARRAY *to_array);
/* String array helpers */
static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
static void free_pointer_array(POINTER_ARRAY *pa);
/* Conversion drivers */
static int convert_pipe(REPLACE *,FILE *,FILE *);
static int convert_file(REPLACE *, char *);
/* State machine construction and execution */
static REPLACE *init_replace(char * *from, char * *to,uint count,
			     char * word_end_chars);
static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
			    char * from);
/* Input/output buffer management */
static int initialize_buffer(void);
static void reset_buffer(void);
static void free_buffer(void);

/*
  silent/verbose are set from the -s/-v options; updated is set by
  replace_strings() whenever at least one replacement was made.
*/
static int silent=0,verbose=0,updated=0;
107
108 /* The main program */
109
main(int argc,char * argv[])110 int main(int argc, char *argv[])
111 {
112 int i,error;
113 char word_end_chars[256],*pos;
114 POINTER_ARRAY from,to;
115 REPLACE *replace;
116 MY_INIT(argv[0]);
117
118 if (static_get_options(&argc,&argv))
119 exit(1);
120 if (get_replace_strings(&argc,&argv,&from,&to))
121 exit(1);
122
123 for (i=1,pos=word_end_chars ; i < 256 ; i++)
124 if (my_isspace(&my_charset_latin1,i))
125 *pos++= (char) i;
126 *pos=0;
127 if (!(replace=init_replace((char**) from.typelib.type_names,
128 (char**) to.typelib.type_names,
129 (uint) from.typelib.count,word_end_chars)))
130 exit(1);
131 free_pointer_array(&from);
132 free_pointer_array(&to);
133 if (initialize_buffer())
134 return 1;
135
136 error=0;
137 if (argc == 0)
138 error=convert_pipe(replace,stdin,stdout);
139 else
140 {
141 while (argc--)
142 {
143 error=convert_file(replace,*(argv++));
144 }
145 }
146 free_buffer();
147 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
148 exit(error ? 2 : 0);
149 return 0; /* No compiler warning */
150 } /* main */
151
152
153 /* reads options */
154 /* Initiates DEBUG - but no debugging here ! */
155
static_get_options(argc,argv)156 static int static_get_options(argc,argv)
157 register int *argc;
158 register char **argv[];
159 {
160 int help,version;
161 char *pos;
162
163 silent=verbose=help=0;
164
165 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
166 while (*++pos)
167 {
168 version=0;
169 switch((*pos)) {
170 case 's':
171 silent=1;
172 break;
173 case 'v':
174 verbose=1;
175 break;
176 case '#':
177 DBUG_PUSH (++pos);
178 pos= (char*) " "; /* Skip rest of arguments */
179 break;
180 case 'V':
181 version=1;
182 case 'I':
183 case '?':
184 help=1; /* Help text written */
185 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
186 MACHINE_TYPE);
187 if (version)
188 break;
189 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
190 puts("This program replaces strings in files or from stdin to stdout.\n"
191 "It accepts a list of from-string/to-string pairs and replaces\n"
192 "each occurrence of a from-string with the corresponding to-string.\n"
193 "The first occurrence of a found string is matched. If there is\n"
194 "more than one possibility for the string to replace, longer\n"
195 "matches are preferred before shorter matches.\n\n"
196 "A from-string can contain these special characters:\n"
197 " \\^ Match start of line.\n"
198 " \\$ Match end of line.\n"
199 " \\b Match space-character, start of line or end of line.\n"
200 " For a end \\b the next replace starts locking at the end\n"
201 " space-character. A \\b alone in a string matches only a\n"
202 " space-character.\n");
203 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
204 puts("or");
205 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
206 puts("");
207 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
208 break;
209 default:
210 fprintf(stderr,"illegal option: -%c\n",*pos);
211 break;
212 }
213 }
214 }
215 if (*argc == 0)
216 {
217 if (!help)
218 my_message(0,"No replace options given",MYF(ME_BELL));
219 exit(0); /* Don't use as pipe */
220 }
221 return(0);
222 } /* static_get_options */
223
224
get_replace_strings(argc,argv,from_array,to_array)225 static int get_replace_strings(argc,argv,from_array,to_array)
226 register int *argc;
227 register char **argv[];
228 POINTER_ARRAY *from_array,*to_array;
229 {
230 char *pos;
231
232 memset(from_array, 0, sizeof(from_array[0]));
233 memset(to_array, 0, sizeof(to_array[0]));
234 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
235 {
236 insert_pointer_name(from_array,pos);
237 (*argc)--;
238 (*argv)++;
239 if (!*argc || !strcmp(**argv,"--"))
240 {
241 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
242 return 1;
243 }
244 insert_pointer_name(to_array,**argv);
245 (*argc)--;
246 (*argv)++;
247 }
248 if (*argc)
249 { /* Skip "--" argument */
250 (*argc)--;
251 (*argv)++;
252 }
253 return 0;
254 }
255
insert_pointer_name(reg1 POINTER_ARRAY * pa,char * name)256 static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
257 {
258 uint i,length,old_count;
259 uchar *new_pos;
260 const char **new_array;
261 DBUG_ENTER("insert_pointer_name");
262
263 if (! pa->typelib.count)
264 {
265 if (!(pa->typelib.type_names=(const char **)
266 my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
267 (sizeof(char *)+sizeof(*pa->flag))*
268 (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
269 DBUG_RETURN(-1);
270 if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
271 MYF(MY_WME))))
272 {
273 my_free(pa->typelib.type_names);
274 DBUG_RETURN (-1);
275 }
276 pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
277 sizeof(*pa->flag));
278 pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
279 pa->length=0;
280 pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
281 pa->array_allocs=1;
282 }
283 length=(uint) strlen(name)+1;
284 if (pa->length+length >= pa->max_length)
285 {
286 pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
287 pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
288 if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
289 (uint) pa->max_length,
290 MYF(MY_WME))))
291 DBUG_RETURN(1);
292 if (new_pos != pa->str)
293 {
294 my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
295 for (i=0 ; i < pa->typelib.count ; i++)
296 pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
297 char*);
298 pa->str=new_pos;
299 }
300 }
301 if (pa->typelib.count >= pa->max_count-1)
302 {
303 int len;
304 pa->array_allocs++;
305 len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
306 if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
307 (uint) len/
308 (sizeof(uchar*)+sizeof(*pa->flag))*
309 (sizeof(uchar*)+sizeof(*pa->flag)),
310 MYF(MY_WME))))
311 DBUG_RETURN(1);
312 pa->typelib.type_names=new_array;
313 old_count=pa->max_count;
314 pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
315 pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
316 memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
317 old_count*sizeof(*pa->flag));
318 }
319 pa->flag[pa->typelib.count]=0; /* Reset flag */
320 pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
321 pa->typelib.type_names[pa->typelib.count]= NullS; /* Put end-mark */
322 (void) strmov((char*) pa->str + pa->length, name);
323 pa->length+=length;
324 DBUG_RETURN(0);
325 } /* insert_pointer_name */
326
327
328 /* free pointer array */
329
free_pointer_array(reg1 POINTER_ARRAY * pa)330 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
331 {
332 if (pa->typelib.count)
333 {
334 pa->typelib.count=0;
335 my_free(pa->typelib.type_names);
336 pa->typelib.type_names=0;
337 my_free(pa->str);
338 }
339 return;
340 } /* free_pointer_array */
341
342
	/* Code for replace routines */
344
345 #define SET_MALLOC_HUNC 64
346
/*
  One candidate state while the state machine is being built.  'bits' is a
  bitmap over positions in the follow[] array of init_replace().
*/
typedef struct st_rep_set {
  uint	*bits;				/* Pointer to used sets */
  /* Next set per character code; negative values encode found-entries */
  short next[LAST_CHAR_CODE];		/* Pointer to next sets */
  uint	found_len;			/* Best match to date */
  int	found_offset;
  uint	table_offset;			/* Index of from-string; ~0 if none */
  uint	size_of_bits;			/* For convenience */
} REP_SET;
355
/*
  Pool of REP_SETs.  'set_buffer' is the allocation; 'set' points past the
  'invisible' leading sets so that set[0] is the first visible one.
  'bit_buffer' holds the bitmaps of all sets back to back.
*/
typedef struct st_rep_sets {
  uint		count;			/* Number of sets */
  uint		extra;			/* Extra sets in buffer */
  uint		invisible;		/* Sets not shown */
  uint		size_of_bits;		/* uint words per bitmap */
  REP_SET	*set,*set_buffer;
  uint		*bit_buffer;
} REP_SETS;
364
/* A distinct (from-string, match offset) pair recorded by find_found() */
typedef struct st_found_set {
  uint table_offset;			/* Index of the from-string */
  int found_offset;
} FOUND_SET;
369
/*
  One position inside a from-string.  Each from-string contributes one
  entry per character plus a terminating entry whose chr is 0.
*/
typedef struct st_follow {
  int chr;				/* Byte value or SPACE_CHAR etc; 0=end */
  uint table_offset;			/* Index of the from-string */
  uint len;				/* 1-based position in the from-string */
} FOLLOWS;
375
376
/* Set pool management */
static int init_sets(REP_SETS *sets,uint states);
static REP_SET *make_new_set(REP_SETS *sets);
static void make_sets_invisible(REP_SETS *sets);
static void free_last_set(REP_SETS *sets);
static void free_sets(REP_SETS *sets);
/* Bitmap operations on a single set */
static void internal_set_bit(REP_SET *set, uint bit);
static void internal_clear_bit(REP_SET *set, uint bit);
static void or_bits(REP_SET *to,REP_SET *from);
static void copy_bits(REP_SET *to,REP_SET *from);
static int cmp_bits(REP_SET *set1,REP_SET *set2);
static int get_next_bit(REP_SET *set,uint lastpos);
/* Lookup helpers that map sets and matches to next[] codes */
static short find_set(REP_SETS *sets,REP_SET *find);
static short find_found(FOUND_SET *found_set,uint table_offset,
			int found_offset);
/* Properties of a from-string */
static uint start_at_word(char * pos);
static uint end_of_word(char * pos);
static uint replace_len(char * pos);

/* Number of entries used in the found_set array of init_replace() */
static uint found_sets=0;
396
397
398 /* Init a replace structure for further calls */
399
init_replace(char ** from,char ** to,uint count,char * word_end_chars)400 static REPLACE *init_replace(char * *from, char * *to,uint count,
401 char * word_end_chars)
402 {
403 uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
404 int used_sets,chr;
405 short default_state;
406 char used_chars[LAST_CHAR_CODE],is_word_end[256];
407 char * pos, *to_pos, **to_array;
408 REP_SETS sets;
409 REP_SET *set,*start_states,*word_states,*new_set;
410 FOLLOWS *follow,*follow_ptr;
411 REPLACE *replace;
412 FOUND_SET *found_set;
413 REPLACE_STRING *rep_str;
414 DBUG_ENTER("init_replace");
415
416 /* Count number of states */
417 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
418 {
419 len=replace_len(from[i]);
420 if (!len)
421 {
422 errno=EINVAL;
423 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
424 DBUG_RETURN(0);
425 }
426 states+=len+1;
427 result_len+=(uint) strlen(to[i])+1;
428 if (len > max_length)
429 max_length=len;
430 }
431 memset(is_word_end, 0, sizeof(is_word_end));
432 for (i=0 ; word_end_chars[i] ; i++)
433 is_word_end[(uchar) word_end_chars[i]]=1;
434
435 if (init_sets(&sets,states))
436 DBUG_RETURN(0);
437 found_sets=0;
438 if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
439 MYF(MY_WME))))
440 {
441 free_sets(&sets);
442 DBUG_RETURN(0);
443 }
444 (void) make_new_set(&sets); /* Set starting set */
445 make_sets_invisible(&sets); /* Hide previus sets */
446 used_sets=-1;
447 word_states=make_new_set(&sets); /* Start of new word */
448 start_states=make_new_set(&sets); /* This is first state */
449 if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
450 {
451 free_sets(&sets);
452 my_free(found_set);
453 DBUG_RETURN(0);
454 }
455
456 /* Init follow_ptr[] */
457 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
458 {
459 if (from[i][0] == '\\' && from[i][1] == '^')
460 {
461 internal_set_bit(start_states,states+1);
462 if (!from[i][2])
463 {
464 start_states->table_offset=i;
465 start_states->found_offset=1;
466 }
467 }
468 else if (from[i][0] == '\\' && from[i][1] == '$')
469 {
470 internal_set_bit(start_states,states);
471 internal_set_bit(word_states,states);
472 if (!from[i][2] && start_states->table_offset == (uint) ~0)
473 {
474 start_states->table_offset=i;
475 start_states->found_offset=0;
476 }
477 }
478 else
479 {
480 internal_set_bit(word_states,states);
481 if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
482 internal_set_bit(start_states,states+1);
483 else
484 internal_set_bit(start_states,states);
485 }
486 for (pos=from[i], len=0; *pos ; pos++)
487 {
488 if (*pos == '\\' && *(pos+1))
489 {
490 pos++;
491 switch (*pos) {
492 case 'b':
493 follow_ptr->chr = SPACE_CHAR;
494 break;
495 case '^':
496 follow_ptr->chr = START_OF_LINE;
497 break;
498 case '$':
499 follow_ptr->chr = END_OF_LINE;
500 break;
501 case 'r':
502 follow_ptr->chr = '\r';
503 break;
504 case 't':
505 follow_ptr->chr = '\t';
506 break;
507 case 'v':
508 follow_ptr->chr = '\v';
509 break;
510 default:
511 follow_ptr->chr = (uchar) *pos;
512 break;
513 }
514 }
515 else
516 follow_ptr->chr= (uchar) *pos;
517 follow_ptr->table_offset=i;
518 follow_ptr->len= ++len;
519 follow_ptr++;
520 }
521 follow_ptr->chr=0;
522 follow_ptr->table_offset=i;
523 follow_ptr->len=len;
524 follow_ptr++;
525 states+=(uint) len+1;
526 }
527
528
529 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
530 {
531 set=sets.set+set_nr;
532 default_state= 0; /* Start from beginning */
533
534 /* If end of found-string not found or start-set with current set */
535
536 for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
537 {
538 if (!follow[i].chr)
539 {
540 if (! default_state)
541 default_state= find_found(found_set,set->table_offset,
542 set->found_offset+1);
543 }
544 }
545 copy_bits(sets.set+used_sets,set); /* Save set for changes */
546 if (!default_state)
547 or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
548
549 /* Find all chars that follows current sets */
550 memset(used_chars, 0, sizeof(used_chars));
551 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
552 {
553 used_chars[follow[i].chr]=1;
554 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
555 follow[i].len > 1) || follow[i].chr == END_OF_LINE)
556 used_chars[0]=1;
557 }
558
559 /* Mark word_chars used if \b is in state */
560 if (used_chars[SPACE_CHAR])
561 for (pos= word_end_chars ; *pos ; pos++)
562 used_chars[(int) (uchar) *pos] = 1;
563
564 /* Handle other used characters */
565 for (chr= 0 ; chr < 256 ; chr++)
566 {
567 if (! used_chars[chr])
568 set->next[chr]= (short) (chr ? default_state : -1);
569 else
570 {
571 new_set=make_new_set(&sets);
572 set=sets.set+set_nr; /* if realloc */
573 new_set->table_offset=set->table_offset;
574 new_set->found_len=set->found_len;
575 new_set->found_offset=set->found_offset+1;
576 found_end=0;
577
578 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
579 {
580 if (!follow[i].chr || follow[i].chr == chr ||
581 (follow[i].chr == SPACE_CHAR &&
582 (is_word_end[chr] ||
583 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
584 (follow[i].chr == END_OF_LINE && ! chr))
585 {
586 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
587 follow[i].len > found_end)
588 found_end=follow[i].len;
589 if (chr && follow[i].chr)
590 internal_set_bit(new_set,i+1); /* To next set */
591 else
592 internal_set_bit(new_set,i);
593 }
594 }
595 if (found_end)
596 {
597 new_set->found_len=0; /* Set for testing if first */
598 bits_set=0;
599 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
600 {
601 if ((follow[i].chr == SPACE_CHAR ||
602 follow[i].chr == END_OF_LINE) && ! chr)
603 bit_nr=i+1;
604 else
605 bit_nr=i;
606 if (follow[bit_nr-1].len < found_end ||
607 (new_set->found_len &&
608 (chr == 0 || !follow[bit_nr].chr)))
609 internal_clear_bit(new_set,i);
610 else
611 {
612 if (chr == 0 || !follow[bit_nr].chr)
613 { /* best match */
614 new_set->table_offset=follow[bit_nr].table_offset;
615 if (chr || (follow[i].chr == SPACE_CHAR ||
616 follow[i].chr == END_OF_LINE))
617 new_set->found_offset=found_end; /* New match */
618 new_set->found_len=found_end;
619 }
620 bits_set++;
621 }
622 }
623 if (bits_set == 1)
624 {
625 set->next[chr] = find_found(found_set,
626 new_set->table_offset,
627 new_set->found_offset);
628 free_last_set(&sets);
629 }
630 else
631 set->next[chr] = find_set(&sets,new_set);
632 }
633 else
634 set->next[chr] = find_set(&sets,new_set);
635 }
636 }
637 }
638
639 /* Alloc replace structure for the replace-state-machine */
640
641 if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
642 sizeof(REPLACE_STRING)*(found_sets+1)+
643 sizeof(char *)*count+result_len,
644 MYF(MY_WME | MY_ZEROFILL))))
645 {
646 rep_str=(REPLACE_STRING*) (replace+sets.count);
647 to_array=(char **) (rep_str+found_sets+1);
648 to_pos=(char *) (to_array+count);
649 for (i=0 ; i < count ; i++)
650 {
651 to_array[i]=to_pos;
652 to_pos=strmov(to_pos,to[i])+1;
653 }
654 rep_str[0].found=1;
655 rep_str[0].replace_string=0;
656 for (i=1 ; i <= found_sets ; i++)
657 {
658 pos=from[found_set[i-1].table_offset];
659 rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
660 rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
661 rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
662 rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
663 end_of_word(pos);
664 }
665 for (i=0 ; i < sets.count ; i++)
666 {
667 for (j=0 ; j < 256 ; j++)
668 if (sets.set[i].next[j] >= 0)
669 replace[i].next[j]=replace+sets.set[i].next[j];
670 else
671 replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
672 }
673 }
674 my_free(follow);
675 free_sets(&sets);
676 my_free(found_set);
677 DBUG_PRINT("exit",("Replace table has %d states",sets.count));
678 DBUG_RETURN(replace);
679 }
680
681
/*
  Prepare an empty set pool able to describe 'states' positions.

  Returns 0 on success and 1 if memory could not be allocated; nothing is
  left allocated in the failure case.
*/

static int init_sets(REP_SETS *sets,uint states)
{
  memset(sets, 0, sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
					      MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
					   SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* Release the buffer allocated above (sets->set is still NULL here) */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
697
698 /* Make help sets invisible for nicer codeing */
699
make_sets_invisible(REP_SETS * sets)700 static void make_sets_invisible(REP_SETS *sets)
701 {
702 sets->invisible=sets->count;
703 sets->set+=sets->count;
704 sets->count=0;
705 }
706
make_new_set(REP_SETS * sets)707 static REP_SET *make_new_set(REP_SETS *sets)
708 {
709 uint i,count,*bit_buffer;
710 REP_SET *set;
711 if (sets->extra)
712 {
713 sets->extra--;
714 set=sets->set+ sets->count++;
715 memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
716 memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
717 set->found_offset=0;
718 set->found_len=0;
719 set->table_offset= (uint) ~0;
720 set->size_of_bits=sets->size_of_bits;
721 return set;
722 }
723 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
724 if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
725 sizeof(REP_SET)*count,
726 MYF(MY_WME))))
727 return 0;
728 sets->set_buffer=set;
729 sets->set=set+sets->invisible;
730 if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
731 (sizeof(uint)*sets->size_of_bits)*count,
732 MYF(MY_WME))))
733 return 0;
734 sets->bit_buffer=bit_buffer;
735 for (i=0 ; i < count ; i++)
736 {
737 sets->set_buffer[i].bits=bit_buffer;
738 bit_buffer+=sets->size_of_bits;
739 }
740 sets->extra=SET_MALLOC_HUNC;
741 return make_new_set(sets);
742 }
743
/* Give the most recently created set back to the pool of spare sets */

static void free_last_set(REP_SETS *sets)
{
  sets->extra++;
  sets->count--;
}
750
/* Release the two buffers owned by the set pool */

static void free_sets(REP_SETS *sets)
{
  my_free(sets->set_buffer);
  my_free(sets->bit_buffer);
}
757
/*
  Set bit number 'bit' in the bitmap of 'set'.  The mask is built from an
  unsigned constant so that the top bit of a word can be set without
  overflowing a signed int.
*/

static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
}
763
/*
  Clear bit number 'bit' in the bitmap of 'set'.  The mask is built from an
  unsigned constant so that the top bit of a word can be addressed without
  overflowing a signed int.
*/

static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~(1U << (bit % WORD_BIT));
}
769
770
/* OR every bitmap word of 'from' into 'to' (to->size_of_bits words) */

static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *dst_end= dst + to->size_of_bits;
  uint *src= from->bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
778
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */

static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits, (uchar*) from->bits, bytes);
}
784
/* Compare two bitmaps; returns 0 if they are identical */

static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bytes);
}
790
791
792 /* Get next set bit from set. */
793
get_next_bit(REP_SET * set,uint lastpos)794 static int get_next_bit(REP_SET *set,uint lastpos)
795 {
796 uint pos,*start,*end,bits;
797
798 start=set->bits+ ((lastpos+1) / WORD_BIT);
799 end=set->bits + set->size_of_bits;
800 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
801
802 while (! bits && ++start < end)
803 bits=start[0];
804 if (!bits)
805 return 0;
806 pos=(uint) (start-set->bits)*WORD_BIT;
807 while (! (bits & 1))
808 {
809 bits>>=1;
810 pos++;
811 }
812 return pos;
813 }
814
815 /* find if there is a same set in sets. If there is, use it and
816 free given set, else put in given set in sets and return it's
817 position */
818
find_set(REP_SETS * sets,REP_SET * find)819 static short find_set(REP_SETS *sets,REP_SET *find)
820 {
821 uint i;
822 for (i=0 ; i < sets->count-1 ; i++)
823 {
824 if (!cmp_bits(sets->set+i,find))
825 {
826 free_last_set(sets);
827 return (short) i;
828 }
829 }
830 return (short) i; /* return new position */
831 }
832
833
834 /*
835 find if there is a found_set with same table_offset & found_offset
836 If there is return offset to it, else add new offset and return pos.
837 Pos returned is -offset-2 in found_set_structure because it's is
838 saved in set->next and set->next[] >= 0 points to next set and
839 set->next[] == -1 is reserved for end without replaces.
840 */
841
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)842 static short find_found(FOUND_SET *found_set,uint table_offset,
843 int found_offset)
844 {
845 int i;
846 for (i=0 ; (uint) i < found_sets ; i++)
847 if (found_set[i].table_offset == table_offset &&
848 found_set[i].found_offset == found_offset)
849 return (short) (-i-2);
850 found_set[i].table_offset=table_offset;
851 found_set[i].found_offset=found_offset;
852 found_sets++;
853 return (short) (-i-2); /* return new position */
854 }
855
856 /* Return 1 if regexp starts with \b or ends with \b*/
857
start_at_word(char * pos)858 static uint start_at_word(char * pos)
859 {
860 return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
861 }
862
end_of_word(char * pos)863 static uint end_of_word(char * pos)
864 {
865 char * end=strend(pos);
866 return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
867 (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
868 1 : 0;
869 }
870
871
replace_len(char * str)872 static uint replace_len(char * str)
873 {
874 uint len=0;
875 while (*str)
876 {
877 if (str[0] == '\\' && str[1])
878 str++;
879 str++;
880 len++;
881 }
882 return len;
883 }
884
885
886 /* The actual loop */
887
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)888 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
889 char *from)
890 {
891 reg1 REPLACE *rep_pos;
892 reg2 REPLACE_STRING *rep_str;
893 char *to, *end, *pos, *new;
894
895 end=(to= *start) + *max_length-1;
896 rep_pos=rep+1;
897 for(;;)
898 {
899 while (!rep_pos->found)
900 {
901 rep_pos= rep_pos->next[(uchar) *from];
902 if (to == end)
903 {
904 (*max_length)+=8192;
905 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
906 return (uint) -1;
907 to=new+(to - *start);
908 end=(*start=new)+ *max_length-1;
909 }
910 *to++= *from++;
911 }
912 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
913 return (uint) (to - *start)-1;
914 updated=1; /* Some char * is replaced */
915 to-=rep_str->to_offset;
916 for (pos=rep_str->replace_string; *pos ; pos++)
917 {
918 if (to == end)
919 {
920 (*max_length)*=2;
921 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
922 return (uint) -1;
923 to=new+(to - *start);
924 end=(*start=new)+ *max_length-1;
925 }
926 *to++= *pos;
927 }
928 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
929 return (uint) (to - *start);
930 rep_pos=rep;
931 }
932 }
933
/* State shared by the buffer helpers and convert_pipe() */
static char *buffer;		/* The buffer itself, grown as needed.  */
static int bufbytes;		/* Number of bytes in the buffer.  */
/*
  bufread: number of bytes to get with each read().
  my_eof:  set when fill_buffer_retaining() has appended the final newline.
*/
static int bufread,my_eof;
static uint bufalloc;		/* Allocated size of buffer, less one byte */
static char *out_buff;		/* Result of replace_strings() for one line */
static uint out_length;		/* Allocated size of out_buff */
940
initialize_buffer()941 static int initialize_buffer()
942 {
943 bufread = 8192;
944 bufalloc = bufread + bufread / 2;
945 if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
946 return 1;
947 bufbytes=my_eof=0;
948 out_length=bufread;
949 if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
950 return(1);
951 return 0;
952 }
953
reset_buffer()954 static void reset_buffer()
955 {
956 bufbytes=my_eof=0;
957 }
958
free_buffer()959 static void free_buffer()
960 {
961 my_free(buffer);
962 my_free(out_buff);
963 }
964
965
966 /*
967 Fill the buffer retaining the last n bytes at the beginning of the
968 newly filled buffer (for backward context). Returns the number of new
969 bytes read from disk.
970 */
971
fill_buffer_retaining(fd,n)972 static int fill_buffer_retaining(fd,n)
973 File fd;
974 int n;
975 {
976 int i;
977
978 /* See if we need to grow the buffer. */
979 if ((int) bufalloc - n <= bufread)
980 {
981 while ((int) bufalloc - n <= bufread)
982 {
983 bufalloc *= 2;
984 bufread *= 2;
985 }
986 buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
987 if (! buffer)
988 return(-1);
989 }
990
991 /* Shift stuff down. */
992 bmove(buffer,buffer+bufbytes-n,(uint) n);
993 bufbytes = n;
994
995 if (my_eof)
996 return 0;
997
998 /* Read in new stuff. */
999 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1000 (size_t) bufread, MYF(MY_WME))) < 0)
1001 return -1;
1002
1003 /* Kludge to pretend every nonempty file ends with a newline. */
1004 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1005 {
1006 my_eof = i = 1;
1007 buffer[bufbytes] = '\n';
1008 }
1009
1010 bufbytes += i;
1011 return i;
1012 }
1013
1014 /* Return 0 if convert is ok */
1015 /* Global variable update is set if something was changed */
1016
convert_pipe(rep,in,out)1017 static int convert_pipe(rep,in,out)
1018 REPLACE *rep;
1019 FILE *in,*out;
1020 {
1021 int retain,error;
1022 uint length;
1023 char save_char,*end_of_line,*start_of_line;
1024 DBUG_ENTER("convert_pipe");
1025
1026 updated=retain=0;
1027 reset_buffer();
1028
1029 while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1030 {
1031 end_of_line=buffer ;
1032 buffer[bufbytes]=0; /* Sentinel */
1033 for (;;)
1034 {
1035 start_of_line=end_of_line;
1036 while (end_of_line[0] != '\n' && end_of_line[0])
1037 end_of_line++;
1038 if (end_of_line == buffer+bufbytes)
1039 {
1040 retain= (int) (end_of_line - start_of_line);
1041 break; /* No end of line, read more */
1042 }
1043 save_char=end_of_line[0];
1044 end_of_line[0]=0;
1045 end_of_line++;
1046 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1047 (uint) -1)
1048 return 1;
1049 if (!my_eof)
1050 out_buff[length++]=save_char; /* Don't write added newline */
1051 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1052 DBUG_RETURN(1);
1053 }
1054 }
1055 DBUG_RETURN(error);
1056 }
1057
1058
/*
  Convert the file 'name' in place.

  The result is written to a temporary file in the same directory; the
  temporary file replaces the original only if something was changed and
  every step, including closing the temporary file, succeeded.  Otherwise
  the temporary file is removed and the original is left alone.  If 'name'
  is a symbolic link (and symlinks are enabled) the link target is the file
  that is converted.

  Returns 0 on success and non-zero on error.
*/

static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* Don't leave the descriptor or the empty temporary file behind */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* A failed close means buffered output was lost: don't use the result */
  if (my_fclose(out,MYF(0)) && !error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1107