1 /*
2 Copyright (c) 2000, 2014, Oracle and/or its affiliates
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; version 2 of
7 the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17 02110-1335 USA */
18
19 /*
20 Replace strings in textfile
21
22 This program replaces strings in files or from stdin to stdout.
23 It accepts a list of from-string/to-string pairs and replaces
24 each occurrence of a from-string with the corresponding to-string.
25 The first occurrence of a found string is matched. If there is more
26 than one possibility for the string to replace, longer matches
27 are preferred before shorter matches.
28
29 Special characters in from string:
30 \^ Match start of line.
31 \$ Match end of line.
32 \b Match space-character, start of line or end of line.
For a trailing \b the next replace starts looking at the end space-character.
34 An \b alone or in a string matches only a space-character.
35 \r, \t, \v as in C.
The program builds a DFA state machine from the strings, so the speed does
not depend on the number of replace-strings (only on the number of replaces).
A line is assumed to end with \n or \0.
There is no limit, except memory, on the length of strings.
40
41 Written by Monty.
42 fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50
#define PC_MALLOC 256 /* Bytes for pointers (allocation step of the pointer/flag array) */
#define PS_MALLOC 512 /* Bytes for data (allocation step of the string buffer) */

/*
  Growable list of NUL-terminated strings, filled by insert_pointer_name().

  The string pointers (typelib.type_names) and the per-string flag bytes
  share one allocation: the flags start right after the max_count pointer
  slots.  The string bytes themselves live in the separate buffer 'str'.
*/
typedef struct st_pointer_array { /* when using array-strings */
  TYPELIB typelib; /* Pointer to strings */
  uchar *str; /* Strings is here */
  uint8 *flag; /* Flag about each var. */
  /*
    array_allocs: number of PC_MALLOC units allocated for the pointer array
    max_count:    number of pointer slots in the current allocation
    length:       bytes of 'str' in use
    max_length:   bytes allocated for 'str'
  */
  uint array_allocs,max_count,length,max_length;
} POINTER_ARRAY;
60
/*
  Pseudo character codes used while building the state machine.  They sit
  above the 0..255 byte range so they can share a table with real bytes.
*/
#define SPACE_CHAR 256
#define START_OF_LINE 257
#define END_OF_LINE 258
#define LAST_CHAR_CODE 259

/*
  One state of the finished state machine.  'found' is 0 for a real state.
  next[] is indexed by input byte; an entry may instead point at a
  REPLACE_STRING (cast to REPLACE*), which is recognised by a non-zero
  'found'.  'found' must therefore stay the first member of both structs.
*/
typedef struct st_replace {
  uint8 found;
  struct st_replace *next[256];
} REPLACE;

/*
  Terminal entry reached when a from-string has matched.
    found           1 normally, 2 when the from-string is exactly "\^"
    replace_string  text to emit; NULL in entry 0, which marks end of input
    to_offset       how many already-copied output bytes to back up over
    from_offset     how many input bytes to back up before continuing
*/
typedef struct st_replace_found {
  my_bool found;
  char *replace_string;
  uint to_offset;
  int from_offset;
} REPLACE_STRING;

/* Number of bits in one 'uint' word of a set bitmap */
#ifndef WORD_BIT
#define WORD_BIT (8*sizeof(uint))
#endif
81
/* functions defined in this file */

/* Command line handling */
static int static_get_options(int *argc,char * * *argv);
static int get_replace_strings(int *argc,char * * *argv,
                               POINTER_ARRAY *from_array,
                               POINTER_ARRAY *to_array);
/* String list helpers */
static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
static void free_pointer_array(POINTER_ARRAY *pa);
/* Conversion drivers */
static int convert_pipe(REPLACE *,FILE *,FILE *);
static int convert_file(REPLACE *, char *);
/* State machine construction and execution */
static REPLACE *init_replace(char * *from, char * *to,uint count,
                             char * word_end_chars);
static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
                            char * from);
/* Input/output buffer management */
static int initialize_buffer(void);
static void reset_buffer(void);
static void free_buffer(void);

/*
  silent / verbose: set by the -s / -v options.
  updated: set to 1 by replace_strings() when a replacement was made;
           cleared by convert_pipe() before each input.
*/
static int silent=0,verbose=0,updated=0;
101
102 /* The main program */
103
main(int argc,char * argv[])104 int main(int argc, char *argv[])
105 {
106 int i,error;
107 char word_end_chars[256],*pos;
108 POINTER_ARRAY from,to;
109 REPLACE *replace;
110 MY_INIT(argv[0]);
111
112 if (static_get_options(&argc,&argv))
113 exit(1);
114 if (get_replace_strings(&argc,&argv,&from,&to))
115 exit(1);
116
117 for (i=1,pos=word_end_chars ; i < 256 ; i++)
118 if (my_isspace(&my_charset_latin1,i))
119 *pos++= (char) i;
120 *pos=0;
121 if (!(replace=init_replace((char**) from.typelib.type_names,
122 (char**) to.typelib.type_names,
123 (uint) from.typelib.count,word_end_chars)))
124 exit(1);
125 free_pointer_array(&from);
126 free_pointer_array(&to);
127 if (initialize_buffer())
128 return 1;
129
130 error=0;
131 if (argc == 0)
132 error=convert_pipe(replace,stdin,stdout);
133 else
134 {
135 while (argc--)
136 {
137 error=convert_file(replace,*(argv++));
138 }
139 }
140 free_buffer();
141 my_free(replace);
142 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
143 exit(error ? 2 : 0);
144 return 0; /* No compiler warning */
145 } /* main */
146
147
148 /* reads options */
149 /* Initiates DEBUG - but no debugging here ! */
150
static_get_options(argc,argv)151 static int static_get_options(argc,argv)
152 register int *argc;
153 register char **argv[];
154 {
155 int help,version;
156 char *pos;
157
158 silent=verbose=help=0;
159
160 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
161 while (*++pos)
162 {
163 version=0;
164 switch((*pos)) {
165 case 's':
166 silent=1;
167 break;
168 case 'v':
169 verbose=1;
170 break;
171 case '#':
172 DBUG_PUSH (++pos);
173 pos= (char*) " "; /* Skip rest of arguments */
174 break;
175 case 'V':
176 version=1;
177 /* fall through */
178 case 'I':
179 case '?':
180 help=1; /* Help text written */
181 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
182 MACHINE_TYPE);
183 if (version)
184 break;
185 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
186 puts("This program replaces strings in files or from stdin to stdout.\n"
187 "It accepts a list of from-string/to-string pairs and replaces\n"
188 "each occurrence of a from-string with the corresponding to-string.\n"
189 "The first occurrence of a found string is matched. If there is\n"
190 "more than one possibility for the string to replace, longer\n"
191 "matches are preferred before shorter matches.\n\n"
192 "A from-string can contain these special characters:\n"
193 " \\^ Match start of line.\n"
194 " \\$ Match end of line.\n"
195 " \\b Match space-character, start of line or end of line.\n"
196 " For a end \\b the next replace starts locking at the end\n"
197 " space-character. A \\b alone in a string matches only a\n"
198 " space-character.\n");
199 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
200 puts("or");
201 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
202 puts("");
203 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
204 break;
205 default:
206 fprintf(stderr,"illegal option: -%c\n",*pos);
207 break;
208 }
209 }
210 }
211 if (*argc == 0)
212 {
213 if (!help)
214 my_message(0,"No replace options given",MYF(ME_BELL));
215 exit(0); /* Don't use as pipe */
216 }
217 return(0);
218 } /* static_get_options */
219
220
get_replace_strings(argc,argv,from_array,to_array)221 static int get_replace_strings(argc,argv,from_array,to_array)
222 register int *argc;
223 register char **argv[];
224 POINTER_ARRAY *from_array,*to_array;
225 {
226 char *pos;
227
228 bzero((char*) from_array,sizeof(from_array[0]));
229 bzero((char*) to_array,sizeof(to_array[0]));
230 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
231 {
232 insert_pointer_name(from_array,pos);
233 (*argc)--;
234 (*argv)++;
235 if (!*argc || !strcmp(**argv,"--"))
236 {
237 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
238 return 1;
239 }
240 insert_pointer_name(to_array,**argv);
241 (*argc)--;
242 (*argv)++;
243 }
244 if (*argc)
245 { /* Skip "--" argument */
246 (*argc)--;
247 (*argv)++;
248 }
249 return 0;
250 }
251
insert_pointer_name(reg1 POINTER_ARRAY * pa,char * name)252 static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
253 {
254 uint i,length,old_count;
255 uchar *new_pos;
256 const char **new_array;
257 DBUG_ENTER("insert_pointer_name");
258
259 if (! pa->typelib.count)
260 {
261 if (!(pa->typelib.type_names=(const char **)
262 my_malloc(PSI_NOT_INSTRUMENTED, ((PC_MALLOC-MALLOC_OVERHEAD)/
263 (sizeof(char *)+sizeof(*pa->flag))*
264 (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
265 DBUG_RETURN(-1);
266 if (!(pa->str= (uchar*) my_malloc(PSI_NOT_INSTRUMENTED,
267 PS_MALLOC-MALLOC_OVERHEAD, MYF(MY_WME))))
268 {
269 my_free((void*) pa->typelib.type_names);
270 DBUG_RETURN (-1);
271 }
272 pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
273 sizeof(*pa->flag));
274 pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
275 pa->length=0;
276 pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
277 pa->array_allocs=1;
278 }
279 length=(uint) strlen(name)+1;
280 if (pa->length+length >= pa->max_length)
281 {
282 pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
283 pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
284 if (!(new_pos= (uchar*) my_realloc(PSI_NOT_INSTRUMENTED, (uchar*) pa->str,
285 (uint) pa->max_length, MYF(MY_WME))))
286 DBUG_RETURN(1);
287 if (new_pos != pa->str)
288 {
289 my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
290 for (i=0 ; i < pa->typelib.count ; i++)
291 pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
292 char*);
293 pa->str=new_pos;
294 }
295 }
296 if (pa->typelib.count >= pa->max_count-1)
297 {
298 int len;
299 pa->array_allocs++;
300 len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
301 if (!(new_array=(const char **) my_realloc(PSI_NOT_INSTRUMENTED, (void*)(pa->typelib.type_names),
302 (uint) len/
303 (sizeof(uchar*)+sizeof(*pa->flag))*
304 (sizeof(uchar*)+sizeof(*pa->flag)),
305 MYF(MY_WME))))
306 DBUG_RETURN(1);
307 pa->typelib.type_names=new_array;
308 old_count=pa->max_count;
309 pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
310 pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
311 memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
312 old_count*sizeof(*pa->flag));
313 }
314 pa->flag[pa->typelib.count]=0; /* Reset flag */
315 pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
316 pa->typelib.type_names[pa->typelib.count]= NullS; /* Put end-mark */
317 (void) strmov((char*) pa->str + pa->length, name);
318 pa->length+=length;
319 DBUG_RETURN(0);
320 } /* insert_pointer_name */
321
322
323 /* free pointer array */
324
free_pointer_array(reg1 POINTER_ARRAY * pa)325 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
326 {
327 if (pa->typelib.count)
328 {
329 pa->typelib.count=0;
330 my_free((void*) pa->typelib.type_names);
331 pa->typelib.type_names=0;
332 my_free(pa->str);
333 }
334 return;
335 } /* free_pointer_array */
336
337
/* Code for replace routines */

/* Number of sets added each time the set buffer is grown */
#define SET_MALLOC_HUNC 64

/*
  One set of positions in the from-strings, i.e. one candidate state
  while the state machine is being built.
*/
typedef struct st_rep_set {
  uint *bits; /* Pointer to used sets */
  short next[LAST_CHAR_CODE]; /* Pointer to next sets */
  uint found_len; /* Best match to date */
  int found_offset;
  uint table_offset; /* Index of matched from-string; (uint) ~0 if none */
  uint size_of_bits; /* For convenience */
} REP_SET;

/*
  Pool of REP_SETs.  'set' points at the first visible set inside
  'set_buffer'; the 'invisible' sets before it are helper sets that are
  addressed with negative indexes.
*/
typedef struct st_rep_sets {
  uint count; /* Number of sets */
  uint extra; /* Extra sets in buffer */
  uint invisible; /* Sets not shown */
  uint size_of_bits;
  REP_SET *set,*set_buffer;
  uint *bit_buffer;
} REP_SETS;

/* A (from-string, offset) pair for which a REPLACE_STRING will be made */
typedef struct st_found_set {
  uint table_offset;
  int found_offset;
} FOUND_SET;

/*
  One position in a from-string: the character code expected there
  (0 marks the end of the string), the index of the from-string and the
  position's 1-based offset within it.
*/
typedef struct st_follow {
  int chr;
  uint table_offset;
  uint len;
} FOLLOWS;


static int init_sets(REP_SETS *sets,uint states);
static REP_SET *make_new_set(REP_SETS *sets);
static void make_sets_invisible(REP_SETS *sets);
static void free_last_set(REP_SETS *sets);
static void free_sets(REP_SETS *sets);
static void internal_set_bit(REP_SET *set, uint bit);
static void internal_clear_bit(REP_SET *set, uint bit);
static void or_bits(REP_SET *to,REP_SET *from);
static void copy_bits(REP_SET *to,REP_SET *from);
static int cmp_bits(REP_SET *set1,REP_SET *set2);
static int get_next_bit(REP_SET *set,uint lastpos);
static short find_set(REP_SETS *sets,REP_SET *find);
static short find_found(FOUND_SET *found_set,uint table_offset,
                        int found_offset);
static uint start_at_word(char * pos);
static uint end_of_word(char * pos);
static uint replace_len(char * pos);

/* Number of entries in use in the found_set array of init_replace() */
static uint found_sets=0;
391
392
393 /* Init a replace structure for further calls */
394
init_replace(char ** from,char ** to,uint count,char * word_end_chars)395 static REPLACE *init_replace(char * *from, char * *to,uint count,
396 char * word_end_chars)
397 {
398 uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
399 int used_sets,chr;
400 short default_state;
401 char used_chars[LAST_CHAR_CODE],is_word_end[256];
402 char * pos, *to_pos, **to_array;
403 REP_SETS sets;
404 REP_SET *set,*start_states,*word_states,*new_set;
405 FOLLOWS *follow,*follow_ptr;
406 REPLACE *replace;
407 FOUND_SET *found_set;
408 REPLACE_STRING *rep_str;
409 DBUG_ENTER("init_replace");
410
411 /* Count number of states */
412 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
413 {
414 len=replace_len(from[i]);
415 if (!len)
416 {
417 errno=EINVAL;
418 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
419 DBUG_RETURN(0);
420 }
421 states+=len+1;
422 result_len+=(uint) strlen(to[i])+1;
423 if (len > max_length)
424 max_length=len;
425 }
426 bzero((char*) is_word_end,sizeof(is_word_end));
427 for (i=0 ; word_end_chars[i] ; i++)
428 is_word_end[(uchar) word_end_chars[i]]=1;
429
430 if (init_sets(&sets,states))
431 DBUG_RETURN(0);
432 found_sets=0;
433 if (!(found_set= (FOUND_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
434 sizeof(FOUND_SET)*max_length*count,
435 MYF(MY_WME))))
436 {
437 free_sets(&sets);
438 DBUG_RETURN(0);
439 }
440 (void) make_new_set(&sets); /* Set starting set */
441 make_sets_invisible(&sets); /* Hide previus sets */
442 used_sets=-1;
443 word_states=make_new_set(&sets); /* Start of new word */
444 start_states=make_new_set(&sets); /* This is first state */
445 if (!(follow=(FOLLOWS*) my_malloc(PSI_NOT_INSTRUMENTED,
446 (states+2)*sizeof(FOLLOWS), MYF(MY_WME))))
447 {
448 free_sets(&sets);
449 my_free(found_set);
450 DBUG_RETURN(0);
451 }
452
453 /* Init follow_ptr[] */
454 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
455 {
456 if (from[i][0] == '\\' && from[i][1] == '^')
457 {
458 internal_set_bit(start_states,states+1);
459 if (!from[i][2])
460 {
461 start_states->table_offset=i;
462 start_states->found_offset=1;
463 }
464 }
465 else if (from[i][0] == '\\' && from[i][1] == '$')
466 {
467 internal_set_bit(start_states,states);
468 internal_set_bit(word_states,states);
469 if (!from[i][2] && start_states->table_offset == (uint) ~0)
470 {
471 start_states->table_offset=i;
472 start_states->found_offset=0;
473 }
474 }
475 else
476 {
477 internal_set_bit(word_states,states);
478 if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
479 internal_set_bit(start_states,states+1);
480 else
481 internal_set_bit(start_states,states);
482 }
483 for (pos=from[i], len=0; *pos ; pos++)
484 {
485 if (*pos == '\\' && *(pos+1))
486 {
487 pos++;
488 switch (*pos) {
489 case 'b':
490 follow_ptr->chr = SPACE_CHAR;
491 break;
492 case '^':
493 follow_ptr->chr = START_OF_LINE;
494 break;
495 case '$':
496 follow_ptr->chr = END_OF_LINE;
497 break;
498 case 'r':
499 follow_ptr->chr = '\r';
500 break;
501 case 't':
502 follow_ptr->chr = '\t';
503 break;
504 case 'v':
505 follow_ptr->chr = '\v';
506 break;
507 default:
508 follow_ptr->chr = (uchar) *pos;
509 break;
510 }
511 }
512 else
513 follow_ptr->chr= (uchar) *pos;
514 follow_ptr->table_offset=i;
515 follow_ptr->len= ++len;
516 follow_ptr++;
517 }
518 follow_ptr->chr=0;
519 follow_ptr->table_offset=i;
520 follow_ptr->len=len;
521 follow_ptr++;
522 states+=(uint) len+1;
523 }
524
525
526 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
527 {
528 set=sets.set+set_nr;
529 default_state= 0; /* Start from beginning */
530
531 /* If end of found-string not found or start-set with current set */
532
533 for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
534 {
535 if (!follow[i].chr)
536 {
537 if (! default_state)
538 default_state= find_found(found_set,set->table_offset,
539 set->found_offset+1);
540 }
541 }
542 copy_bits(sets.set+used_sets,set); /* Save set for changes */
543 if (!default_state)
544 or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
545
546 /* Find all chars that follows current sets */
547 bzero((char*) used_chars,sizeof(used_chars));
548 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
549 {
550 used_chars[follow[i].chr]=1;
551 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
552 follow[i].len > 1) || follow[i].chr == END_OF_LINE)
553 used_chars[0]=1;
554 }
555
556 /* Mark word_chars used if \b is in state */
557 if (used_chars[SPACE_CHAR])
558 for (pos= word_end_chars ; *pos ; pos++)
559 used_chars[(int) (uchar) *pos] = 1;
560
561 /* Handle other used characters */
562 for (chr= 0 ; chr < 256 ; chr++)
563 {
564 if (! used_chars[chr])
565 set->next[chr]= (short) (chr ? default_state : -1);
566 else
567 {
568 new_set=make_new_set(&sets);
569 set=sets.set+set_nr; /* if realloc */
570 new_set->table_offset=set->table_offset;
571 new_set->found_len=set->found_len;
572 new_set->found_offset=set->found_offset+1;
573 found_end=0;
574
575 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
576 {
577 if (!follow[i].chr || follow[i].chr == chr ||
578 (follow[i].chr == SPACE_CHAR &&
579 (is_word_end[chr] ||
580 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
581 (follow[i].chr == END_OF_LINE && ! chr))
582 {
583 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
584 follow[i].len > found_end)
585 found_end=follow[i].len;
586 if (chr && follow[i].chr)
587 internal_set_bit(new_set,i+1); /* To next set */
588 else
589 internal_set_bit(new_set,i);
590 }
591 }
592 if (found_end)
593 {
594 new_set->found_len=0; /* Set for testing if first */
595 bits_set=0;
596 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
597 {
598 if ((follow[i].chr == SPACE_CHAR ||
599 follow[i].chr == END_OF_LINE) && ! chr)
600 bit_nr=i+1;
601 else
602 bit_nr=i;
603 if (follow[bit_nr-1].len < found_end ||
604 (new_set->found_len &&
605 (chr == 0 || !follow[bit_nr].chr)))
606 internal_clear_bit(new_set,i);
607 else
608 {
609 if (chr == 0 || !follow[bit_nr].chr)
610 { /* best match */
611 new_set->table_offset=follow[bit_nr].table_offset;
612 if (chr || (follow[i].chr == SPACE_CHAR ||
613 follow[i].chr == END_OF_LINE))
614 new_set->found_offset=found_end; /* New match */
615 new_set->found_len=found_end;
616 }
617 bits_set++;
618 }
619 }
620 if (bits_set == 1)
621 {
622 set->next[chr] = find_found(found_set,
623 new_set->table_offset,
624 new_set->found_offset);
625 free_last_set(&sets);
626 }
627 else
628 set->next[chr] = find_set(&sets,new_set);
629 }
630 else
631 set->next[chr] = find_set(&sets,new_set);
632 }
633 }
634 }
635
636 /* Alloc replace structure for the replace-state-machine */
637
638 if ((replace=(REPLACE*) my_malloc(PSI_NOT_INSTRUMENTED,
639 sizeof(REPLACE)*(sets.count)+
640 sizeof(REPLACE_STRING)*(found_sets+1)+
641 sizeof(char *)*count+result_len,
642 MYF(MY_WME | MY_ZEROFILL))))
643 {
644 rep_str=(REPLACE_STRING*) (replace+sets.count);
645 to_array=(char **) (rep_str+found_sets+1);
646 to_pos=(char *) (to_array+count);
647 for (i=0 ; i < count ; i++)
648 {
649 to_array[i]=to_pos;
650 to_pos=strmov(to_pos,to[i])+1;
651 }
652 rep_str[0].found=1;
653 rep_str[0].replace_string=0;
654 for (i=1 ; i <= found_sets ; i++)
655 {
656 pos=from[found_set[i-1].table_offset];
657 /*
658 Test if we are matching start of string (\^)
659 We can't use bcmp() here as pos may be only 1 character and
660 that would confuse MSAN.
661 */
662 rep_str[i].found= (uint8) ((pos[0] == '\\' && pos[1] == '^' &&
663 pos[2] == 0) ? 2 : 1);
664 rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
665 rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
666 rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
667 end_of_word(pos);
668 }
669 for (i=0 ; i < sets.count ; i++)
670 {
671 for (j=0 ; j < 256 ; j++)
672 if (sets.set[i].next[j] >= 0)
673 replace[i].next[j]=replace+sets.set[i].next[j];
674 else
675 replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
676 }
677 }
678 my_free(follow);
679 free_sets(&sets);
680 my_free(found_set);
681 DBUG_PRINT("exit",("Replace table has %d states",sets.count));
682 DBUG_RETURN(replace);
683 }
684
685
/*
  Prepare an empty pool of sets able to describe 'states' positions.

  Allocates the initial set buffer and bitmap buffer.  The sets
  themselves are handed out later by make_new_set().

  Returns 0 on success, 1 if an allocation fails (nothing is left
  allocated in that case).
*/
static int init_sets(REP_SETS *sets,uint states)
{
  bzero((char*) sets,sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(PSI_NOT_INSTRUMENTED,
                                              sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(PSI_NOT_INSTRUMENTED,
                                           sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /*
      Release the buffer that was just allocated.  sets->set is still
      NULL here (zeroed above), so freeing it would leak set_buffer.
    */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
703
/* Make help sets invisible for nicer coding */
705
/*
  Hide all sets made so far: 'set' is moved past them and the visible
  count restarts at 0.  The hidden sets stay in the buffer.
*/
static void make_sets_invisible(REP_SETS *sets)
{
  const uint hidden= sets->count;

  sets->set+= hidden;
  sets->invisible= hidden;
  sets->count= 0;
}
712
make_new_set(REP_SETS * sets)713 static REP_SET *make_new_set(REP_SETS *sets)
714 {
715 uint i,count,*bit_buffer;
716 REP_SET *set;
717 if (sets->extra)
718 {
719 sets->extra--;
720 set=sets->set+ sets->count++;
721 bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
722 bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
723 set->found_offset=0;
724 set->found_len=0;
725 set->table_offset= (uint) ~0;
726 set->size_of_bits=sets->size_of_bits;
727 return set;
728 }
729 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
730 if (!(set=(REP_SET*) my_realloc(PSI_NOT_INSTRUMENTED, sets->set_buffer,
731 sizeof(REP_SET)*count, MYF(MY_WME))))
732 return 0;
733 sets->set_buffer=set;
734 sets->set=set+sets->invisible;
735 if (!(bit_buffer=(uint*) my_realloc(PSI_NOT_INSTRUMENTED, sets->bit_buffer,
736 (sizeof(uint)*sets->size_of_bits)*count,
737 MYF(MY_WME))))
738 return 0;
739 sets->bit_buffer=bit_buffer;
740 for (i=0 ; i < count ; i++)
741 {
742 sets->set_buffer[i].bits=bit_buffer;
743 bit_buffer+=sets->size_of_bits;
744 }
745 sets->extra=SET_MALLOC_HUNC;
746 return make_new_set(sets);
747 }
748
/*
  Give back the set most recently handed out by make_new_set().  The
  slot stays allocated; it just becomes a spare set again.
*/
static void free_last_set(REP_SETS *sets)
{
  --sets->count;
  ++sets->extra;
}
755
/* Release the two buffers owned by a set pool. */
static void free_sets(REP_SETS *sets)
{
  my_free(sets->set_buffer);    /* The REP_SET structs */
  my_free(sets->bit_buffer);    /* Their bitmaps */
}
762
/*
  Turn on bit number 'bit' in the bitmap of 'set'.

  The mask is built from an unsigned 1: shifting a signed 1 into the top
  bit of an int is undefined behaviour.
*/
static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
  return;
}
768
/*
  Turn off bit number 'bit' in the bitmap of 'set'.

  The mask is built from an unsigned 1: shifting a signed 1 into the top
  bit of an int is undefined behaviour.
*/
static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~ (1U << (bit % WORD_BIT));
  return;
}
774
775
/* Add every bit that is set in 'from' to the bitmap of 'to'. */
static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *src= from->bits;
  uint *dst_end= dst + to->size_of_bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
783
/* Overwrite the bitmap of 'to' with the bitmap of 'from'. */
static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t bitmap_bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits, (uchar*) from->bits, bitmap_bytes);
}
789
/*
  Compare the bitmaps of two sets.
  Returns 0 when they are identical, otherwise the memcmp() result.
*/
static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t bitmap_bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bitmap_bytes);
}
795
796
797 /* Get next set bit from set. */
798
/*
  Get next set bit from set.

  Returns the number of the first set bit after 'lastpos', or 0 when
  there is none.  Pass (uint) ~0 as 'lastpos' to start from the
  beginning.  Because 0 means "no more bits", bit 0 is never reported.
*/
static int get_next_bit(REP_SET *set,uint lastpos)
{
  uint pos,*start,*end,bits;

  start=set->bits+ ((lastpos+1) / WORD_BIT);
  end=set->bits + set->size_of_bits;
  /*
    Mask away the bits up to and including lastpos in the first word.
    The mask uses an unsigned 1: with a signed int, a shift count of
    WORD_BIT-1 followed by the subtraction would overflow.
  */
  bits=start[0] & ~((1U << ((lastpos+1) % WORD_BIT)) -1);

  while (! bits && ++start < end)
    bits=start[0];
  if (!bits)
    return 0;
  pos=(uint) (start-set->bits)*WORD_BIT;
  /* Locate the lowest set bit in the word */
  while (! (bits & 1))
  {
    bits>>=1;
    pos++;
  }
  return pos;
}
819
/* Find out if there is an identical set in sets. If there is, use it and
free the given set, else keep the given set in sets and return its
position */
823
/*
  Look for an earlier visible set with the same bitmap as 'find', which
  is expected to be the last set in the pool.

  Returns the index of the matching earlier set, after giving 'find'
  back to the pool; otherwise the index of 'find' itself.
*/
static short find_set(REP_SETS *sets,REP_SET *find)
{
  uint idx= 0;
  const uint last= sets->count-1;       /* Position of 'find' */

  while (idx < last)
  {
    if (cmp_bits(sets->set+idx,find) == 0)
    {
      free_last_set(sets);
      return (short) idx;
    }
    idx++;
  }
  return (short) idx; /* return new position */
}
837
838
/*
Find out if there is a found_set with the same table_offset & found_offset.
If there is, return the offset to it, else add a new entry and return its pos.
The pos returned is -offset-2 in the found_set structure because it is
saved in set->next, where set->next[] >= 0 points to the next set and
set->next[] == -1 is reserved for end without replaces.
*/
846
/*
  Return the code for the (table_offset, found_offset) pair, appending
  the pair to found_set (and bumping found_sets) if it is not there yet.
  The code for entry number i is -i-2.
*/
static short find_found(FOUND_SET *found_set,uint table_offset,
                        int found_offset)
{
  int slot= 0;
  FOUND_SET *entry= found_set;

  while ((uint) slot < found_sets)
  {
    if (entry->table_offset == table_offset &&
        entry->found_offset == found_offset)
      return (short) (-slot-2);
    slot++;
    entry++;
  }

  /* Not seen before: 'entry' now points at the first unused slot */
  entry->table_offset=table_offset;
  entry->found_offset=found_offset;
  found_sets++;
  return (short) (-slot-2); /* return new position */
}
860
861 /* Return 1 if regexp starts with \b or ends with \b*/
862
/*
  Return 1 if the from-string begins with \b followed by more text, or
  begins with \^; otherwise 0.
*/
static uint start_at_word(char * pos)
{
  if (memcmp(pos,"\\b",2) == 0 && pos[2])
    return 1;
  if (memcmp(pos,"\\^",2) == 0)
    return 1;
  return 0;
}
867
end_of_word(char * pos)868 static uint end_of_word(char * pos)
869 {
870 char * end=strend(pos);
871 return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
872 (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
873 1 : 0;
874 }
875
876
/*
  Length of a from-string counted in matched characters: a backslash
  together with the character after it counts as one.  A backslash at
  the very end of the string counts as a character of its own.
*/
static uint replace_len(char * str)
{
  uint tokens;

  for (tokens= 0 ; *str ; tokens++)
    str+= (str[0] == '\\' && str[1]) ? 2 : 1;
  return tokens;
}
889
890
891 /* The actual loop */
892
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)893 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
894 char *from)
895 {
896 reg1 REPLACE *rep_pos;
897 reg2 REPLACE_STRING *rep_str;
898 char *to, *end, *pos, *new;
899
900 end=(to= *start) + *max_length-1;
901 rep_pos=rep+1;
902 for(;;)
903 {
904 while (!rep_pos->found)
905 {
906 rep_pos= rep_pos->next[(uchar) *from];
907 if (to == end)
908 {
909 (*max_length)+=8192;
910 if (!(new=my_realloc(PSI_NOT_INSTRUMENTED, *start,*max_length,MYF(MY_WME))))
911 return (uint) -1;
912 to=new+(to - *start);
913 end=(*start=new)+ *max_length-1;
914 }
915 *to++= *from++;
916 }
917 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
918 return (uint) (to - *start)-1;
919 updated=1; /* Some char * is replaced */
920 to-=rep_str->to_offset;
921 for (pos=rep_str->replace_string; *pos ; pos++)
922 {
923 if (to == end)
924 {
925 (*max_length)*=2;
926 if (!(new=my_realloc(PSI_NOT_INSTRUMENTED, *start,*max_length,MYF(MY_WME))))
927 return (uint) -1;
928 to=new+(to - *start);
929 end=(*start=new)+ *max_length-1;
930 }
931 *to++= *pos;
932 }
933 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
934 return (uint) (to - *start);
935 rep_pos=rep;
936 }
937 }
938
static char *buffer; /* The buffer itself, grown as needed. */
static int bufbytes; /* Number of bytes in the buffer. */
/*
  bufread: number of bytes to get with each read().
  my_eof:  set once end of file was reached and a newline was added
           after a last line that did not end with one.
*/
static int bufread,my_eof;
static uint bufalloc; /* Size of 'buffer', not counting the sentinel byte */
static char *out_buff; /* Output line buffer used by replace_strings() */
static uint out_length; /* Allocated size of out_buff */
945
initialize_buffer()946 static int initialize_buffer()
947 {
948 bufread = 8192;
949 bufalloc = bufread + bufread / 2;
950 if (!(buffer = my_malloc(PSI_NOT_INSTRUMENTED, bufalloc+1, MYF(MY_WME))))
951 return 1;
952 bufbytes=my_eof=0;
953 out_length=bufread;
954 if (!(out_buff=my_malloc(PSI_NOT_INSTRUMENTED, out_length, MYF(MY_WME))))
955 return(1);
956 return 0;
957 }
958
reset_buffer()959 static void reset_buffer()
960 {
961 bufbytes=my_eof=0;
962 }
963
free_buffer()964 static void free_buffer()
965 {
966 my_free(buffer);
967 my_free(out_buff);
968 }
969
970
971 /*
972 Fill the buffer retaining the last n bytes at the beginning of the
973 newly filled buffer (for backward context). Returns the number of new
974 bytes read from disk.
975 */
976
fill_buffer_retaining(fd,n)977 static int fill_buffer_retaining(fd,n)
978 File fd;
979 int n;
980 {
981 int i;
982
983 /* See if we need to grow the buffer. */
984 if ((int) bufalloc - n <= bufread)
985 {
986 while ((int) bufalloc - n <= bufread)
987 {
988 bufalloc *= 2;
989 bufread *= 2;
990 }
991 buffer = my_realloc(PSI_NOT_INSTRUMENTED, buffer, bufalloc+1, MYF(MY_WME));
992 if (! buffer)
993 return(-1);
994 }
995
996 /* Shift stuff down. */
997 bmove(buffer,buffer+bufbytes-n,(uint) n);
998 bufbytes = n;
999
1000 if (my_eof)
1001 return 0;
1002
1003 /* Read in new stuff. */
1004 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1005 (size_t) bufread, MYF(MY_WME))) < 0)
1006 return -1;
1007
1008 /* Kludge to pretend every nonempty file ends with a newline. */
1009 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1010 {
1011 my_eof = i = 1;
1012 buffer[bufbytes] = '\n';
1013 }
1014
1015 bufbytes += i;
1016 return i;
1017 }
1018
1019 /* Return 0 if convert is ok */
1020 /* Global variable update is set if something was changed */
1021
convert_pipe(rep,in,out)1022 static int convert_pipe(rep,in,out)
1023 REPLACE *rep;
1024 FILE *in,*out;
1025 {
1026 int retain,error;
1027 uint length;
1028 char save_char,*end_of_line,*start_of_line;
1029 DBUG_ENTER("convert_pipe");
1030
1031 updated=retain=0;
1032 reset_buffer();
1033
1034 while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1035 {
1036 end_of_line=buffer ;
1037 buffer[bufbytes]=0; /* Sentinel */
1038 for (;;)
1039 {
1040 start_of_line=end_of_line;
1041 while (end_of_line[0] != '\n' && end_of_line[0])
1042 end_of_line++;
1043 if (end_of_line == buffer+bufbytes)
1044 {
1045 retain= (int) (end_of_line - start_of_line);
1046 break; /* No end of line, read more */
1047 }
1048 save_char=end_of_line[0];
1049 end_of_line[0]=0;
1050 end_of_line++;
1051 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1052 (uint) -1)
1053 return 1;
1054 if (!my_eof)
1055 out_buff[length++]=save_char; /* Don't write added newline */
1056 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1057 DBUG_RETURN(1);
1058 }
1059 }
1060 DBUG_RETURN(error);
1061 }
1062
1063
/*
  Apply the replacements to the file 'name' in place.

  The result is written to a temporary file in the same directory.  If
  something was changed and no error occurred, the temporary file
  replaces the original; otherwise it is deleted.  If 'name' is a
  symbolic link (and symlinks are not disabled), the link target is the
  file that is read and replaced.

  Returns 0 on success, non-zero on error.
*/
static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", 0,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* No stream took over the descriptor: close it and drop the file */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /*
    Closing flushes buffered output.  If that fails the temporary file
    is incomplete and must not replace the original.
  */
  if (my_fclose(out,MYF(MY_WME)) && !error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name, tempname, 0, MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1112