1 /*
2 Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
23 02110-1301 USA */
24
/*
  Replace strings in textfile

  This program replaces strings in files or from stdin to stdout.
  It accepts a list of from-string/to-string pairs and replaces
  each occurrence of a from-string with the corresponding to-string.
  The first occurrence of a found string is matched. If there is more
  than one possibility for the string to replace, longer matches
  are preferred before shorter matches.

  Special characters in from string:
  \^    Match start of line.
  \$    Match end of line.
  \b    Match space-character, start of line or end of line.
        For an end \b the next replace starts looking at the end
        space-character. A \b alone in a string matches only a
        space-character.
  \r, \t, \v as in C.
  The program makes a DFA state machine of the strings, and the speed isn't
  dependent on the count of replace-strings (only on the number of replaces).
  A line is assumed to end with \n or \0.
  There is no limit except memory on the length of strings.

  Written by Monty.
  fill_buffer_retaining() is taken from gnu-grep and modified.
*/
50
51 #include <my_global.h>
52 #include <m_ctype.h>
53 #include <my_sys.h>
54 #include <m_string.h>
55 #include <errno.h>
56
57 #define PC_MALLOC 256 /* Bytes for pointers */
58 #define PS_MALLOC 512 /* Bytes for data */
59
60 typedef struct st_pointer_array { /* when using array-strings */
61 TYPELIB typelib; /* Pointer to strings */
62 uchar *str; /* Strings is here */
63 uint8 *flag; /* Flag about each var. */
64 uint array_allocs,max_count,length,max_length;
65 } POINTER_ARRAY;
66
67 #define SPACE_CHAR 256
68 #define START_OF_LINE 257
69 #define END_OF_LINE 258
70 #define LAST_CHAR_CODE 259
71
72 typedef struct st_replace {
73 my_bool found;
74 struct st_replace *next[256];
75 } REPLACE;
76
77 typedef struct st_replace_found {
78 my_bool found;
79 char *replace_string;
80 uint to_offset;
81 int from_offset;
82 } REPLACE_STRING;
83
84 #ifndef WORD_BIT
85 #define WORD_BIT (8*sizeof(uint))
86 #endif
87
88 /* functions defined in this file */
89
90 static int static_get_options(int *argc,char * * *argv);
91 static int get_replace_strings(int *argc,char * * *argv,
92 POINTER_ARRAY *from_array,
93 POINTER_ARRAY *to_array);
94 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
95 static void free_pointer_array(POINTER_ARRAY *pa);
96 static int convert_pipe(REPLACE *,FILE *,FILE *);
97 static int convert_file(REPLACE *, char *);
98 static REPLACE *init_replace(char * *from, char * *to,uint count,
99 char * word_end_chars);
100 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
101 char * from);
102 static int initialize_buffer(void);
103 static void reset_buffer(void);
104 static void free_buffer(void);
105
106 static int silent=0,verbose=0,updated=0;
107
108 /* The main program */
109
main(int argc,char * argv[])110 int main(int argc, char *argv[])
111 {
112 int i,error;
113 char word_end_chars[256],*pos;
114 POINTER_ARRAY from,to;
115 REPLACE *replace;
116 MY_INIT(argv[0]);
117
118 if (static_get_options(&argc,&argv))
119 exit(1);
120 if (get_replace_strings(&argc,&argv,&from,&to))
121 exit(1);
122
123 for (i=1,pos=word_end_chars ; i < 256 ; i++)
124 if (my_isspace(&my_charset_latin1,i))
125 *pos++= (char) i;
126 *pos=0;
127 if (!(replace=init_replace((char**) from.typelib.type_names,
128 (char**) to.typelib.type_names,
129 (uint) from.typelib.count,word_end_chars)))
130 exit(1);
131 free_pointer_array(&from);
132 free_pointer_array(&to);
133 if (initialize_buffer())
134 return 1;
135
136 error=0;
137 if (argc == 0)
138 error=convert_pipe(replace,stdin,stdout);
139 else
140 {
141 while (argc--)
142 {
143 error=convert_file(replace,*(argv++));
144 }
145 }
146 free_buffer();
147 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
148 exit(error ? 2 : 0);
149 return 0; /* No compiler warning */
150 } /* main */
151
152
153 /* reads options */
154 /* Initiates DEBUG - but no debugging here ! */
155
static_get_options(argc,argv)156 static int static_get_options(argc,argv)
157 int *argc;
158 char **argv[];
159 {
160 int help,version;
161 char *pos;
162
163 silent=verbose=help=0;
164
165 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
166 while (*++pos)
167 {
168 version=0;
169 switch((*pos)) {
170 case 's':
171 silent=1;
172 break;
173 case 'v':
174 verbose=1;
175 break;
176 case '#':
177 DBUG_PUSH (++pos);
178 pos= (char*) " "; /* Skip rest of arguments */
179 break;
180 case 'V':
181 version=1;
182 // fallthrough
183 case 'I':
184 case '?':
185 help=1; /* Help text written */
186 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
187 MACHINE_TYPE);
188 if (version)
189 break;
190 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
191 puts("This program replaces strings in files or from stdin to stdout.\n"
192 "It accepts a list of from-string/to-string pairs and replaces\n"
193 "each occurrence of a from-string with the corresponding to-string.\n"
194 "The first occurrence of a found string is matched. If there is\n"
195 "more than one possibility for the string to replace, longer\n"
196 "matches are preferred before shorter matches.\n\n"
197 "A from-string can contain these special characters:\n"
198 " \\^ Match start of line.\n"
199 " \\$ Match end of line.\n"
200 " \\b Match space-character, start of line or end of line.\n"
201 " For a end \\b the next replace starts locking at the end\n"
202 " space-character. A \\b alone in a string matches only a\n"
203 " space-character.\n");
204 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
205 puts("or");
206 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
207 puts("");
208 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
209 break;
210 default:
211 fprintf(stderr,"illegal option: -%c\n",*pos);
212 break;
213 }
214 }
215 }
216 if (*argc == 0)
217 {
218 if (!help)
219 my_message(0,"No replace options given",MYF(ME_BELL));
220 exit(0); /* Don't use as pipe */
221 }
222 return(0);
223 } /* static_get_options */
224
225
get_replace_strings(argc,argv,from_array,to_array)226 static int get_replace_strings(argc,argv,from_array,to_array)
227 int *argc;
228 char **argv[];
229 POINTER_ARRAY *from_array,*to_array;
230 {
231 char *pos;
232
233 memset(from_array, 0, sizeof(from_array[0]));
234 memset(to_array, 0, sizeof(to_array[0]));
235 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
236 {
237 insert_pointer_name(from_array,pos);
238 (*argc)--;
239 (*argv)++;
240 if (!*argc || !strcmp(**argv,"--"))
241 {
242 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
243 return 1;
244 }
245 insert_pointer_name(to_array,**argv);
246 (*argc)--;
247 (*argv)++;
248 }
249 if (*argc)
250 { /* Skip "--" argument */
251 (*argc)--;
252 (*argv)++;
253 }
254 return 0;
255 }
256
/*
  Append a copy of name to the pointer array.

  The first call allocates the pointer table and the string buffer.
  The pointer table is followed, in the same allocation, by one flag
  byte per slot; pa->flag points at that area.  Both areas are grown
  on demand.

  Returns 0 on success, -1 if the initial allocation fails and 1 if a
  later reallocation fails.
*/

static int insert_pointer_name(POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    if (!(pa->typelib.type_names=(const char **)
          my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
                     (sizeof(char *)+sizeof(*pa->flag))*
                     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                      MYF(MY_WME))))
    {
      my_free(pa->typelib.type_names);
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
                                               sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Round the needed size up to a multiple of PS_MALLOC */
    pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
                                       (uint) pa->max_length,
                                       MYF(MY_WME))))
      DBUG_RETURN(1);
    if (new_pos != pa->str)
    {
      /* The buffer moved; shift every stored pointer by the same amount */
      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
      for (i=0 ; i < pa->typelib.count ; i++)
        pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
                                              char*);
      pa->str=new_pos;
    }
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    int len;
    pa->array_allocs++;
    len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
                                               (uint) len/
                                               (sizeof(uchar*)+sizeof(*pa->flag))*
                                               (sizeof(uchar*)+sizeof(*pa->flag)),
                                               MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    /*
      Move the flags from behind the old pointer area to behind the new
      one.  The table grows by a fixed number of bytes per step, so with
      enough entries the old and new flag areas overlap; memmove() copes
      with that, memcpy() does not.
    */
    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
            old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;                /* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;     /* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
327
328
329 /* free pointer array */
330
free_pointer_array(POINTER_ARRAY * pa)331 static void free_pointer_array(POINTER_ARRAY *pa)
332 {
333 if (pa->typelib.count)
334 {
335 pa->typelib.count=0;
336 my_free(pa->typelib.type_names);
337 pa->typelib.type_names=0;
338 my_free(pa->str);
339 }
340 return;
341 } /* free_pointer_array */
342
343
344 /* Code for replace rutines */
345
346 #define SET_MALLOC_HUNC 64
347
348 typedef struct st_rep_set {
349 uint *bits; /* Pointer to used sets */
350 short next[LAST_CHAR_CODE]; /* Pointer to next sets */
351 uint found_len; /* Best match to date */
352 int found_offset;
353 uint table_offset;
354 uint size_of_bits; /* For convinience */
355 } REP_SET;
356
357 typedef struct st_rep_sets {
358 uint count; /* Number of sets */
359 uint extra; /* Extra sets in buffer */
360 uint invisible; /* Sets not chown */
361 uint size_of_bits;
362 REP_SET *set,*set_buffer;
363 uint *bit_buffer;
364 } REP_SETS;
365
366 typedef struct st_found_set {
367 uint table_offset;
368 int found_offset;
369 } FOUND_SET;
370
371 typedef struct st_follow {
372 int chr;
373 uint table_offset;
374 uint len;
375 } FOLLOWS;
376
377
378 static int init_sets(REP_SETS *sets,uint states);
379 static REP_SET *make_new_set(REP_SETS *sets);
380 static void make_sets_invisible(REP_SETS *sets);
381 static void free_last_set(REP_SETS *sets);
382 static void free_sets(REP_SETS *sets);
383 static void internal_set_bit(REP_SET *set, uint bit);
384 static void internal_clear_bit(REP_SET *set, uint bit);
385 static void or_bits(REP_SET *to,REP_SET *from);
386 static void copy_bits(REP_SET *to,REP_SET *from);
387 static int cmp_bits(REP_SET *set1,REP_SET *set2);
388 static int get_next_bit(REP_SET *set,uint lastpos);
389 static short find_set(REP_SETS *sets,REP_SET *find);
390 static short find_found(FOUND_SET *found_set,uint table_offset,
391 int found_offset);
392 static uint start_at_word(char * pos);
393 static uint end_of_word(char * pos);
394 static uint replace_len(char * pos);
395
396 static uint found_sets=0;
397
398
399 /* Init a replace structure for further calls */
400
init_replace(char ** from,char ** to,uint count,char * word_end_chars)401 static REPLACE *init_replace(char * *from, char * *to,uint count,
402 char * word_end_chars)
403 {
404 uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
405 int used_sets,chr;
406 short default_state;
407 char used_chars[LAST_CHAR_CODE],is_word_end[256];
408 char * pos, *to_pos, **to_array;
409 REP_SETS sets;
410 REP_SET *set,*start_states,*word_states,*new_set;
411 FOLLOWS *follow,*follow_ptr;
412 REPLACE *replace;
413 FOUND_SET *found_set;
414 REPLACE_STRING *rep_str;
415 DBUG_ENTER("init_replace");
416
417 /* Count number of states */
418 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
419 {
420 len=replace_len(from[i]);
421 if (!len)
422 {
423 errno=EINVAL;
424 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
425 DBUG_RETURN(0);
426 }
427 states+=len+1;
428 result_len+=(uint) strlen(to[i])+1;
429 if (len > max_length)
430 max_length=len;
431 }
432 memset(is_word_end, 0, sizeof(is_word_end));
433 for (i=0 ; word_end_chars[i] ; i++)
434 is_word_end[(uchar) word_end_chars[i]]=1;
435
436 if (init_sets(&sets,states))
437 DBUG_RETURN(0);
438 found_sets=0;
439 if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
440 MYF(MY_WME))))
441 {
442 free_sets(&sets);
443 DBUG_RETURN(0);
444 }
445 (void) make_new_set(&sets); /* Set starting set */
446 make_sets_invisible(&sets); /* Hide previus sets */
447 used_sets=-1;
448 word_states=make_new_set(&sets); /* Start of new word */
449 start_states=make_new_set(&sets); /* This is first state */
450 if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
451 {
452 free_sets(&sets);
453 my_free(found_set);
454 DBUG_RETURN(0);
455 }
456
457 /* Init follow_ptr[] */
458 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
459 {
460 if (from[i][0] == '\\' && from[i][1] == '^')
461 {
462 internal_set_bit(start_states,states+1);
463 if (!from[i][2])
464 {
465 start_states->table_offset=i;
466 start_states->found_offset=1;
467 }
468 }
469 else if (from[i][0] == '\\' && from[i][1] == '$')
470 {
471 internal_set_bit(start_states,states);
472 internal_set_bit(word_states,states);
473 if (!from[i][2] && start_states->table_offset == (uint) ~0)
474 {
475 start_states->table_offset=i;
476 start_states->found_offset=0;
477 }
478 }
479 else
480 {
481 internal_set_bit(word_states,states);
482 if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
483 internal_set_bit(start_states,states+1);
484 else
485 internal_set_bit(start_states,states);
486 }
487 for (pos=from[i], len=0; *pos ; pos++)
488 {
489 if (*pos == '\\' && *(pos+1))
490 {
491 pos++;
492 switch (*pos) {
493 case 'b':
494 follow_ptr->chr = SPACE_CHAR;
495 break;
496 case '^':
497 follow_ptr->chr = START_OF_LINE;
498 break;
499 case '$':
500 follow_ptr->chr = END_OF_LINE;
501 break;
502 case 'r':
503 follow_ptr->chr = '\r';
504 break;
505 case 't':
506 follow_ptr->chr = '\t';
507 break;
508 case 'v':
509 follow_ptr->chr = '\v';
510 break;
511 default:
512 follow_ptr->chr = (uchar) *pos;
513 break;
514 }
515 }
516 else
517 follow_ptr->chr= (uchar) *pos;
518 follow_ptr->table_offset=i;
519 follow_ptr->len= ++len;
520 follow_ptr++;
521 }
522 follow_ptr->chr=0;
523 follow_ptr->table_offset=i;
524 follow_ptr->len=len;
525 follow_ptr++;
526 states+=(uint) len+1;
527 }
528
529
530 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
531 {
532 set=sets.set+set_nr;
533 default_state= 0; /* Start from beginning */
534
535 /* If end of found-string not found or start-set with current set */
536
537 for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
538 {
539 if (!follow[i].chr)
540 {
541 if (! default_state)
542 default_state= find_found(found_set,set->table_offset,
543 set->found_offset+1);
544 }
545 }
546 copy_bits(sets.set+used_sets,set); /* Save set for changes */
547 if (!default_state)
548 or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
549
550 /* Find all chars that follows current sets */
551 memset(used_chars, 0, sizeof(used_chars));
552 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
553 {
554 used_chars[follow[i].chr]=1;
555 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
556 follow[i].len > 1) || follow[i].chr == END_OF_LINE)
557 used_chars[0]=1;
558 }
559
560 /* Mark word_chars used if \b is in state */
561 if (used_chars[SPACE_CHAR])
562 for (pos= word_end_chars ; *pos ; pos++)
563 used_chars[(int) (uchar) *pos] = 1;
564
565 /* Handle other used characters */
566 for (chr= 0 ; chr < 256 ; chr++)
567 {
568 if (! used_chars[chr])
569 set->next[chr]= (short) (chr ? default_state : -1);
570 else
571 {
572 new_set=make_new_set(&sets);
573 set=sets.set+set_nr; /* if realloc */
574 new_set->table_offset=set->table_offset;
575 new_set->found_len=set->found_len;
576 new_set->found_offset=set->found_offset+1;
577 found_end=0;
578
579 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
580 {
581 if (!follow[i].chr || follow[i].chr == chr ||
582 (follow[i].chr == SPACE_CHAR &&
583 (is_word_end[chr] ||
584 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
585 (follow[i].chr == END_OF_LINE && ! chr))
586 {
587 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
588 follow[i].len > found_end)
589 found_end=follow[i].len;
590 if (chr && follow[i].chr)
591 internal_set_bit(new_set,i+1); /* To next set */
592 else
593 internal_set_bit(new_set,i);
594 }
595 }
596 if (found_end)
597 {
598 new_set->found_len=0; /* Set for testing if first */
599 bits_set=0;
600 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
601 {
602 if ((follow[i].chr == SPACE_CHAR ||
603 follow[i].chr == END_OF_LINE) && ! chr)
604 bit_nr=i+1;
605 else
606 bit_nr=i;
607 if (follow[bit_nr-1].len < found_end ||
608 (new_set->found_len &&
609 (chr == 0 || !follow[bit_nr].chr)))
610 internal_clear_bit(new_set,i);
611 else
612 {
613 if (chr == 0 || !follow[bit_nr].chr)
614 { /* best match */
615 new_set->table_offset=follow[bit_nr].table_offset;
616 if (chr || (follow[i].chr == SPACE_CHAR ||
617 follow[i].chr == END_OF_LINE))
618 new_set->found_offset=found_end; /* New match */
619 new_set->found_len=found_end;
620 }
621 bits_set++;
622 }
623 }
624 if (bits_set == 1)
625 {
626 set->next[chr] = find_found(found_set,
627 new_set->table_offset,
628 new_set->found_offset);
629 free_last_set(&sets);
630 }
631 else
632 set->next[chr] = find_set(&sets,new_set);
633 }
634 else
635 set->next[chr] = find_set(&sets,new_set);
636 }
637 }
638 }
639
640 /* Alloc replace structure for the replace-state-machine */
641
642 if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
643 sizeof(REPLACE_STRING)*(found_sets+1)+
644 sizeof(char *)*count+result_len,
645 MYF(MY_WME | MY_ZEROFILL))))
646 {
647 rep_str=(REPLACE_STRING*) (replace+sets.count);
648 to_array=(char **) (rep_str+found_sets+1);
649 to_pos=(char *) (to_array+count);
650 for (i=0 ; i < count ; i++)
651 {
652 to_array[i]=to_pos;
653 to_pos=strmov(to_pos,to[i])+1;
654 }
655 rep_str[0].found=1;
656 rep_str[0].replace_string=0;
657 for (i=1 ; i <= found_sets ; i++)
658 {
659 pos=from[found_set[i-1].table_offset];
660 rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
661 rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
662 rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
663 rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
664 end_of_word(pos);
665 }
666 for (i=0 ; i < sets.count ; i++)
667 {
668 for (j=0 ; j < 256 ; j++)
669 if (sets.set[i].next[j] >= 0)
670 replace[i].next[j]=replace+sets.set[i].next[j];
671 else
672 replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
673 }
674 }
675 my_free(follow);
676 free_sets(&sets);
677 my_free(found_set);
678 DBUG_PRINT("exit",("Replace table has %d states",sets.count));
679 DBUG_RETURN(replace);
680 }
681
682
/*
  Prepare an empty REP_SETS and allocate its two buffers.

  size_of_bits is computed as (states+7)/8 and is then used as the
  number of uint words per bitmap.

  Returns 0 on success, 1 if an allocation fails (nothing is left
  allocated in that case).
*/

static int init_sets(REP_SETS *sets,uint states)
{
  memset(sets, 0, sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* set_buffer is the block allocated above; sets->set is still 0 */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
698
699 /* Make help sets invisible for nicer codeing */
700
make_sets_invisible(REP_SETS * sets)701 static void make_sets_invisible(REP_SETS *sets)
702 {
703 sets->invisible=sets->count;
704 sets->set+=sets->count;
705 sets->count=0;
706 }
707
make_new_set(REP_SETS * sets)708 static REP_SET *make_new_set(REP_SETS *sets)
709 {
710 uint i,count,*bit_buffer;
711 REP_SET *set;
712 if (sets->extra)
713 {
714 sets->extra--;
715 set=sets->set+ sets->count++;
716 memset(set->bits, 0, sizeof(uint)*sets->size_of_bits);
717 memset(&set->next[0], 0, sizeof(set->next[0])*LAST_CHAR_CODE);
718 set->found_offset=0;
719 set->found_len=0;
720 set->table_offset= (uint) ~0;
721 set->size_of_bits=sets->size_of_bits;
722 return set;
723 }
724 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
725 if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
726 sizeof(REP_SET)*count,
727 MYF(MY_WME))))
728 return 0;
729 sets->set_buffer=set;
730 sets->set=set+sets->invisible;
731 if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
732 (sizeof(uint)*sets->size_of_bits)*count,
733 MYF(MY_WME))))
734 return 0;
735 sets->bit_buffer=bit_buffer;
736 for (i=0 ; i < count ; i++)
737 {
738 sets->set_buffer[i].bits=bit_buffer;
739 bit_buffer+=sets->size_of_bits;
740 }
741 sets->extra=SET_MALLOC_HUNC;
742 return make_new_set(sets);
743 }
744
/* Give the most recently made set back to the pool of spare sets */

static void free_last_set(REP_SETS *sets)
{
  sets->extra+= 1;
  sets->count-= 1;
}
751
/* Free the set buffer and the bit buffer */

static void free_sets(REP_SETS *sets)
{
  my_free(sets->set_buffer);
  my_free(sets->bit_buffer);
}
758
/* Set bit number 'bit' in the bitmap of 'set' */

static void internal_set_bit(REP_SET *set, uint bit)
{
  /* 1U: shifting a signed 1 into the top bit would be undefined */
  set->bits[bit / WORD_BIT] |= 1U << (bit % WORD_BIT);
  return;
}
764
/* Clear bit number 'bit' in the bitmap of 'set' */

static void internal_clear_bit(REP_SET *set, uint bit)
{
  /* 1U: shifting a signed 1 into the top bit would be undefined */
  set->bits[bit / WORD_BIT] &= ~ (1U << (bit % WORD_BIT));
  return;
}
770
771
/* Add every bit that is set in 'from' to 'to' */

static void or_bits(REP_SET *to,REP_SET *from)
{
  uint word= 0;

  while (word < to->size_of_bits)
  {
    to->bits[word]= to->bits[word] | from->bits[word];
    word++;
  }
}
779
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */

static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits, (uchar*) from->bits, bytes);
}
785
/* Compare two bitmaps; the result is 0 if they are identical */

static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bytes);
}
791
792
793 /* Get next set bit from set. */
794
get_next_bit(REP_SET * set,uint lastpos)795 static int get_next_bit(REP_SET *set,uint lastpos)
796 {
797 uint pos,*start,*end,bits;
798
799 start=set->bits+ ((lastpos+1) / WORD_BIT);
800 end=set->bits + set->size_of_bits;
801 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
802
803 while (! bits && ++start < end)
804 bits=start[0];
805 if (!bits)
806 return 0;
807 pos=(uint) (start-set->bits)*WORD_BIT;
808 while (! (bits & 1))
809 {
810 bits>>=1;
811 pos++;
812 }
813 return pos;
814 }
815
816 /* find if there is a same set in sets. If there is, use it and
817 free given set, else put in given set in sets and return it's
818 position */
819
find_set(REP_SETS * sets,REP_SET * find)820 static short find_set(REP_SETS *sets,REP_SET *find)
821 {
822 uint i;
823 for (i=0 ; i < sets->count-1 ; i++)
824 {
825 if (!cmp_bits(sets->set+i,find))
826 {
827 free_last_set(sets);
828 return (short) i;
829 }
830 }
831 return (short) i; /* return new position */
832 }
833
834
835 /*
836 find if there is a found_set with same table_offset & found_offset
837 If there is return offset to it, else add new offset and return pos.
838 Pos returned is -offset-2 in found_set_structure because it's is
839 saved in set->next and set->next[] >= 0 points to next set and
840 set->next[] == -1 is reserved for end without replaces.
841 */
842
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)843 static short find_found(FOUND_SET *found_set,uint table_offset,
844 int found_offset)
845 {
846 int i;
847 for (i=0 ; (uint) i < found_sets ; i++)
848 if (found_set[i].table_offset == table_offset &&
849 found_set[i].found_offset == found_offset)
850 return (short) (-i-2);
851 found_set[i].table_offset=table_offset;
852 found_set[i].found_offset=found_offset;
853 found_sets++;
854 return (short) (-i-2); /* return new position */
855 }
856
/*
  Return 1 if the from-string starts with \^ or with a \b that is
  followed by more characters, else 0.
*/

static uint start_at_word(char * pos)
{
  if (memcmp(pos, "\\b", 2) == 0 && pos[2])
    return 1;
  if (memcmp(pos, "\\^", 2) == 0)
    return 1;
  return 0;
}
863
/*
  Return 1 if the from-string ends with \$ or with a \b that has
  something in front of it, else 0.
*/

static uint end_of_word(char * pos)
{
  size_t length= strlen(pos);
  char *tail;

  if (length < 2)
    return 0;
  tail= pos + length - 2;                       /* The last two characters */
  if (length > 2 && memcmp(tail, "\\b", 2) == 0)
    return 1;
  if (memcmp(tail, "\\$", 2) == 0)
    return 1;
  return 0;
}
871
872
/*
  Return the length of a from-string, where a backslash together with
  the character after it counts as one.
*/

static uint replace_len(char * str)
{
  uint count;

  for (count= 0; *str; count++)
  {
    if (*str == '\\' && str[1])
      str++;                                    /* Skip the backslash */
    str++;
  }
  return count;
}
885
886
887 /* The actual loop */
888
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)889 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
890 char *from)
891 {
892 REPLACE *rep_pos;
893 REPLACE_STRING *rep_str;
894 char *to, *end, *pos, *new;
895
896 end=(to= *start) + *max_length-1;
897 rep_pos=rep+1;
898 for(;;)
899 {
900 while (!rep_pos->found)
901 {
902 rep_pos= rep_pos->next[(uchar) *from];
903 if (to == end)
904 {
905 (*max_length)+=8192;
906 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
907 return (uint) -1;
908 to=new+(to - *start);
909 end=(*start=new)+ *max_length-1;
910 }
911 *to++= *from++;
912 }
913 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
914 return (uint) (to - *start)-1;
915 updated=1; /* Some char * is replaced */
916 to-=rep_str->to_offset;
917 for (pos=rep_str->replace_string; *pos ; pos++)
918 {
919 if (to == end)
920 {
921 (*max_length)*=2;
922 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
923 return (uint) -1;
924 to=new+(to - *start);
925 end=(*start=new)+ *max_length-1;
926 }
927 *to++= *pos;
928 }
929 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
930 return (uint) (to - *start);
931 rep_pos=rep;
932 }
933 }
934
935 static char *buffer; /* The buffer itself, grown as needed. */
936 static int bufbytes; /* Number of bytes in the buffer. */
937 static int bufread,my_eof; /* Number of bytes to get with each read(). */
938 static uint bufalloc;
939 static char *out_buff;
940 static uint out_length;
941
initialize_buffer()942 static int initialize_buffer()
943 {
944 bufread = 8192;
945 bufalloc = bufread + bufread / 2;
946 if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
947 return 1;
948 bufbytes=my_eof=0;
949 out_length=bufread;
950 if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
951 return(1);
952 return 0;
953 }
954
reset_buffer()955 static void reset_buffer()
956 {
957 bufbytes=my_eof=0;
958 }
959
free_buffer()960 static void free_buffer()
961 {
962 my_free(buffer);
963 my_free(out_buff);
964 }
965
966
967 /*
968 Fill the buffer retaining the last n bytes at the beginning of the
969 newly filled buffer (for backward context). Returns the number of new
970 bytes read from disk.
971 */
972
fill_buffer_retaining(fd,n)973 static int fill_buffer_retaining(fd,n)
974 File fd;
975 int n;
976 {
977 int i;
978
979 /* See if we need to grow the buffer. */
980 if ((int) bufalloc - n <= bufread)
981 {
982 while ((int) bufalloc - n <= bufread)
983 {
984 bufalloc *= 2;
985 bufread *= 2;
986 }
987 buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
988 if (! buffer)
989 return(-1);
990 }
991
992 /* Shift stuff down. */
993 bmove(buffer,buffer+bufbytes-n,(uint) n);
994 bufbytes = n;
995
996 if (my_eof)
997 return 0;
998
999 /* Read in new stuff. */
1000 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
1001 (size_t) bufread, MYF(MY_WME))) < 0)
1002 return -1;
1003
1004 /* Kludge to pretend every nonempty file ends with a newline. */
1005 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1006 {
1007 my_eof = i = 1;
1008 buffer[bufbytes] = '\n';
1009 }
1010
1011 bufbytes += i;
1012 return i;
1013 }
1014
1015 /* Return 0 if convert is ok */
1016 /* Global variable update is set if something was changed */
1017
convert_pipe(rep,in,out)1018 static int convert_pipe(rep,in,out)
1019 REPLACE *rep;
1020 FILE *in,*out;
1021 {
1022 int retain,error;
1023 uint length;
1024 char save_char,*end_of_line,*start_of_line;
1025 DBUG_ENTER("convert_pipe");
1026
1027 updated=retain=0;
1028 reset_buffer();
1029
1030 while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1031 {
1032 end_of_line=buffer ;
1033 buffer[bufbytes]=0; /* Sentinel */
1034 for (;;)
1035 {
1036 start_of_line=end_of_line;
1037 while (end_of_line[0] != '\n' && end_of_line[0])
1038 end_of_line++;
1039 if (end_of_line == buffer+bufbytes)
1040 {
1041 retain= (int) (end_of_line - start_of_line);
1042 break; /* No end of line, read more */
1043 }
1044 save_char=end_of_line[0];
1045 end_of_line[0]=0;
1046 end_of_line++;
1047 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1048 (uint) -1)
1049 return 1;
1050 if (!my_eof)
1051 out_buff[length++]=save_char; /* Don't write added newline */
1052 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1053 DBUG_RETURN(1);
1054 }
1055 }
1056 DBUG_RETURN(error);
1057 }
1058
1059
/*
  Convert one file in place.

  The result is written to a temporary file in the directory of the
  file.  If something was replaced and no error occurred, the temporary
  file takes the place of the original; otherwise it is deleted.  When
  HAVE_READLINK is defined and symlinks are not disabled, a symlink is
  followed first.

  Returns 0 on success, non-zero on error.
*/

static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* No stream owns the descriptor; close it and drop the empty file */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* Closing flushes; a failure here means the output is incomplete */
  if (my_fclose(out,MYF(MY_WME)) && ! error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;                                  /* Original not replaced */
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1108