1 /*
2 Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; version 2 of
7 the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17 02110-1301 USA */
18
19 /*
20 Replace strings in textfile
21
22 This program replaces strings in files or from stdin to stdout.
23 It accepts a list of from-string/to-string pairs and replaces
24 each occurrence of a from-string with the corresponding to-string.
25 The first occurrence of a found string is matched. If there is more
26 than one possibility for the string to replace, longer matches
27 are preferred before shorter matches.
28
29 Special characters in from string:
30 \^ Match start of line.
31 \$ Match end of line.
32 \b Match space-character, start of line or end of line.
33 For end \b the next replace starts looking at the end space-character.
34 An \b alone or in a string matches only a space-character.
35 \r, \t, \v as in C.
36 The program builds a DFA state machine from the strings, so the speed does
37 not depend on the number of replace-strings (only on the number of replaces).
38 A line is assumed ending with \n or \0.
39 There is no limit except memory on the length of strings.
40
41 Written by Monty.
42 fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50
/* Allocation granularity used by insert_pointer_name(); the actual requests
   are these sizes less MALLOC_OVERHEAD. */
51 #define PC_MALLOC 256 /* Bytes for pointers */
52 #define PS_MALLOC 512 /* Bytes for data */
53
/*
  Growable list of strings, filled by insert_pointer_name().
  typelib.type_names and flag live in one allocation: the flag bytes start
  right after max_count pointer slots.
*/
54 typedef struct st_pointer_array { /* when using array-strings */
55 TYPELIB typelib; /* Pointer to strings */
56 uchar *str; /* The string bytes are stored here */
57 uint8 *flag; /* Flag about each var. */
/* array_allocs: number of PC_MALLOC chunks in the pointer/flag block,
   max_count: pointer slots available, length: bytes used in str,
   max_length: bytes allocated for str. */
58 uint array_allocs,max_count,length,max_length;
59 } POINTER_ARRAY;
60
/* Pseudo character codes above the 0..255 byte range; init_replace() stores
   them in FOLLOWS.chr for \b, \^ and \$ while building the state machine. */
61 #define SPACE_CHAR 256
62 #define START_OF_LINE 257
63 #define END_OF_LINE 258
64 #define LAST_CHAR_CODE 259
65
/*
  One state of the finished replace machine: next[] gives the successor for
  every input byte. init_replace() leaves found zero in these entries; a
  next[] pointer may instead address a REPLACE_STRING (cast to REPLACE*),
  whose found member is non-zero, which is how replace_strings() tells the
  two apart.
*/
66 typedef struct st_replace {
67 my_bool found;
68 struct st_replace *next[256];
69 } REPLACE;
70
/*
  Terminal node reached when a from-string has matched or the line has
  ended. Its first member mirrors REPLACE.found so that replace_strings()
  can test it through a REPLACE pointer.
*/
71 typedef struct st_replace_found {
72 my_bool found; /* 1, or 2 when the from-string is exactly "\^" */
73 char *replace_string; /* To-string; 0 in the entry that ends a line */
74 uint to_offset; /* Output bytes to step back before writing */
75 int from_offset; /* Input bytes to step back after the match */
76 } REPLACE_STRING;
77
/* Number of bits in one uint word of a REP_SET bit array. */
78 #ifndef WORD_BIT
79 #define WORD_BIT (8*sizeof(uint))
80 #endif
81
82 /* Functions defined in this file: option parsing, string lists,
   state-machine construction, the replace loop and buffered file I/O. */
83
84 static int static_get_options(int *argc,char * * *argv);
85 static int get_replace_strings(int *argc,char * * *argv,
86 POINTER_ARRAY *from_array,
87 POINTER_ARRAY *to_array);
88 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
89 static void free_pointer_array(POINTER_ARRAY *pa);
90 static int convert_pipe(REPLACE *,FILE *,FILE *);
91 static int convert_file(REPLACE *, char *);
92 static REPLACE *init_replace(char * *from, char * *to,uint count,
93 char * word_end_chars);
94 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
95 char * from);
96 static int initialize_buffer(void);
97 static void reset_buffer(void);
98 static void free_buffer(void);
99
/* silent and verbose are set by the -s and -v options; updated is cleared by
   convert_pipe() and set by replace_strings() once a replacement is made. */
100 static int silent=0,verbose=0,updated=0;
101
102 /* The main program */
103
main(int argc,char * argv[])104 int main(int argc, char *argv[])
105 {
106 int i,error;
107 char word_end_chars[256],*pos;
108 POINTER_ARRAY from,to;
109 REPLACE *replace;
110 MY_INIT(argv[0]);
111
112 if (static_get_options(&argc,&argv))
113 exit(1);
114 if (get_replace_strings(&argc,&argv,&from,&to))
115 exit(1);
116
117 for (i=1,pos=word_end_chars ; i < 256 ; i++)
118 if (my_isspace(&my_charset_latin1,i))
119 *pos++= (char) i;
120 *pos=0;
121 if (!(replace=init_replace((char**) from.typelib.type_names,
122 (char**) to.typelib.type_names,
123 (uint) from.typelib.count,word_end_chars)))
124 exit(1);
125 free_pointer_array(&from);
126 free_pointer_array(&to);
127 if (initialize_buffer())
128 return 1;
129
130 error=0;
131 if (argc == 0)
132 error=convert_pipe(replace,stdin,stdout);
133 else
134 {
135 while (argc--)
136 {
137 error=convert_file(replace,*(argv++));
138 }
139 }
140 free_buffer();
141 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
142 exit(error ? 2 : 0);
143 return 0; /* No compiler warning */
144 } /* main */
145
146
147 /* reads options */
148 /* Initiates DEBUG - but no debugging here ! */
149
static_get_options(argc,argv)150 static int static_get_options(argc,argv)
151 register int *argc;
152 register char **argv[];
153 {
154 int help,version;
155 char *pos;
156
157 silent=verbose=help=0;
158
159 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
160 while (*++pos)
161 {
162 version=0;
163 switch((*pos)) {
164 case 's':
165 silent=1;
166 break;
167 case 'v':
168 verbose=1;
169 break;
170 case '#':
171 DBUG_PUSH (++pos);
172 pos= (char*) " "; /* Skip rest of arguments */
173 break;
174 case 'V':
175 version=1;
176 case 'I':
177 case '?':
178 help=1; /* Help text written */
179 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
180 MACHINE_TYPE);
181 if (version)
182 break;
183 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
184 puts("This program replaces strings in files or from stdin to stdout.\n"
185 "It accepts a list of from-string/to-string pairs and replaces\n"
186 "each occurrence of a from-string with the corresponding to-string.\n"
187 "The first occurrence of a found string is matched. If there is\n"
188 "more than one possibility for the string to replace, longer\n"
189 "matches are preferred before shorter matches.\n\n"
190 "A from-string can contain these special characters:\n"
191 " \\^ Match start of line.\n"
192 " \\$ Match end of line.\n"
193 " \\b Match space-character, start of line or end of line.\n"
194 " For a end \\b the next replace starts locking at the end\n"
195 " space-character. A \\b alone in a string matches only a\n"
196 " space-character.\n");
197 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
198 puts("or");
199 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
200 puts("");
201 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
202 break;
203 default:
204 fprintf(stderr,"illegal option: -%c\n",*pos);
205 break;
206 }
207 }
208 }
209 if (*argc == 0)
210 {
211 if (!help)
212 my_message(0,"No replace options given",MYF(ME_BELL));
213 exit(0); /* Don't use as pipe */
214 }
215 return(0);
216 } /* static_get_options */
217
218
get_replace_strings(argc,argv,from_array,to_array)219 static int get_replace_strings(argc,argv,from_array,to_array)
220 register int *argc;
221 register char **argv[];
222 POINTER_ARRAY *from_array,*to_array;
223 {
224 char *pos;
225
226 bzero((char*) from_array,sizeof(from_array[0]));
227 bzero((char*) to_array,sizeof(to_array[0]));
228 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
229 {
230 insert_pointer_name(from_array,pos);
231 (*argc)--;
232 (*argv)++;
233 if (!*argc || !strcmp(**argv,"--"))
234 {
235 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
236 return 1;
237 }
238 insert_pointer_name(to_array,**argv);
239 (*argc)--;
240 (*argv)++;
241 }
242 if (*argc)
243 { /* Skip "--" argument */
244 (*argc)--;
245 (*argv)++;
246 }
247 return 0;
248 }
249
/*
  Append a copy of name to the string list pa.

  The first call allocates the pointer/flag block and the string buffer;
  later calls grow them as needed. When the string buffer moves, every
  stored pointer is adjusted by the same distance. The pointer list is kept
  terminated by a NullS entry.

  Sizes recorded in pa are only updated after the matching (re)allocation
  has succeeded, so a failed call leaves pa describing memory it really
  owns.

  Returns 0 on success, non-zero (-1 or 1) if memory could not be allocated.
*/

static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count,new_max_length;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    if (!(pa->typelib.type_names=(const char **)
          my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
                     (sizeof(char *)+sizeof(*pa->flag))*
                     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
                                      MYF(MY_WME))))
    {
      my_free(pa->typelib.type_names);
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
                                               sizeof(*pa->flag));
    /* The flag bytes follow directly after the pointer slots */
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Round the needed size up to a multiple of PS_MALLOC */
    new_max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    new_max_length=new_max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
                                       (uint) new_max_length,
                                       MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->max_length=new_max_length;
    if (new_pos != pa->str)
    {
      /* The buffer moved; shift every stored string pointer with it */
      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
      for (i=0 ; i < pa->typelib.count ; i++)
        pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
                                              char*);
      pa->str=new_pos;
    }
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    int len;
    len=(PC_MALLOC*(pa->array_allocs+1) - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
                                               (uint) len/
                                               (sizeof(uchar*)+sizeof(*pa->flag))*
                                               (sizeof(uchar*)+sizeof(*pa->flag)),
                                               MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->array_allocs++;
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    /* Move the flag bytes to their new place behind the larger pointer area */
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
           old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
320
321
322 /* free pointer array */
323
free_pointer_array(reg1 POINTER_ARRAY * pa)324 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
325 {
326 if (pa->typelib.count)
327 {
328 pa->typelib.count=0;
329 my_free(pa->typelib.type_names);
330 pa->typelib.type_names=0;
331 my_free(pa->str);
332 }
333 return;
334 } /* free_pointer_array */
335
336
337 /* Code for replace routines */
338
/* Number of REP_SET slots added each time the set buffer is grown. */
339 #define SET_MALLOC_HUNC 64
340
/*
  One state under construction in init_replace(): a bit set over follow[]
  positions plus the transition table being filled in for it.
*/
341 typedef struct st_rep_set {
342 uint *bits; /* Bit array; set bits are indexes into follow[] */
343 short next[LAST_CHAR_CODE]; /* >= 0: index of next set, < 0: found entry */
344 uint found_len; /* Best match to date */
345 int found_offset;
346 uint table_offset; /* Index of a from-string; (uint) ~0 when unset */
347 uint size_of_bits; /* For convenience; copy of REP_SETS.size_of_bits */
348 } REP_SET;
349
/*
  Growable pool of REP_SETs. set_buffer owns the sets and bit_buffer owns
  the bit arrays of all of them; set points invisible entries into
  set_buffer.
*/
350 typedef struct st_rep_sets {
351 uint count; /* Number of sets */
352 uint extra; /* Extra sets in buffer */
353 uint invisible; /* Sets not shown */
354 uint size_of_bits; /* Number of uint words in each bit array */
355 REP_SET *set,*set_buffer;
356 uint *bit_buffer;
357 } REP_SETS;
358
/* One (from-string, offset) pair recorded by find_found(); init_replace()
   turns each entry into a REPLACE_STRING. */
359 typedef struct st_found_set {
360 uint table_offset; /* Index of the from-string */
361 int found_offset;
362 } FOUND_SET;
363
/*
  One position inside a from-string, as laid out by init_replace(): all
  from-strings are stored back to back, each followed by an entry with
  chr == 0.
*/
364 typedef struct st_follow {
365 int chr; /* Byte value or pseudo character code; 0 ends a from-string */
366 uint table_offset; /* Index of the from-string this entry belongs to */
367 uint len; /* Pattern characters up to and including this entry */
368 } FOLLOWS;
369
370
/* Helpers used by init_replace() to manage the REP_SET pool and bit sets. */
371 static int init_sets(REP_SETS *sets,uint states);
372 static REP_SET *make_new_set(REP_SETS *sets);
373 static void make_sets_invisible(REP_SETS *sets);
374 static void free_last_set(REP_SETS *sets);
375 static void free_sets(REP_SETS *sets);
376 static void internal_set_bit(REP_SET *set, uint bit);
377 static void internal_clear_bit(REP_SET *set, uint bit);
378 static void or_bits(REP_SET *to,REP_SET *from);
379 static void copy_bits(REP_SET *to,REP_SET *from);
380 static int cmp_bits(REP_SET *set1,REP_SET *set2);
381 static int get_next_bit(REP_SET *set,uint lastpos);
382 static short find_set(REP_SETS *sets,REP_SET *find);
383 static short find_found(FOUND_SET *found_set,uint table_offset,
384 int found_offset);
385 static uint start_at_word(char * pos);
386 static uint end_of_word(char * pos);
387 static uint replace_len(char * pos);
388
/* Number of entries in use in the found_set array; reset by init_replace()
   and incremented by find_found(). */
389 static uint found_sets=0;
390
391
392 /* Init a replace structure for further calls */
393
init_replace(char ** from,char ** to,uint count,char * word_end_chars)394 static REPLACE *init_replace(char * *from, char * *to,uint count,
395 char * word_end_chars)
396 {
397 uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
398 int used_sets,chr;
399 short default_state;
400 char used_chars[LAST_CHAR_CODE],is_word_end[256];
401 char * pos, *to_pos, **to_array;
402 REP_SETS sets;
403 REP_SET *set,*start_states,*word_states,*new_set;
404 FOLLOWS *follow,*follow_ptr;
405 REPLACE *replace;
406 FOUND_SET *found_set;
407 REPLACE_STRING *rep_str;
408 DBUG_ENTER("init_replace");
409
410 /* Count number of states */
411 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
412 {
413 len=replace_len(from[i]);
414 if (!len)
415 {
416 errno=EINVAL;
417 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
418 DBUG_RETURN(0);
419 }
420 states+=len+1;
421 result_len+=(uint) strlen(to[i])+1;
422 if (len > max_length)
423 max_length=len;
424 }
425 bzero((char*) is_word_end,sizeof(is_word_end));
426 for (i=0 ; word_end_chars[i] ; i++)
427 is_word_end[(uchar) word_end_chars[i]]=1;
428
429 if (init_sets(&sets,states))
430 DBUG_RETURN(0);
431 found_sets=0;
432 if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
433 MYF(MY_WME))))
434 {
435 free_sets(&sets);
436 DBUG_RETURN(0);
437 }
438 (void) make_new_set(&sets); /* Set starting set */
439 make_sets_invisible(&sets); /* Hide previus sets */
440 used_sets=-1;
441 word_states=make_new_set(&sets); /* Start of new word */
442 start_states=make_new_set(&sets); /* This is first state */
443 if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
444 {
445 free_sets(&sets);
446 my_free(found_set);
447 DBUG_RETURN(0);
448 }
449
450 /* Init follow_ptr[] */
451 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
452 {
453 if (from[i][0] == '\\' && from[i][1] == '^')
454 {
455 internal_set_bit(start_states,states+1);
456 if (!from[i][2])
457 {
458 start_states->table_offset=i;
459 start_states->found_offset=1;
460 }
461 }
462 else if (from[i][0] == '\\' && from[i][1] == '$')
463 {
464 internal_set_bit(start_states,states);
465 internal_set_bit(word_states,states);
466 if (!from[i][2] && start_states->table_offset == (uint) ~0)
467 {
468 start_states->table_offset=i;
469 start_states->found_offset=0;
470 }
471 }
472 else
473 {
474 internal_set_bit(word_states,states);
475 if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
476 internal_set_bit(start_states,states+1);
477 else
478 internal_set_bit(start_states,states);
479 }
480 for (pos=from[i], len=0; *pos ; pos++)
481 {
482 if (*pos == '\\' && *(pos+1))
483 {
484 pos++;
485 switch (*pos) {
486 case 'b':
487 follow_ptr->chr = SPACE_CHAR;
488 break;
489 case '^':
490 follow_ptr->chr = START_OF_LINE;
491 break;
492 case '$':
493 follow_ptr->chr = END_OF_LINE;
494 break;
495 case 'r':
496 follow_ptr->chr = '\r';
497 break;
498 case 't':
499 follow_ptr->chr = '\t';
500 break;
501 case 'v':
502 follow_ptr->chr = '\v';
503 break;
504 default:
505 follow_ptr->chr = (uchar) *pos;
506 break;
507 }
508 }
509 else
510 follow_ptr->chr= (uchar) *pos;
511 follow_ptr->table_offset=i;
512 follow_ptr->len= ++len;
513 follow_ptr++;
514 }
515 follow_ptr->chr=0;
516 follow_ptr->table_offset=i;
517 follow_ptr->len=len;
518 follow_ptr++;
519 states+=(uint) len+1;
520 }
521
522
523 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
524 {
525 set=sets.set+set_nr;
526 default_state= 0; /* Start from beginning */
527
528 /* If end of found-string not found or start-set with current set */
529
530 for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
531 {
532 if (!follow[i].chr)
533 {
534 if (! default_state)
535 default_state= find_found(found_set,set->table_offset,
536 set->found_offset+1);
537 }
538 }
539 copy_bits(sets.set+used_sets,set); /* Save set for changes */
540 if (!default_state)
541 or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
542
543 /* Find all chars that follows current sets */
544 bzero((char*) used_chars,sizeof(used_chars));
545 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
546 {
547 used_chars[follow[i].chr]=1;
548 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
549 follow[i].len > 1) || follow[i].chr == END_OF_LINE)
550 used_chars[0]=1;
551 }
552
553 /* Mark word_chars used if \b is in state */
554 if (used_chars[SPACE_CHAR])
555 for (pos= word_end_chars ; *pos ; pos++)
556 used_chars[(int) (uchar) *pos] = 1;
557
558 /* Handle other used characters */
559 for (chr= 0 ; chr < 256 ; chr++)
560 {
561 if (! used_chars[chr])
562 set->next[chr]= (short) (chr ? default_state : -1);
563 else
564 {
565 new_set=make_new_set(&sets);
566 set=sets.set+set_nr; /* if realloc */
567 new_set->table_offset=set->table_offset;
568 new_set->found_len=set->found_len;
569 new_set->found_offset=set->found_offset+1;
570 found_end=0;
571
572 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
573 {
574 if (!follow[i].chr || follow[i].chr == chr ||
575 (follow[i].chr == SPACE_CHAR &&
576 (is_word_end[chr] ||
577 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
578 (follow[i].chr == END_OF_LINE && ! chr))
579 {
580 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
581 follow[i].len > found_end)
582 found_end=follow[i].len;
583 if (chr && follow[i].chr)
584 internal_set_bit(new_set,i+1); /* To next set */
585 else
586 internal_set_bit(new_set,i);
587 }
588 }
589 if (found_end)
590 {
591 new_set->found_len=0; /* Set for testing if first */
592 bits_set=0;
593 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
594 {
595 if ((follow[i].chr == SPACE_CHAR ||
596 follow[i].chr == END_OF_LINE) && ! chr)
597 bit_nr=i+1;
598 else
599 bit_nr=i;
600 if (follow[bit_nr-1].len < found_end ||
601 (new_set->found_len &&
602 (chr == 0 || !follow[bit_nr].chr)))
603 internal_clear_bit(new_set,i);
604 else
605 {
606 if (chr == 0 || !follow[bit_nr].chr)
607 { /* best match */
608 new_set->table_offset=follow[bit_nr].table_offset;
609 if (chr || (follow[i].chr == SPACE_CHAR ||
610 follow[i].chr == END_OF_LINE))
611 new_set->found_offset=found_end; /* New match */
612 new_set->found_len=found_end;
613 }
614 bits_set++;
615 }
616 }
617 if (bits_set == 1)
618 {
619 set->next[chr] = find_found(found_set,
620 new_set->table_offset,
621 new_set->found_offset);
622 free_last_set(&sets);
623 }
624 else
625 set->next[chr] = find_set(&sets,new_set);
626 }
627 else
628 set->next[chr] = find_set(&sets,new_set);
629 }
630 }
631 }
632
633 /* Alloc replace structure for the replace-state-machine */
634
635 if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
636 sizeof(REPLACE_STRING)*(found_sets+1)+
637 sizeof(char *)*count+result_len,
638 MYF(MY_WME | MY_ZEROFILL))))
639 {
640 rep_str=(REPLACE_STRING*) (replace+sets.count);
641 to_array=(char **) (rep_str+found_sets+1);
642 to_pos=(char *) (to_array+count);
643 for (i=0 ; i < count ; i++)
644 {
645 to_array[i]=to_pos;
646 to_pos=strmov(to_pos,to[i])+1;
647 }
648 rep_str[0].found=1;
649 rep_str[0].replace_string=0;
650 for (i=1 ; i <= found_sets ; i++)
651 {
652 pos=from[found_set[i-1].table_offset];
653 rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
654 rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
655 rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
656 rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
657 end_of_word(pos);
658 }
659 for (i=0 ; i < sets.count ; i++)
660 {
661 for (j=0 ; j < 256 ; j++)
662 if (sets.set[i].next[j] >= 0)
663 replace[i].next[j]=replace+sets.set[i].next[j];
664 else
665 replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
666 }
667 }
668 my_free(follow);
669 free_sets(&sets);
670 my_free(found_set);
671 DBUG_PRINT("exit",("Replace table has %d states",sets.count));
672 DBUG_RETURN(replace);
673 }
674
675
/*
  Prepare an empty set pool able to describe the given number of states.

  Zeroes *sets, derives the bit array size from states and allocates the
  initial set and bit buffers. count and extra stay 0, so the first
  make_new_set() call is what makes sets available.

  Returns 0 on success and 1 if memory could not be allocated, in which
  case nothing is left allocated.
*/

static int init_sets(REP_SETS *sets,uint states)
{
  bzero((char*) sets,sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
                                              MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
                                           SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* Free what was allocated above; sets->set is still 0 at this point */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
691
692 /* Make help sets invisible for nicer codeing */
693
make_sets_invisible(REP_SETS * sets)694 static void make_sets_invisible(REP_SETS *sets)
695 {
696 sets->invisible=sets->count;
697 sets->set+=sets->count;
698 sets->count=0;
699 }
700
make_new_set(REP_SETS * sets)701 static REP_SET *make_new_set(REP_SETS *sets)
702 {
703 uint i,count,*bit_buffer;
704 REP_SET *set;
705 if (sets->extra)
706 {
707 sets->extra--;
708 set=sets->set+ sets->count++;
709 bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
710 bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
711 set->found_offset=0;
712 set->found_len=0;
713 set->table_offset= (uint) ~0;
714 set->size_of_bits=sets->size_of_bits;
715 return set;
716 }
717 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
718 if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
719 sizeof(REP_SET)*count,
720 MYF(MY_WME))))
721 return 0;
722 sets->set_buffer=set;
723 sets->set=set+sets->invisible;
724 if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
725 (sizeof(uint)*sets->size_of_bits)*count,
726 MYF(MY_WME))))
727 return 0;
728 sets->bit_buffer=bit_buffer;
729 for (i=0 ; i < count ; i++)
730 {
731 sets->set_buffer[i].bits=bit_buffer;
732 bit_buffer+=sets->size_of_bits;
733 }
734 sets->extra=SET_MALLOC_HUNC;
735 return make_new_set(sets);
736 }
737
/* Give the most recently created set back to the pool of spare sets. */

static void free_last_set(REP_SETS *sets)
{
  sets->extra++;
  sets->count--;
}
744
/* Release the set buffer and the bit buffer of a set pool. */

static void free_sets(REP_SETS *sets)
{
  my_free(sets->set_buffer);
  my_free(sets->bit_buffer);
}
751
/*
  Set bit number 'bit' in the bit array of set.
  The mask is built from an unsigned 1 so that the top bit of a word can be
  addressed without shifting into the sign bit of an int.
*/

static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= (uint) 1 << (bit % WORD_BIT);
  return;
}
757
/*
  Clear bit number 'bit' in the bit array of set.
  The mask is built from an unsigned 1 so that the top bit of a word can be
  addressed without shifting into the sign bit of an int.
*/

static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~((uint) 1 << (bit % WORD_BIT));
  return;
}
763
764
/* OR every word of from's bit array into to's bit array. */

static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *dst_end= dst + to->size_of_bits;
  const uint *src= from->bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
772
/* Overwrite to's bit array with a copy of from's; to's size decides how
   many words are copied. */

static void copy_bits(REP_SET *to,REP_SET *from)
{
  const size_t nbytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits,(uchar*) from->bits,nbytes);
}
778
/* Compare two bit arrays over set1's size; returns 0 when they are equal,
   otherwise the memcmp() result. */

static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  const size_t nbytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, nbytes);
}
784
785
786 /* Get next set bit from set. */
787
get_next_bit(REP_SET * set,uint lastpos)788 static int get_next_bit(REP_SET *set,uint lastpos)
789 {
790 uint pos,*start,*end,bits;
791
792 start=set->bits+ ((lastpos+1) / WORD_BIT);
793 end=set->bits + set->size_of_bits;
794 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
795
796 while (! bits && ++start < end)
797 bits=start[0];
798 if (!bits)
799 return 0;
800 pos=(uint) (start-set->bits)*WORD_BIT;
801 while (! (bits & 1))
802 {
803 bits>>=1;
804 pos++;
805 }
806 return pos;
807 }
808
809 /* find if there is a same set in sets. If there is, use it and
810 free given set, else put in given set in sets and return it's
811 position */
812
find_set(REP_SETS * sets,REP_SET * find)813 static short find_set(REP_SETS *sets,REP_SET *find)
814 {
815 uint i;
816 for (i=0 ; i < sets->count-1 ; i++)
817 {
818 if (!cmp_bits(sets->set+i,find))
819 {
820 free_last_set(sets);
821 return (short) i;
822 }
823 }
824 return (short) i; /* return new position */
825 }
826
827
828 /*
829 find if there is a found_set with same table_offset & found_offset
830 If there is return offset to it, else add new offset and return pos.
831 Pos returned is -offset-2 in found_set_structure because it's is
832 saved in set->next and set->next[] >= 0 points to next set and
833 set->next[] == -1 is reserved for end without replaces.
834 */
835
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)836 static short find_found(FOUND_SET *found_set,uint table_offset,
837 int found_offset)
838 {
839 int i;
840 for (i=0 ; (uint) i < found_sets ; i++)
841 if (found_set[i].table_offset == table_offset &&
842 found_set[i].found_offset == found_offset)
843 return (short) (-i-2);
844 found_set[i].table_offset=table_offset;
845 found_set[i].found_offset=found_offset;
846 found_sets++;
847 return (short) (-i-2); /* return new position */
848 }
849
/*
  Return 1 if the from-string starts with \^, or with \b followed by at
  least one more character; otherwise 0.
*/

static uint start_at_word(char * pos)
{
  if (pos[0] != '\\')
    return 0;
  if (pos[1] == '^')
    return 1;
  return (pos[1] == 'b' && pos[2]) ? 1 : 0;
}
856
/*
  Return 1 if the from-string ends with \$, or ends with \b and is longer
  than just that \b; otherwise 0.
*/

static uint end_of_word(char * pos)
{
  size_t length= strlen(pos);
  const char *tail;

  if (length < 2)
    return 0;
  tail= pos + length - 2;			/* The last two characters */
  if (tail[0] != '\\')
    return 0;
  if (tail[1] == '$')
    return 1;
  return (tail[1] == 'b' && length > 2) ? 1 : 0;
}
864
865
/*
  Return the number of pattern characters in str; a backslash together
  with the character after it counts as one.
*/

static uint replace_len(char * str)
{
  uint count;

  for (count= 0 ; *str ; count++)
  {
    if (*str == '\\' && str[1])
      str++;					/* Skip the escape character */
    str++;
  }
  return count;
}
878
879
880 /* The actual loop */
881
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)882 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
883 char *from)
884 {
885 reg1 REPLACE *rep_pos;
886 reg2 REPLACE_STRING *rep_str;
887 char *to, *end, *pos, *new;
888
889 end=(to= *start) + *max_length-1;
890 rep_pos=rep+1;
891 for(;;)
892 {
893 while (!rep_pos->found)
894 {
895 rep_pos= rep_pos->next[(uchar) *from];
896 if (to == end)
897 {
898 (*max_length)+=8192;
899 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
900 return (uint) -1;
901 to=new+(to - *start);
902 end=(*start=new)+ *max_length-1;
903 }
904 *to++= *from++;
905 }
906 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
907 return (uint) (to - *start)-1;
908 updated=1; /* Some char * is replaced */
909 to-=rep_str->to_offset;
910 for (pos=rep_str->replace_string; *pos ; pos++)
911 {
912 if (to == end)
913 {
914 (*max_length)*=2;
915 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
916 return (uint) -1;
917 to=new+(to - *start);
918 end=(*start=new)+ *max_length-1;
919 }
920 *to++= *pos;
921 }
922 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
923 return (uint) (to - *start);
924 rep_pos=rep;
925 }
926 }
927
/* State shared by the buffer helpers and convert_pipe(). */
928 static char *buffer; /* The buffer itself, grown as needed. */
929 static int bufbytes; /* Number of bytes in the buffer. */
/* bufread: number of bytes to get with each read().
   my_eof: set by fill_buffer_retaining() when it appends the final newline. */
930 static int bufread,my_eof; /* Number of bytes to get with each read(). */
931 static uint bufalloc; /* Size of buffer; one byte more is allocated */
932 static char *out_buff; /* Output buffer passed to replace_strings() */
933 static uint out_length; /* Allocated size of out_buff */
934
initialize_buffer()935 static int initialize_buffer()
936 {
937 bufread = 8192;
938 bufalloc = bufread + bufread / 2;
939 if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
940 return 1;
941 bufbytes=my_eof=0;
942 out_length=bufread;
943 if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
944 return(1);
945 return 0;
946 }
947
reset_buffer()948 static void reset_buffer()
949 {
950 bufbytes=my_eof=0;
951 }
952
free_buffer()953 static void free_buffer()
954 {
955 my_free(buffer);
956 my_free(out_buff);
957 }
958
959
960 /*
961 Fill the buffer retaining the last n bytes at the beginning of the
962 newly filled buffer (for backward context). Returns the number of new
963 bytes read from disk.
964 */
965
fill_buffer_retaining(fd,n)966 static int fill_buffer_retaining(fd,n)
967 File fd;
968 int n;
969 {
970 int i;
971
972 /* See if we need to grow the buffer. */
973 if ((int) bufalloc - n <= bufread)
974 {
975 while ((int) bufalloc - n <= bufread)
976 {
977 bufalloc *= 2;
978 bufread *= 2;
979 }
980 buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
981 if (! buffer)
982 return(-1);
983 }
984
985 /* Shift stuff down. */
986 bmove(buffer,buffer+bufbytes-n,(uint) n);
987 bufbytes = n;
988
989 if (my_eof)
990 return 0;
991
992 /* Read in new stuff. */
993 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
994 (size_t) bufread, MYF(MY_WME))) < 0)
995 return -1;
996
997 /* Kludge to pretend every nonempty file ends with a newline. */
998 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
999 {
1000 my_eof = i = 1;
1001 buffer[bufbytes] = '\n';
1002 }
1003
1004 bufbytes += i;
1005 return i;
1006 }
1007
1008 /* Return 0 if convert is ok */
1009 /* Global variable update is set if something was changed */
1010
convert_pipe(rep,in,out)1011 static int convert_pipe(rep,in,out)
1012 REPLACE *rep;
1013 FILE *in,*out;
1014 {
1015 int retain,error;
1016 uint length;
1017 char save_char,*end_of_line,*start_of_line;
1018 DBUG_ENTER("convert_pipe");
1019
1020 updated=retain=0;
1021 reset_buffer();
1022
1023 while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1024 {
1025 end_of_line=buffer ;
1026 buffer[bufbytes]=0; /* Sentinel */
1027 for (;;)
1028 {
1029 start_of_line=end_of_line;
1030 while (end_of_line[0] != '\n' && end_of_line[0])
1031 end_of_line++;
1032 if (end_of_line == buffer+bufbytes)
1033 {
1034 retain= (int) (end_of_line - start_of_line);
1035 break; /* No end of line, read more */
1036 }
1037 save_char=end_of_line[0];
1038 end_of_line[0]=0;
1039 end_of_line++;
1040 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1041 (uint) -1)
1042 return 1;
1043 if (!my_eof)
1044 out_buff[length++]=save_char; /* Don't write added newline */
1045 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1046 DBUG_RETURN(1);
1047 }
1048 }
1049 DBUG_RETURN(error);
1050 }
1051
1052
/*
  Convert one file in place.

  The converted text is written to a temporary file created in the same
  directory. If something was replaced and no error occurred, the
  temporary file takes the place of the original with my_redel();
  otherwise it is deleted. When symlink support is available and enabled,
  the file the link points to is the one converted.

  A failure to create, close or install the temporary file is treated as an
  error, so an incompletely written file never replaces the original.

  Returns 0 on success and non-zero on error.
*/

static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* No stream owns the descriptor yet; close it and drop the temp file */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* Closing flushes the output; a failure here means the data is incomplete */
  if (my_fclose(out,MYF(MY_WME)) && !error)
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING)))
      error=1;
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1101