1 /*
2 Copyright (c) 2000, 2014, Oracle and/or its affiliates
3
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; version 2 of
7 the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
17 02110-1335 USA */
18
19 /*
20 Replace strings in textfile
21
22 This program replaces strings in files or from stdin to stdout.
23 It accepts a list of from-string/to-string pairs and replaces
24 each occurrence of a from-string with the corresponding to-string.
25 The first occurrence of a found string is matched. If there is more
26 than one possibility for the string to replace, longer matches
27 are preferred before shorter matches.
28
29 Special characters in from string:
30 \^ Match start of line.
31 \$ Match end of line.
32 \b Match space-character, start of line or end of line.
        For an end \b the next replace starts looking at the end
        space-character.
        A \b alone or in a string matches only a space-character.
  \r, \t, \v as in C.
  The program builds a DFA state machine from the strings, so the speed
  does not depend on the number of replace-strings (only on the number of
  replacements made).
  A line is assumed to end with \n or \0.
  There is no limit except memory on the length of strings.
40
41 Written by Monty.
42 fill_buffer_retaining() is taken from gnu-grep and modified.
43 */
44
45 #include <my_global.h>
46 #include <m_ctype.h>
47 #include <my_sys.h>
48 #include <m_string.h>
49 #include <errno.h>
50
/*
  Allocation granularity used by insert_pointer_name(): the pointer/flag
  array grows in PC_MALLOC-byte chunks and the string data buffer in
  PS_MALLOC-byte chunks (MALLOC_OVERHEAD is subtracted from each request).
*/
#define PC_MALLOC		256	/* Bytes for pointers */
#define PS_MALLOC		512	/* Bytes for data */
53
54 typedef struct st_pointer_array { /* when using array-strings */
55 TYPELIB typelib; /* Pointer to strings */
56 uchar *str; /* Strings is here */
57 uint8 *flag; /* Flag about each var. */
58 uint array_allocs,max_count,length,max_length;
59 } POINTER_ARRAY;
60
/*
  Pseudo character codes stored in FOLLOWS.chr for the special from-string
  escapes; they lie above the 0..255 byte range. LAST_CHAR_CODE is the size
  of tables indexed by these codes.
*/
#define SPACE_CHAR	256		/* \b */
#define START_OF_LINE	257		/* \^ */
#define END_OF_LINE	258		/* \$ */
#define LAST_CHAR_CODE	259
65
/*
  One state of the finished replace state machine.
  'found' must stay the first member: replace_strings() reads it through a
  REPLACE pointer that may really address a REPLACE_STRING.
*/
typedef struct st_replace {
  my_bool found;			/* 0 for a state (table is zero-filled) */
  struct st_replace *next[256];		/* Next state or REPLACE_STRING per input byte */
} REPLACE;
70
/*
  Terminal entry of the state machine: describes what to do when a
  from-string has been recognised (or, for entry 0, that the line ended).
*/
typedef struct st_replace_found {
  my_bool found;		/* Non-zero; 2 when the from-string is exactly "\^" */
  char *replace_string;		/* Text to insert; 0 means end of line, no replace */
  uint to_offset;		/* Bytes to step back in the output before inserting */
  int from_offset;		/* Bytes to step back in the input before continuing */
} REPLACE_STRING;
77
/* Number of bits in one uint word of a REP_SET bitmap */
#ifndef WORD_BIT
#define WORD_BIT (8*sizeof(uint))
#endif
81
/* Functions defined in this file */

/* Command line parsing */
static int static_get_options(int *argc,char * * *argv);
static int get_replace_strings(int *argc,char * * *argv,
			       POINTER_ARRAY *from_array,
			       POINTER_ARRAY *to_array);
/* String list handling */
static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
static void free_pointer_array(POINTER_ARRAY *pa);
/* Conversion drivers */
static int convert_pipe(REPLACE *,FILE *,FILE *);
static int convert_file(REPLACE *, char *);
/* State machine construction and execution */
static REPLACE *init_replace(char * *from, char * *to,uint count,
			     char * word_end_chars);
static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
			    char * from);
/* Input/output buffer management */
static int initialize_buffer(void);
static void reset_buffer(void);
static void free_buffer(void);

/*
  silent/verbose are set by the -s/-v options; updated is set to 1 by
  replace_strings() when a replacement is made and cleared at the start of
  convert_pipe().
*/
static int silent=0,verbose=0,updated=0;
101
102 /* The main program */
103
main(int argc,char * argv[])104 int main(int argc, char *argv[])
105 {
106 int i,error;
107 char word_end_chars[256],*pos;
108 POINTER_ARRAY from,to;
109 REPLACE *replace;
110 MY_INIT(argv[0]);
111
112 if (static_get_options(&argc,&argv))
113 exit(1);
114 if (get_replace_strings(&argc,&argv,&from,&to))
115 exit(1);
116
117 for (i=1,pos=word_end_chars ; i < 256 ; i++)
118 if (my_isspace(&my_charset_latin1,i))
119 *pos++= (char) i;
120 *pos=0;
121 if (!(replace=init_replace((char**) from.typelib.type_names,
122 (char**) to.typelib.type_names,
123 (uint) from.typelib.count,word_end_chars)))
124 exit(1);
125 free_pointer_array(&from);
126 free_pointer_array(&to);
127 if (initialize_buffer())
128 return 1;
129
130 error=0;
131 if (argc == 0)
132 error=convert_pipe(replace,stdin,stdout);
133 else
134 {
135 while (argc--)
136 {
137 error=convert_file(replace,*(argv++));
138 }
139 }
140 free_buffer();
141 my_free(replace);
142 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
143 exit(error ? 2 : 0);
144 return 0; /* No compiler warning */
145 } /* main */
146
147
148 /* reads options */
149 /* Initiates DEBUG - but no debugging here ! */
150
static_get_options(argc,argv)151 static int static_get_options(argc,argv)
152 register int *argc;
153 register char **argv[];
154 {
155 int help,version;
156 char *pos;
157
158 silent=verbose=help=0;
159
160 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
161 while (*++pos)
162 {
163 version=0;
164 switch((*pos)) {
165 case 's':
166 silent=1;
167 break;
168 case 'v':
169 verbose=1;
170 break;
171 case '#':
172 DBUG_PUSH (++pos);
173 pos= (char*) " "; /* Skip rest of arguments */
174 break;
175 case 'V':
176 version=1;
177 /* fall through */
178 case 'I':
179 case '?':
180 help=1; /* Help text written */
181 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
182 MACHINE_TYPE);
183 if (version)
184 break;
185 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
186 puts("This program replaces strings in files or from stdin to stdout.\n"
187 "It accepts a list of from-string/to-string pairs and replaces\n"
188 "each occurrence of a from-string with the corresponding to-string.\n"
189 "The first occurrence of a found string is matched. If there is\n"
190 "more than one possibility for the string to replace, longer\n"
191 "matches are preferred before shorter matches.\n\n"
192 "A from-string can contain these special characters:\n"
193 " \\^ Match start of line.\n"
194 " \\$ Match end of line.\n"
195 " \\b Match space-character, start of line or end of line.\n"
196 " For a end \\b the next replace starts locking at the end\n"
197 " space-character. A \\b alone in a string matches only a\n"
198 " space-character.\n");
199 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
200 puts("or");
201 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
202 puts("");
203 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
204 break;
205 default:
206 fprintf(stderr,"illegal option: -%c\n",*pos);
207 break;
208 }
209 }
210 }
211 if (*argc == 0)
212 {
213 if (!help)
214 my_message(0,"No replace options given",MYF(ME_BELL));
215 exit(0); /* Don't use as pipe */
216 }
217 return(0);
218 } /* static_get_options */
219
220
get_replace_strings(argc,argv,from_array,to_array)221 static int get_replace_strings(argc,argv,from_array,to_array)
222 register int *argc;
223 register char **argv[];
224 POINTER_ARRAY *from_array,*to_array;
225 {
226 char *pos;
227
228 bzero((char*) from_array,sizeof(from_array[0]));
229 bzero((char*) to_array,sizeof(to_array[0]));
230 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
231 {
232 insert_pointer_name(from_array,pos);
233 (*argc)--;
234 (*argv)++;
235 if (!*argc || !strcmp(**argv,"--"))
236 {
237 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
238 return 1;
239 }
240 insert_pointer_name(to_array,**argv);
241 (*argc)--;
242 (*argv)++;
243 }
244 if (*argc)
245 { /* Skip "--" argument */
246 (*argc)--;
247 (*argv)++;
248 }
249 return 0;
250 }
251
/*
  Append a copy of 'name' to the string list 'pa'.

  The list keeps one block holding the pointer slots followed by one flag
  byte per slot, and a second block (pa->str) holding the string bytes.
  Both are allocated on first use and grown in PC_MALLOC / PS_MALLOC steps.
  When pa->str moves, every stored pointer is rebased; when the pointer
  block grows, the flag bytes are moved to their new position behind the
  enlarged pointer area.

  Returns 0 on success, -1 if the initial allocation fails and 1 if a
  reallocation fails.
*/

static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
{
  uint i,length,old_count;
  uchar *new_pos;
  const char **new_array;
  DBUG_ENTER("insert_pointer_name");

  if (! pa->typelib.count)
  {
    if (!(pa->typelib.type_names=(const char **)
	  my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
		     (sizeof(char *)+sizeof(*pa->flag))*
		     (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
      DBUG_RETURN(-1);
    if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
				      MYF(MY_WME))))
    {
      my_free((void*) pa->typelib.type_names);
      DBUG_RETURN (-1);
    }
    pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
					       sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    pa->length=0;
    pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
    pa->array_allocs=1;
  }
  length=(uint) strlen(name)+1;
  if (pa->length+length >= pa->max_length)
  {
    /* Round the needed size up to a whole number of PS_MALLOC chunks */
    pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
    pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
    if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
				       (uint) pa->max_length,
				       MYF(MY_WME))))
      DBUG_RETURN(1);
    if (new_pos != pa->str)
    {
      /* The data block moved; rebase all stored string pointers */
      my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
      for (i=0 ; i < pa->typelib.count ; i++)
	pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
					      char*);
      pa->str=new_pos;
    }
  }
  if (pa->typelib.count >= pa->max_count-1)
  {
    int len;
    pa->array_allocs++;
    len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
    if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
					       (uint) len/
					       (sizeof(uchar*)+sizeof(*pa->flag))*
					       (sizeof(uchar*)+sizeof(*pa->flag)),
					       MYF(MY_WME))))
      DBUG_RETURN(1);
    pa->typelib.type_names=new_array;
    old_count=pa->max_count;
    pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
    pa->flag= (uint8*) (pa->typelib.type_names+pa->max_count);
    /*
      The old and new flag areas live in the same block and overlap once
      the list is large enough, so the copy must be overlap-safe.
    */
    memmove((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
	    old_count*sizeof(*pa->flag));
  }
  pa->flag[pa->typelib.count]=0;			/* Reset flag */
  pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
  pa->typelib.type_names[pa->typelib.count]= NullS;	/* Put end-mark */
  (void) strmov((char*) pa->str + pa->length, name);
  pa->length+=length;
  DBUG_RETURN(0);
} /* insert_pointer_name */
322
323
324 /* free pointer array */
325
free_pointer_array(reg1 POINTER_ARRAY * pa)326 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
327 {
328 if (pa->typelib.count)
329 {
330 pa->typelib.count=0;
331 my_free((void*) pa->typelib.type_names);
332 pa->typelib.type_names=0;
333 my_free(pa->str);
334 }
335 return;
336 } /* free_pointer_array */
337
338
/* Code for replace routines */

/* Number of REP_SET entries added each time make_new_set() grows the buffers */
#define SET_MALLOC_HUNC 64
342
/*
  Working state used while building the state machine: a set of positions
  in the follow[] table plus the transitions computed for it.
*/
typedef struct st_rep_set {
  uint	*bits;				/* Bitmap of follow[] positions in this set */
  short	next[LAST_CHAR_CODE];		/* Next set per char: >= 0 set index, < 0 found entry */
  uint	found_len;			/* Best match to date */
  int	found_offset;
  uint	table_offset;			/* Index of the from-string, (uint) ~0 when none */
  uint	size_of_bits;			/* For convenience: number of uint words in bits */
} REP_SET;
351
352 typedef struct st_rep_sets {
353 uint count; /* Number of sets */
354 uint extra; /* Extra sets in buffer */
355 uint invisible; /* Sets not chown */
356 uint size_of_bits;
357 REP_SET *set,*set_buffer;
358 uint *bit_buffer;
359 } REP_SETS;
360
/*
  One distinct (from-string, match offset) combination recorded by
  find_found(); each becomes a REPLACE_STRING in the final table.
*/
typedef struct st_found_set {
  uint table_offset;			/* Index of the matched from-string */
  int found_offset;			/* Offset recorded for the match */
} FOUND_SET;
365
/*
  One position in the flattened list of from-string characters built by
  init_replace(). Each from-string is followed by an entry with chr == 0.
*/
typedef struct st_follow {
  int chr;				/* Byte value, pseudo char code, or 0 at the end */
  uint table_offset;			/* Index of the from-string this belongs to */
  uint len;				/* Pattern characters up to and including this one */
} FOLLOWS;
371
372
/* REP_SETS pool management */
static int init_sets(REP_SETS *sets,uint states);
static REP_SET *make_new_set(REP_SETS *sets);
static void make_sets_invisible(REP_SETS *sets);
static void free_last_set(REP_SETS *sets);
static void free_sets(REP_SETS *sets);
/* Bitmap helpers */
static void internal_set_bit(REP_SET *set, uint bit);
static void internal_clear_bit(REP_SET *set, uint bit);
static void or_bits(REP_SET *to,REP_SET *from);
static void copy_bits(REP_SET *to,REP_SET *from);
static int cmp_bits(REP_SET *set1,REP_SET *set2);
static int get_next_bit(REP_SET *set,uint lastpos);
/* Lookup of existing sets and found entries */
static short find_set(REP_SETS *sets,REP_SET *find);
static short find_found(FOUND_SET *found_set,uint table_offset,
			int found_offset);
/* Helpers that inspect a from-string */
static uint start_at_word(char * pos);
static uint end_of_word(char * pos);
static uint replace_len(char * pos);

/* Number of entries used in the found_set array; maintained by find_found() */
static uint found_sets=0;
392
393
/*
  Init a replace structure for further calls

  Builds the state machine used by replace_strings() from 'count' pairs of
  from[i] / to[i] strings. word_end_chars is a NUL-terminated list of the
  characters that \b is allowed to match.

  The result is one allocated block that holds, in this order: the REPLACE
  states, the REPLACE_STRING entries, an array of pointers to the copied
  to-strings, and the to-string bytes themselves. The caller releases it
  with a single my_free().

  Returns 0 if a from-string has zero length (errno is set to EINVAL) or
  if an allocation fails.
*/

static REPLACE *init_replace(char * *from, char * *to,uint count,
			     char * word_end_chars)
{
  uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
  int used_sets,chr;
  short default_state;
  char used_chars[LAST_CHAR_CODE],is_word_end[256];
  char * pos, *to_pos, **to_array;
  REP_SETS sets;
  REP_SET *set,*start_states,*word_states,*new_set;
  FOLLOWS *follow,*follow_ptr;
  REPLACE *replace;
  FOUND_SET *found_set;
  REPLACE_STRING *rep_str;
  DBUG_ENTER("init_replace");

  /* Count number of states */
  for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
  {
    len=replace_len(from[i]);
    if (!len)
    {
      /*
        NOTE(review): this branch is taken for an empty from-string, which
        the message text below does not describe.
      */
      errno=EINVAL;
      my_message(0,"No to-string for last from-string",MYF(ME_BELL));
      DBUG_RETURN(0);
    }
    states+=len+1;
    result_len+=(uint) strlen(to[i])+1;
    if (len > max_length)
      max_length=len;
  }
  /* Lookup table: is this byte one of the word end characters? */
  bzero((char*) is_word_end,sizeof(is_word_end));
  for (i=0 ; word_end_chars[i] ; i++)
    is_word_end[(uchar) word_end_chars[i]]=1;

  if (init_sets(&sets,states))
    DBUG_RETURN(0);
  found_sets=0;
  if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
					  MYF(MY_WME))))
  {
    free_sets(&sets);
    DBUG_RETURN(0);
  }
  (void) make_new_set(&sets);			/* Set starting set */
  make_sets_invisible(&sets);			/* Hide previous sets */
  used_sets=-1;					/* sets.set[-1] is the hidden scratch set */
  word_states=make_new_set(&sets);		/* Start of new word */
  start_states=make_new_set(&sets);		/* This is first state */
  if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
  {
    free_sets(&sets);
    my_free(found_set);
    DBUG_RETURN(0);
  }

  /* Init follow_ptr[] */
  for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
  {
    /* Decide in which start sets this from-string may begin */
    if (from[i][0] == '\\' && from[i][1] == '^')
    {
      internal_set_bit(start_states,states+1);
      if (!from[i][2])
      {
	start_states->table_offset=i;
	start_states->found_offset=1;
      }
    }
    else if (from[i][0] == '\\' && from[i][1] == '$')
    {
      internal_set_bit(start_states,states);
      internal_set_bit(word_states,states);
      if (!from[i][2] && start_states->table_offset == (uint) ~0)
      {
	start_states->table_offset=i;
	start_states->found_offset=0;
      }
    }
    else
    {
      internal_set_bit(word_states,states);
      if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
	internal_set_bit(start_states,states+1);
      else
	internal_set_bit(start_states,states);
    }
    /* Translate the from-string into follow[] entries, one per character */
    for (pos=from[i], len=0; *pos ; pos++)
    {
      if (*pos == '\\' && *(pos+1))
      {
	pos++;
	switch (*pos) {
	case 'b':
	  follow_ptr->chr = SPACE_CHAR;
	  break;
	case '^':
	  follow_ptr->chr = START_OF_LINE;
	  break;
	case '$':
	  follow_ptr->chr = END_OF_LINE;
	  break;
	case 'r':
	  follow_ptr->chr = '\r';
	  break;
	case 't':
	  follow_ptr->chr = '\t';
	  break;
	case 'v':
	  follow_ptr->chr = '\v';
	  break;
	default:
	  follow_ptr->chr = (uchar) *pos;
	  break;
	}
      }
      else
	follow_ptr->chr= (uchar) *pos;
      follow_ptr->table_offset=i;
      follow_ptr->len= ++len;
      follow_ptr++;
    }
    /* End marker for this from-string */
    follow_ptr->chr=0;
    follow_ptr->table_offset=i;
    follow_ptr->len=len;
    follow_ptr++;
    states+=(uint) len+1;
  }


  /* Compute the transitions of every set; new sets are appended as found */
  for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
  {
    set=sets.set+set_nr;
    default_state= 0;				/* Start from beginning */

    /* If end of found-string not found or start-set with current set */

    for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
    {
      if (!follow[i].chr)
      {
	if (! default_state)
	  default_state= find_found(found_set,set->table_offset,
				    set->found_offset+1);
      }
    }
    copy_bits(sets.set+used_sets,set);		/* Save set for changes */
    if (!default_state)
      or_bits(sets.set+used_sets,sets.set);	/* Can restart from start */

    /* Find all chars that follows current sets */
    bzero((char*) used_chars,sizeof(used_chars));
    for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
    {
      used_chars[follow[i].chr]=1;
      if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
	   follow[i].len > 1) || follow[i].chr == END_OF_LINE)
	used_chars[0]=1;
    }

    /* Mark word_chars used if \b is in state */
    if (used_chars[SPACE_CHAR])
      for (pos= word_end_chars ; *pos ; pos++)
	used_chars[(int) (uchar) *pos] = 1;

    /* Handle other used characters */
    for (chr= 0 ; chr < 256 ; chr++)
    {
      if (! used_chars[chr])
	set->next[chr]= (short) (chr ? default_state : -1);
      else
      {
	new_set=make_new_set(&sets);
	set=sets.set+set_nr;			/* if realloc */
	new_set->table_offset=set->table_offset;
	new_set->found_len=set->found_len;
	new_set->found_offset=set->found_offset+1;
	found_end=0;

	for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
	{
	  if (!follow[i].chr || follow[i].chr == chr ||
	      (follow[i].chr == SPACE_CHAR &&
	       (is_word_end[chr] ||
		(!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
	      (follow[i].chr == END_OF_LINE && ! chr))
	  {
	    if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
		follow[i].len > found_end)
	      found_end=follow[i].len;
	    if (chr && follow[i].chr)
	      internal_set_bit(new_set,i+1);	/* To next set */
	    else
	      internal_set_bit(new_set,i);
	  }
	}
	if (found_end)
	{
	  new_set->found_len=0;			/* Set for testing if first */
	  bits_set=0;
	  for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
	  {
	    if ((follow[i].chr == SPACE_CHAR ||
		 follow[i].chr == END_OF_LINE) && ! chr)
	      bit_nr=i+1;
	    else
	      bit_nr=i;
	    if (follow[bit_nr-1].len < found_end ||
		(new_set->found_len &&
		 (chr == 0 || !follow[bit_nr].chr)))
	      internal_clear_bit(new_set,i);
	    else
	    {
	      if (chr == 0 || !follow[bit_nr].chr)
	      {					/* best match  */
		new_set->table_offset=follow[bit_nr].table_offset;
		if (chr || (follow[i].chr == SPACE_CHAR ||
			    follow[i].chr == END_OF_LINE))
		  new_set->found_offset=found_end;	/* New match */
		new_set->found_len=found_end;
	      }
	      bits_set++;
	    }
	  }
	  if (bits_set == 1)
	  {
	    /* Only the match itself is left: point straight at a found entry */
	    set->next[chr] = find_found(found_set,
					new_set->table_offset,
					new_set->found_offset);
	    free_last_set(&sets);
	  }
	  else
	    set->next[chr] = find_set(&sets,new_set);
	}
	else
	  set->next[chr] = find_set(&sets,new_set);
      }
    }
  }

  /* Alloc replace structure for the replace-state-machine */

  if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
				    sizeof(REPLACE_STRING)*(found_sets+1)+
				    sizeof(char *)*count+result_len,
				    MYF(MY_WME | MY_ZEROFILL))))
  {
    /* Carve the single block into its four consecutive parts */
    rep_str=(REPLACE_STRING*) (replace+sets.count);
    to_array=(char **) (rep_str+found_sets+1);
    to_pos=(char *) (to_array+count);
    for (i=0 ; i < count ; i++)
    {
      to_array[i]=to_pos;
      to_pos=strmov(to_pos,to[i])+1;
    }
    /* Entry 0 means "end of line, nothing to replace" */
    rep_str[0].found=1;
    rep_str[0].replace_string=0;
    for (i=1 ; i <= found_sets ; i++)
    {
      pos=from[found_set[i-1].table_offset];
      rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
      rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
      rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
      rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
	end_of_word(pos);
    }
    /* Turn set indexes and negative found codes into real pointers */
    for (i=0 ; i < sets.count ; i++)
    {
      for (j=0 ; j < 256 ; j++)
	if (sets.set[i].next[j] >= 0)
	  replace[i].next[j]=replace+sets.set[i].next[j];
	else
	  replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
    }
  }
  my_free(follow);
  free_sets(&sets);
  my_free(found_set);
  DBUG_PRINT("exit",("Replace table has %d states",sets.count));
  DBUG_RETURN(replace);
}
676
677
/*
  Prepare an empty set pool for a state machine with 'states' positions.

  Clears *sets, computes the bitmap size and allocates the initial set and
  bitmap buffers. The pool starts with no usable entries (extra is 0), so
  the first make_new_set() call sizes the buffers and wires up the bitmaps.

  Returns 0 on success and 1 on allocation failure, in which case nothing
  is left allocated.
*/

static int init_sets(REP_SETS *sets,uint states)
{
  bzero((char*) sets,sizeof(*sets));
  sets->size_of_bits=((states+7)/8);
  if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
					      MYF(MY_WME))))
    return 1;
  if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
					   SET_MALLOC_HUNC,MYF(MY_WME))))
  {
    /* sets->set is still NULL here; the block to release is set_buffer */
    my_free(sets->set_buffer);
    sets->set_buffer=0;
    return 1;
  }
  return 0;
}
693
694 /* Make help sets invisible for nicer codeing */
695
make_sets_invisible(REP_SETS * sets)696 static void make_sets_invisible(REP_SETS *sets)
697 {
698 sets->invisible=sets->count;
699 sets->set+=sets->count;
700 sets->count=0;
701 }
702
make_new_set(REP_SETS * sets)703 static REP_SET *make_new_set(REP_SETS *sets)
704 {
705 uint i,count,*bit_buffer;
706 REP_SET *set;
707 if (sets->extra)
708 {
709 sets->extra--;
710 set=sets->set+ sets->count++;
711 bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
712 bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
713 set->found_offset=0;
714 set->found_len=0;
715 set->table_offset= (uint) ~0;
716 set->size_of_bits=sets->size_of_bits;
717 return set;
718 }
719 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
720 if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
721 sizeof(REP_SET)*count,
722 MYF(MY_WME))))
723 return 0;
724 sets->set_buffer=set;
725 sets->set=set+sets->invisible;
726 if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
727 (sizeof(uint)*sets->size_of_bits)*count,
728 MYF(MY_WME))))
729 return 0;
730 sets->bit_buffer=bit_buffer;
731 for (i=0 ; i < count ; i++)
732 {
733 sets->set_buffer[i].bits=bit_buffer;
734 bit_buffer+=sets->size_of_bits;
735 }
736 sets->extra=SET_MALLOC_HUNC;
737 return make_new_set(sets);
738 }
739
/* Give the most recently made set back to the pool as a spare entry */

static void free_last_set(REP_SETS *sets)
{
  sets->extra+= 1;
  sets->count-= 1;
}
746
/* Release the set array and the bitmap storage of a pool */

static void free_sets(REP_SETS *sets)
{
  REP_SET *all_sets= sets->set_buffer;
  uint *all_bits= sets->bit_buffer;

  my_free(all_sets);
  my_free(all_bits);
}
753
/*
  Set bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned one so that the highest bit of a word
  can be addressed without a signed shift overflow.
*/

static void internal_set_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] |= (uint) 1 << (bit % WORD_BIT);
  return;
}
759
/*
  Clear bit number 'bit' in the bitmap of 'set'.
  The mask is built from an unsigned one so that the highest bit of a word
  can be addressed without a signed shift overflow.
*/

static void internal_clear_bit(REP_SET *set, uint bit)
{
  set->bits[bit / WORD_BIT] &= ~ ((uint) 1 << (bit % WORD_BIT));
  return;
}
765
766
/* Add every bit set in 'from' to 'to' (word count taken from 'to') */

static void or_bits(REP_SET *to,REP_SET *from)
{
  uint *dst= to->bits;
  uint *dst_end= dst + to->size_of_bits;
  uint *src= from->bits;

  while (dst < dst_end)
    *dst++ |= *src++;
}
774
/* Overwrite the bitmap of 'to' with the bitmap of 'from' */

static void copy_bits(REP_SET *to,REP_SET *from)
{
  size_t bytes= (size_t) (sizeof(uint) * to->size_of_bits);

  memcpy((uchar*) to->bits,(uchar*) from->bits, bytes);
}
780
/* Compare two bitmaps; returns 0 when they are identical (memcmp result) */

static int cmp_bits(REP_SET *set1,REP_SET *set2)
{
  size_t bytes= sizeof(uint) * set1->size_of_bits;

  return memcmp(set1->bits, set2->bits, bytes);
}
786
787
788 /* Get next set bit from set. */
789
get_next_bit(REP_SET * set,uint lastpos)790 static int get_next_bit(REP_SET *set,uint lastpos)
791 {
792 uint pos,*start,*end,bits;
793
794 start=set->bits+ ((lastpos+1) / WORD_BIT);
795 end=set->bits + set->size_of_bits;
796 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
797
798 while (! bits && ++start < end)
799 bits=start[0];
800 if (!bits)
801 return 0;
802 pos=(uint) (start-set->bits)*WORD_BIT;
803 while (! (bits & 1))
804 {
805 bits>>=1;
806 pos++;
807 }
808 return pos;
809 }
810
811 /* find if there is a same set in sets. If there is, use it and
812 free given set, else put in given set in sets and return it's
813 position */
814
find_set(REP_SETS * sets,REP_SET * find)815 static short find_set(REP_SETS *sets,REP_SET *find)
816 {
817 uint i;
818 for (i=0 ; i < sets->count-1 ; i++)
819 {
820 if (!cmp_bits(sets->set+i,find))
821 {
822 free_last_set(sets);
823 return (short) i;
824 }
825 }
826 return (short) i; /* return new position */
827 }
828
829
830 /*
831 find if there is a found_set with same table_offset & found_offset
832 If there is return offset to it, else add new offset and return pos.
833 Pos returned is -offset-2 in found_set_structure because it's is
834 saved in set->next and set->next[] >= 0 points to next set and
835 set->next[] == -1 is reserved for end without replaces.
836 */
837
find_found(FOUND_SET * found_set,uint table_offset,int found_offset)838 static short find_found(FOUND_SET *found_set,uint table_offset,
839 int found_offset)
840 {
841 int i;
842 for (i=0 ; (uint) i < found_sets ; i++)
843 if (found_set[i].table_offset == table_offset &&
844 found_set[i].found_offset == found_offset)
845 return (short) (-i-2);
846 found_set[i].table_offset=table_offset;
847 found_set[i].found_offset=found_offset;
848 found_sets++;
849 return (short) (-i-2); /* return new position */
850 }
851
852 /* Return 1 if regexp starts with \b or ends with \b*/
853
start_at_word(char * pos)854 static uint start_at_word(char * pos)
855 {
856 return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
857 }
858
end_of_word(char * pos)859 static uint end_of_word(char * pos)
860 {
861 char * end=strend(pos);
862 return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
863 (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
864 1 : 0;
865 }
866
867
replace_len(char * str)868 static uint replace_len(char * str)
869 {
870 uint len=0;
871 while (*str)
872 {
873 if (str[0] == '\\' && str[1])
874 str++;
875 str++;
876 len++;
877 }
878 return len;
879 }
880
881
882 /* The actual loop */
883
replace_strings(REPLACE * rep,char ** start,uint * max_length,char * from)884 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
885 char *from)
886 {
887 reg1 REPLACE *rep_pos;
888 reg2 REPLACE_STRING *rep_str;
889 char *to, *end, *pos, *new;
890
891 end=(to= *start) + *max_length-1;
892 rep_pos=rep+1;
893 for(;;)
894 {
895 while (!rep_pos->found)
896 {
897 rep_pos= rep_pos->next[(uchar) *from];
898 if (to == end)
899 {
900 (*max_length)+=8192;
901 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
902 return (uint) -1;
903 to=new+(to - *start);
904 end=(*start=new)+ *max_length-1;
905 }
906 *to++= *from++;
907 }
908 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
909 return (uint) (to - *start)-1;
910 updated=1; /* Some char * is replaced */
911 to-=rep_str->to_offset;
912 for (pos=rep_str->replace_string; *pos ; pos++)
913 {
914 if (to == end)
915 {
916 (*max_length)*=2;
917 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
918 return (uint) -1;
919 to=new+(to - *start);
920 end=(*start=new)+ *max_length-1;
921 }
922 *to++= *pos;
923 }
924 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
925 return (uint) (to - *start);
926 rep_pos=rep;
927 }
928 }
929
static char *buffer;		/* The buffer itself, grown as needed. */
static int bufbytes;		/* Number of bytes in the buffer. */
/*
  bufread: number of bytes to get with each read().
  my_eof:  set once fill_buffer_retaining() has appended its own newline
           at end of input; no more reads are done after that.
*/
static int bufread,my_eof;
static uint bufalloc;		/* Allocated size of buffer, not counting the sentinel byte */
static char *out_buff;		/* Output buffer handed to replace_strings() */
static uint out_length;		/* Allocated size of out_buff */
936
initialize_buffer()937 static int initialize_buffer()
938 {
939 bufread = 8192;
940 bufalloc = bufread + bufread / 2;
941 if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
942 return 1;
943 bufbytes=my_eof=0;
944 out_length=bufread;
945 if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
946 return(1);
947 return 0;
948 }
949
reset_buffer()950 static void reset_buffer()
951 {
952 bufbytes=my_eof=0;
953 }
954
free_buffer()955 static void free_buffer()
956 {
957 my_free(buffer);
958 my_free(out_buff);
959 }
960
961
962 /*
963 Fill the buffer retaining the last n bytes at the beginning of the
964 newly filled buffer (for backward context). Returns the number of new
965 bytes read from disk.
966 */
967
fill_buffer_retaining(fd,n)968 static int fill_buffer_retaining(fd,n)
969 File fd;
970 int n;
971 {
972 int i;
973
974 /* See if we need to grow the buffer. */
975 if ((int) bufalloc - n <= bufread)
976 {
977 while ((int) bufalloc - n <= bufread)
978 {
979 bufalloc *= 2;
980 bufread *= 2;
981 }
982 buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
983 if (! buffer)
984 return(-1);
985 }
986
987 /* Shift stuff down. */
988 bmove(buffer,buffer+bufbytes-n,(uint) n);
989 bufbytes = n;
990
991 if (my_eof)
992 return 0;
993
994 /* Read in new stuff. */
995 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
996 (size_t) bufread, MYF(MY_WME))) < 0)
997 return -1;
998
999 /* Kludge to pretend every nonempty file ends with a newline. */
1000 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1001 {
1002 my_eof = i = 1;
1003 buffer[bufbytes] = '\n';
1004 }
1005
1006 bufbytes += i;
1007 return i;
1008 }
1009
1010 /* Return 0 if convert is ok */
1011 /* Global variable update is set if something was changed */
1012
convert_pipe(rep,in,out)1013 static int convert_pipe(rep,in,out)
1014 REPLACE *rep;
1015 FILE *in,*out;
1016 {
1017 int retain,error;
1018 uint length;
1019 char save_char,*end_of_line,*start_of_line;
1020 DBUG_ENTER("convert_pipe");
1021
1022 updated=retain=0;
1023 reset_buffer();
1024
1025 while ((error=fill_buffer_retaining(my_fileno(in),retain)) > 0)
1026 {
1027 end_of_line=buffer ;
1028 buffer[bufbytes]=0; /* Sentinel */
1029 for (;;)
1030 {
1031 start_of_line=end_of_line;
1032 while (end_of_line[0] != '\n' && end_of_line[0])
1033 end_of_line++;
1034 if (end_of_line == buffer+bufbytes)
1035 {
1036 retain= (int) (end_of_line - start_of_line);
1037 break; /* No end of line, read more */
1038 }
1039 save_char=end_of_line[0];
1040 end_of_line[0]=0;
1041 end_of_line++;
1042 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1043 (uint) -1)
1044 return 1;
1045 if (!my_eof)
1046 out_buff[length++]=save_char; /* Don't write added newline */
1047 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1048 DBUG_RETURN(1);
1049 }
1050 }
1051 DBUG_RETURN(error);
1052 }
1053
1054
/*
  Convert the file 'name' in place.

  The converted text is written to a temporary file in the same directory
  (following a symlink first when HAVE_READLINK is defined and symlinks
  are enabled). If something was replaced and no error occurred, the
  temporary file takes the place of the original; otherwise it is deleted.
  Unless silent, the outcome is printed.

  Returns 0 on success and non-zero on any failure, including failure to
  flush the output or to install the new file.
*/

static int convert_file(REPLACE *rep, char * name)
{
  int error;
  FILE *in,*out;
  char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
#ifdef HAVE_READLINK
  char link_name[FN_REFLEN];
#endif
  File temp_file;
  size_t dir_buff_length;
  DBUG_ENTER("convert_file");

  /* check if name is a symlink */
#ifdef HAVE_READLINK
  org_name= (!my_disable_symlinks &&
             !my_readlink(link_name, name, MYF(0))) ? link_name : name;
#endif
  if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
    DBUG_RETURN(1);
  dirname_part(dir_buff, org_name, &dir_buff_length);
  if ((temp_file= create_temp_file(tempname, dir_buff, "PR", 0,
                                   MYF(MY_WME))) < 0)
  {
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }
  if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
  {
    /* No stream owns the descriptor yet; release it and the temp file */
    my_close(temp_file,MYF(0));
    my_delete(tempname,MYF(0));
    my_fclose(in,MYF(0));
    DBUG_RETURN(1);
  }

  error=convert_pipe(rep,in,out);
  my_fclose(in,MYF(0));
  /* Closing flushes buffered output; a failure means the copy is incomplete */
  if (my_fclose(out,MYF(MY_WME)))
    error=1;

  if (updated && ! error)
  {
    if (my_redel(org_name, tempname, 0, MYF(MY_WME | MY_LINK_WARNING)))
      error=1;				/* Original was not replaced */
  }
  else
    my_delete(tempname,MYF(MY_WME));
  if (!silent && ! error)
  {
    if (updated)
      printf("%s converted\n",name);
    else if (verbose)
      printf("%s left unchanged\n",name);
  }
  DBUG_RETURN(error);
}
1103