1 /*
2 Numdiff - compare putatively similar files,
3 ignoring small numeric differences
4 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Ivano Primi <ivprimi@libero.it>
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include<stdio.h>
21 #include<stdlib.h>
22 #include<string.h>
23 #include"linesplit.h"
24
/* Separator written after each string when the test driver prints a vector */
#define O_DEF_SEP '\n'
/* Character that introduces an escape sequence (see process_character) */
#define ESC_CHAR '\\'

/*
  Value returned by is_hex_digit() and is_oct_digit() for a character
  that is not a valid digit: the largest value of an unsigned char,
  which can never be mistaken for a digit value.
*/
const unsigned char InvDigit = (unsigned char) ~0u;
29
30 static
is_hex_digit(char ch)31 unsigned char is_hex_digit (char ch)
32 {
33 if (ch >= '0' && ch <= '9')
34 return ch - '0';
35 else
36 {
37 switch (ch)
38 {
39 case 'a':
40 case 'A':
41 return 10;
42 case 'b':
43 case 'B':
44 return 11;
45 case 'c':
46 case 'C':
47 return 12;
48 case 'd':
49 case 'D':
50 return 13;
51 case 'e':
52 case 'E':
53 return 14;
54 case 'f':
55 case 'F':
56 return 15;
57 default:
58 return InvDigit;
59 }
60 }
61 }
62
63 static
is_oct_digit(char ch)64 unsigned char is_oct_digit (char ch)
65 {
66 if (ch >= '0' && ch <= '7')
67 return ch - '0';
68 else
69 return InvDigit;
70 }
71
72 static
is_hex(const char * pafter_escape,char * byte)73 int is_hex (const char* pafter_escape, char* byte)
74 {
75 unsigned char hb, lb;
76
77 /*
78 The return value is the number of hexadecimal digits successfully read
79 */
80 if ( (hb = is_hex_digit (*pafter_escape)) != InvDigit )
81 {
82 int rv = 1;
83
84 lb = is_hex_digit (*(pafter_escape+1));
85 if (lb == InvDigit)
86 {
87 lb = hb;
88 hb = 0;
89 }
90 else
91 rv = 2;
92
93 if ((byte))
94 *byte = (char)(16 * hb + lb);
95 return rv;
96 }
97 else
98 {
99 if ((byte))
100 *byte = -1;
101 return 0;
102 }
103 }
104
105 static
is_oct(const char * pafter_escape,char * byte)106 int is_oct (const char* pafter_escape, char* byte)
107 {
108 unsigned char ho, mo, lo;
109
110 /*
111 The return value is the number of octal digits successfully read
112 */
113 if ( (ho = is_oct_digit (*pafter_escape)) != InvDigit )
114 {
115 int rv = 1;
116
117 mo = is_oct_digit (*(pafter_escape+1));
118 lo = is_oct_digit (*(pafter_escape+2));
119 if (mo == InvDigit)
120 {
121 lo = ho;
122 ho = 0;
123 mo = 0;
124 }
125 else
126 {
127 rv++;
128 if (lo == InvDigit)
129 {
130 lo = mo;
131 mo = ho;
132 ho = 0;
133 }
134 else
135 rv++;
136 }
137 if ((byte))
138 *byte = (char)(64 * ho + 8 * mo + lo);
139 return (64 * (int)ho + 8 * (int)mo + (int)lo < 256 ? rv : 0);
140 }
141 else
142 {
143 if ((byte))
144 *byte = -1;
145 return 0;
146 }
147 }
148
149 static
process_character(const char * chp,const char ** new_chp)150 char process_character (const char* chp, const char** new_chp)
151 {
152 const char* nchp;
153 char byte;
154 int r;
155
156 if (*chp == ESC_CHAR)
157 {
158 switch (*(chp+1))
159 {
160 case 'a':
161 byte = '\a';
162 nchp = chp+2;
163 break;
164 case 'b':
165 byte = '\b';
166 nchp = chp+2;
167 break;
168 case 'f':
169 byte = '\f';
170 nchp = chp+2;
171 break;
172 case 'n':
173 byte = '\n';
174 nchp = chp+2;
175 break;
176 case 'r':
177 byte = '\r';
178 nchp = chp+2;
179 break;
180 case 't':
181 byte = '\t';
182 nchp = chp+2;
183 break;
184 case 'v':
185 byte = '\v';
186 nchp = chp+2;
187 break;
188 case 's':
189 byte = ' ';
190 nchp = chp+2;
191 break;
192 case 'x':
193 if ( (r = is_hex (chp+2, &byte)) && byte != '\0' )
194 nchp = chp + (r + 2);
195 else
196 {
197 byte = *(chp+1);
198 nchp = chp+2;
199 }
200 break;
201 case '0':
202 case '1':
203 case '2':
204 case '3':
205 case '4':
206 case '5':
207 case '6':
208 case '7':
209 if ( (r = is_oct (chp+1, &byte)) && byte != '\0' )
210 nchp = chp + (r + 1);
211 else
212 {
213 byte = *(chp+1);
214 nchp = chp+2;
215 }
216 break;
217 default:
218 byte = *(chp+1);
219 nchp = chp+2;
220 break;
221 }
222 }
223 else
224 {
225 byte = *chp;
226 nchp = chp+1;
227 }
228 if ((new_chp))
229 *new_chp = nchp;
230 return byte;
231 }
232
/*
  Translate the escape sequences found in the substring [BPTR, EPTR)
  and return the result as a newly allocated, nul-terminated string
  (NULL if the memory for the result cannot be allocated).
  Precondition is EPTR >= BPTR.

  Rem.: the result is never longer than the substring itself, therefore
        EPTR - BPTR + 1 zero-initialized bytes are always sufficient.
*/
static
char* process_substring (const char* bptr, const char* eptr)
{
  const char* src = bptr;
  char* result = calloc (eptr - bptr + 1, sizeof(char));
  char* dst = result;

  if (result == NULL)
    return NULL;

  while (src < eptr)
    {
      /* process_character() moves SRC past what it has consumed */
      *dst++ = process_character (src, &src);
    }
  return result;
}
256
/*
  Create and return a vector of strings using the description
  contained in the string pointed to by STR.
  The items in STR (to each of which corresponds a string in the
  returned vector) are separated by one or more occurrences of the
  character SEPARATOR. Separators at the beginning and at the end
  of STR are ignored. Every item is passed through process_substring(),
  so that escape sequences are translated.

  The returned vector is always terminated by a NULL pointer; if STR
  does not contain any item, the vector contains only this terminator.
  The vector must be released by calling delete_string_vector().

  In case of error while allocating memory for the vector
  and its elements return NULL.
  Return NULL also if STR == NULL.

  Remark: SEPARATOR cannot be the nul character.
  Return NULL if SEPARATOR is the nul character.
*/
char** ssplit (const char* str, char separator)
{
  size_t i, n;
  const char *beg, *ptr, *ptr2sep;
  char** sv;

  if (!str || separator == '\0')
    return NULL;

  /* Skip the leading separators */
  for (beg = str; *beg == separator; beg++)
    ;

  /*
    Count the runs of separators following BEG: the number of items
    can not be greater than this count plus one (it is smaller when
    STR is empty or ends with a separator).
  */
  for (n = 1, ptr = beg; (ptr2sep = strchr (ptr, separator)) != NULL; n++)
    {
      for (ptr = ptr2sep + 1; *ptr == separator; ptr++)
        ;
    }

  /*
    Allocate a vector of N+1 pointers. calloc() is used so that every
    slot which is not filled below acts as terminator of the vector:
    no element is ever left uninitialized.
  */
  if ( !(sv = calloc (n + 1, sizeof *sv)) )
    return NULL;

  /* Here PTR always points either to the start of an item or to the end */
  for (i = 0, ptr = beg; *ptr != '\0'; i++)
    {
      ptr2sep = strchr (ptr, separator);
      if (!ptr2sep)
        ptr2sep = ptr + strlen (ptr);   /* last item: it ends with STR */
      sv[i] = process_substring (ptr, ptr2sep);
      if (!sv[i])
        {
          delete_string_vector (sv);
          return NULL;
        }
      for (ptr = ptr2sep; *ptr == separator; ptr++)
        ;
    }
  return sv;
}
324
/*
  Create and return a vector of strings using the characters
  contained in the string pointed to by STR. To each
  (possibly escaped) character in this string corresponds
  exactly one string of length 1 in the returned vector.
  The vector is terminated by a NULL pointer and must be released
  by calling delete_string_vector().
  Return NULL if STR == NULL or in case of out of memory.
*/
char** ssplit_former_way (const char* str)
{
  size_t n, ls;
  char **sv;
  const char *ptr, *end;

  if (!str)
    return NULL;

  ls = strlen (str);
  end = str + ls;
  /* At most one string per character of STR, plus the NULL terminator */
  sv = calloc (ls + 1, sizeof *sv);
  if (!sv)
    return NULL;

  /*
    The loop is bounded by END rather than by the value of *PTR:
    process_character() may report a position beyond the terminator
    (escape character at the very end of STR), which must not be read.
  */
  for (n = 0, ptr = str; ptr < end; n++)
    {
      sv[n] = malloc (2 * sizeof(char));
      if (!sv[n])
        {
          delete_string_vector (sv);
          return NULL;
        }
      sv[n][0] = process_character (ptr, &ptr);
      sv[n][1] = '\0';
    }
  return sv;
}
362
/*
  Process the string pointed to by ISTR (translating the escape
  sequences it contains) and return the result as a newly
  allocated string, which the caller has to release with free().
  Return NULL if ISTR == NULL or in case of error while allocating
  memory for the result.
*/
char* get_separating_string (const char* istr)
{
  if (!istr)
    return NULL;
  return process_substring (istr, istr + strlen (istr));
}
371
/*
  Write to the file pointed to by FP the strings contained in
  the NULL-terminated vector SV. Each string is enclosed in double
  quotes and followed by the character SEPARATOR.
  If SV is NULL, write "<Empty>" followed by SEPARATOR.
*/
void print_string_vector (FILE* fp, const char** sv, char separator)
{
  const char** item;

  if (sv == NULL)
    {
      fputs ("<Empty>", fp);
      fputc (separator, fp);
      return;
    }

  for (item = sv; *item != NULL; item++)
    fprintf (fp, "\"%s\"%c", *item, separator);
}
394
/*
  Rearrange the strings of the vector SV in descending order
  with respect to their length. Nothing is done if SV is NULL.

  Rem.: Pre-condition is that SV is NULL-terminated.
        This function is suitable only for small vectors, since it
        uses a selection sort (quadratic in the number of strings):
        each position receives the first among the longest strings
        not yet placed.
*/
void sort_string_vector (char** sv)
{
  char **slot, **probe, **longest;
  size_t best, len;
  char *held;

  if (sv == NULL)
    return;

  for (slot = sv; *slot != NULL; slot++)
    {
      longest = slot;
      best = strlen (*slot);
      for (probe = slot + 1; *probe != NULL; probe++)
        {
          len = strlen (*probe);
          if (len > best)
            {
              best = len;
              longest = probe;
            }
        }
      /* Move the longest remaining string to the current position */
      held = *slot;
      *slot = *longest;
      *longest = held;
    }
}
428
/*
  Remove duplicates from the NULL-terminated vector SV.
  Only the first occurrence of each string is kept; every further
  occurrence is released with free() and the following elements
  (terminator included) are shifted one position backward.
  Nothing is done if SV is NULL.
*/
void remove_duplicates_from_string_vector (char** sv)
{
  char **ref, **cand, **mv;

  if (!sv)
    return;

  for (ref = sv; *ref != NULL; ref++)
    {
      cand = ref + 1;
      while (*cand != NULL)
        {
          if (strcmp (*cand, *ref) != 0)
            {
              cand++;
              continue;
            }
          /*
            Duplicate found: drop it and close the gap. CAND is not
            advanced, since it now points to an element not yet examined.
          */
          free (*cand);
          for (mv = cand; mv[1] != NULL; mv++)
            mv[0] = mv[1];
          mv[0] = NULL;
        }
    }
}
456
/*
  Return 1 if a string equal to the one pointed to by STR is found
  in the NULL-terminated vector SV, otherwise 0.
  0 is also returned if STR or SV is NULL.
*/
int is_string_in_vector (const char* str, const char** sv)
{
  const char** item;

  if (!sv || !str)
    return 0;

  for (item = sv; *item != NULL; item++)
    {
      if (strcmp (str, *item) == 0)
        return 1;
    }
  return 0;
}
473
/*
  Return 0 if there is no string in the NULL-terminated vector SV
  which contains the character CH, otherwise return the length of
  the longest string among those which contain the character CH.
  0 is also returned if SV is NULL.
*/
size_t is_char_in_vector (int ch, const char** sv)
{
  const char** item;
  size_t len, longest = 0;

  if (sv == NULL)
    return 0;

  for (item = sv; *item != NULL; item++)
    {
      if (strchr (*item, ch) == NULL)
        continue;
      len = strlen (*item);
      if (len > longest)
        longest = len;
    }
  return longest;
}
496
/*
  Release the memory allocated for the strings of the NULL-terminated
  vector SV and then the memory allocated for the vector itself.
  Nothing is done if SV is NULL.
*/
void delete_string_vector (char** sv)
{
  char** item;

  if (!sv)
    return;

  for (item = sv; *item != NULL; item++)
    free (*item);
  free (sv);
}
514
/*
  Return a pointer to the position following the initial
  segment of STR that does not contain any string
  from the vector SV, i.e. a pointer to the first occurrence in STR
  of a string from SV or, if there is none, to the end of STR.
  If such an initial segment is empty, the pointer returned is STR.
  Consider the string STR as ending at the first occurrence of EOS.
  Return NULL if STR or SV is NULL.

  SV must be NULL terminated, it cannot contain the empty ("") string
  nor a string of length > 1 with EOS being one of its non-null characters
  (but SV may well contain the string of length 1 having EOS as its
  only non-null character).

  Rem.: EOS can be the null character.
        If the string pointed to by STR does not contain any EOS
        character, a buffer overrun will occur.
*/
char* string_cspn (const char* str, const char** sv, int eos)
{
  const char *limit, *pos, *s, *d;
  const char **delim;

  if (str == NULL || sv == NULL)
    return NULL;

  /* Find the end of STR */
  limit = str;
  while (*limit != eos)
    limit++;

  for (pos = str; pos < limit; pos++)
    {
      for (delim = sv; *delim != NULL; delim++)
        {
          /* Does the string *DELIM start at POS ? */
          s = pos;
          d = *delim;
          while (*d != '\0' && *d == *s)
            {
              d++;
              s++;
            }
          if (*d == '\0')
            return (char*) pos;
        }
    }
  /* No string of SV found: POS is now the end of STR */
  return (char*) pos;
}
561
/*
  Return a pointer to the position following the initial
  segment of STR that consists entirely of strings
  from the vector SV. If such an initial segment does not
  exist, return a pointer to STR.
  Consider the string STR as ending at the first occurrence of EOS.
  Return NULL if STR or SV is NULL.

  SV must be NULL terminated, it cannot contain the empty ("") string
  nor a string of length > 1 with EOS being one of its non-null characters
  (but SV may well contain the string of length 1 having EOS as its
  only non-null character).

  Rem.: this function works under the assumption that the strings in
        the vector SV are ordered according to their lengths, where SV[0]
        is the string with the greatest length.

        EOS can be the null character.
*/
char* string_spn (const char* str, const char** sv, int eos)
{
  const char *pos, *s, *d;
  const char **delim;
  int matched;

  if (!str || !sv)
    return NULL;

  pos = str;
  matched = 1;
  /*
    Go on as long as a string of SV has been found at the current
    position and the end of STR has not been reached. Since the scan
    is not even started when POS points to EOS, the string having EOS
    as its only non-null character can never be matched.
  */
  while (matched && *pos != eos)
    {
      matched = 0;
      for (delim = sv; !matched && *delim != NULL; delim++)
        {
          s = pos;
          d = *delim;
          while (*d != '\0' && *d == *s)
            {
              d++;
              s++;
            }
          if (*d == '\0')
            {
              /*
                *DELIM starts at POS. It does not contain any EOS,
                so no EOS is skipped by moving POS past it.
              */
              pos = s;
              matched = 1;
            }
        }
    }
  return (char*) pos;
}
626
627 #ifdef _TEST_LINE_SPLIT_
628
/* Separator of the items in the command line argument of the test driver */
#define I_DEF_SEP ' '
630
/*
  Print on the standard output a short message explaining
  how the test program PROGNAME has to be called.
*/
static
void print_help (const char* progname)
{
  fprintf (stdout, "Usage: %s STRING\n\n", progname);
}
636
/*
  Write to the file pointed to by FP the characters of the
  substring [BPTR, EPTR), followed by a newline if NL is not zero.
  Nothing at all is written (not even the newline) if the
  substring is empty, i.e. if EPTR <= BPTR.
*/
static
void print_substring (FILE* fp, const char* bptr, const char* eptr, int nl)
{
  const char *cur = bptr;

  if (eptr <= bptr)
    return;

  while (cur != eptr)
    {
      putc (*cur, fp);
      cur++;
    }
  if (nl != 0)
    putc ('\n', fp);
}
652
653 #define BUFFSIZE 1024
654
main(int argc,char * argv[])655 int main (int argc, char* argv[])
656 {
657 if (argc != 2)
658 {
659 print_help(argv[0]);
660 return 1;
661 }
662 else
663 {
664 char** string_vector = NULL;
665 char** sv = NULL;
666 char linebuff[BUFFSIZE] = "";
667 char *rv, *ptr, *endptr;
668 size_t l;
669
670 string_vector = ssplit (argv[1], I_DEF_SEP);
671 sv = ssplit_former_way (argv[1]);
672 remove_duplicates_from_string_vector (string_vector);
673 remove_duplicates_from_string_vector (sv);
674 sort_string_vector (string_vector);
675 print_string_vector (stdout, (const char**)string_vector, O_DEF_SEP);
676 l = is_char_in_vector (':', string_vector);
677 printf ("Length of the longest string containing \':\' = %zu\n", l);
678 puts ("\n\nSplitting the string in the former way produces the following result:");
679 print_string_vector (stdout, (const char**)sv, O_DEF_SEP);
680 do
681 {
682 #ifdef _MINOR_TEST_
683 puts ("\nEnter a line of text (Ctrl+D to terminate)");
684 #endif
685 rv = fgets (linebuff, BUFFSIZE, stdin);
686 if ((rv))
687 {
688 #ifdef _MINOR_TEST_
689 ptr = string_cspn (linebuff, (const char**)string_vector, '\0');
690 fputs ("Cspn =", stdout);
691 print_substring (stdout, linebuff, ptr, 0);
692 fputs ("|EoS|\n", stdout);
693
694 ptr = string_spn (linebuff, (const char**)string_vector, '\0');
695 fputs ("Spn =", stdout);
696 print_substring (stdout, linebuff, ptr, 0);
697 fputs ("|EoS|\n", stdout);
698 #else
699 unsigned long fieldno;
700
701
702 for (fieldno = 1, ptr = linebuff; *ptr != '\0'; fieldno++)
703 {
704 ptr = string_spn (ptr, (const char**)string_vector, '\0');
705 endptr = string_cspn (ptr, (const char**)string_vector, '\0');
706 if ((*ptr))
707 {
708 printf ("%3lu.>", fieldno);
709 print_substring (stdout, ptr, endptr, 0);
710 puts ("<");
711 }
712 ptr = endptr;
713 }
714 putchar ('\n');
715 #endif /* _MINOR_TEST_ */
716 }
717 } while ((rv));
718 delete_string_vector (string_vector);
719 return 0;
720 }
721 }
722
723 #endif /* _TEST_LINE_SPLIT_ */
724