1 /*
2 Numdiff - compare putatively similar files,
3 ignoring small numeric differences
4 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Ivano Primi <ivprimi@libero.it>
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
/* Keep this inclusion at the very beginning of the file: moving it */
/* causes problems with the symbol __USE_FILE_OFFSET64. */
22 #include"numdiff.h"
23 #include"linesplit.h"
24 #include<stdio.h>
25 #include<stdlib.h> /* for free() */
26 #include<string.h>
27 #if HAVE_GETTIMEOFDAY
28 #include<sys/time.h>
29 #endif
30 #ifdef ENABLE_NLS
31 #include<locale.h>
32 #endif
33
34 /* See cmpfns.c */
35 extern int cmp_files (FILE* pf1, FILE* pf2, const argslist* argl, statlist* statres);
36
37 /* See options.c */
38 extern void print_version (const char* progname);
39 extern void print_help (const char* progname);
40 extern int setargs (int argc, char* argv[], argslist *list);
41
42 static
load_defaults(argslist * list,statlist * statres)43 void load_defaults (argslist * list, statlist* statres)
44 {
45 int i;
46
47 binary = 0;
48 suppress_common_lines = 0;
49 ignore_white_space = IGNORE_NO_WHITE_SPACE;
50 expand_tabs = 0;
51 speed_large_files = 0;
52 program_name = PACKAGE;
53
54 list->optmask = newBitVector (MAX_NUMDIFF_OPTIONS);
55 list->output_mode = OUTMODE_NORMAL;
56 for (i=0; i < FIELDMASK_SIZE; i++)
57 {
58 list->ghostmask1[i] = list->ghostmask2[i] = 0x0;
59 list->tblurmask1[i] = list->tblurmask2[i] = 0x0;
60 list->pblurmask1[i] = list->pblurmask2[i] = 0x0;
61 }
62 list->relerr_formula = CLASSIC_FORMULA;
63 statres->Labserr_location.lineno1 = statres->Labserr_location.fieldno1 = 0;
64 statres->Labserr_location.lineno2 = statres->Labserr_location.fieldno2 = 0;
65 statres->Rabserr_location.lineno1 = statres->Rabserr_location.fieldno1 = 0;
66 statres->Rabserr_location.lineno2 = statres->Rabserr_location.fieldno2 = 0;
67 statres->Nentries = statres->Ndisperr = 0;
68 list->flag = 0;
69 list->ifs1 = list->ifs2 = NULL;
70 list->iscale = ISCALE;
71 list->nf1.dp = DP;
72 list->nf1.thsep = THSEP;
73 list->nf1.grouping = GROUPING;
74 list->nf1.pos_sign = POS_SIGN;
75 list->nf1.neg_sign = NEG_SIGN;
76 list->nf1.ech = ECH;
77 list->nf1.iu = IU;
78 list->file1 = list->file2 = NULL;
79 list->nf2 = list->nf1;
80 list->nf1.currency = get_separating_string (CURRENCY);
81 list->nf2.currency = get_separating_string (CURRENCY);
82 }
83
84 static
init_mpa_support(argslist * list,statlist * statres)85 void init_mpa_support (argslist* list, statlist* statres)
86 {
87 init_mpa(list->iscale);
88 initR (&statres->Labserr);
89 initR (&statres->Crelerr);
90 initR (&statres->Lrelerr);
91 initR (&statres->Cabserr);
92 initR (&statres->N1abserr);
93 initR (&statres->N1disperr);
94 initR (&statres->N2abserr);
95 initR (&statres->N2disperr);
96 list->maxabserr = thrlist_new ();
97 list->maxrelerr = thrlist_new ();
98 }
99
100 static
dismiss_mpa_support(argslist * list,statlist * statres)101 void dismiss_mpa_support (argslist* list, statlist* statres)
102 {
103 delR (&statres->Labserr);
104 delR (&statres->Crelerr);
105 delR (&statres->Lrelerr);
106 delR (&statres->Cabserr);
107 delR (&statres->N1abserr);
108 delR (&statres->N1disperr);
109 delR (&statres->N2abserr);
110 delR (&statres->N2disperr);
111 thrlist_dispose (&list->maxabserr);
112 thrlist_dispose (&list->maxrelerr);
113 end_mpa();
114 }
115
/*
 * Store the current time as the modification time of *ST.
 *
 * When the stat structure has a sub-second field (ST_MTIM_NSEC is
 * defined) the most precise available clock is tried first:
 * clock_gettime(), then gettimeofday().  If neither is available, or
 * both fail, the time is taken from time() with one-second resolution.
 */
static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, NULL) == 0)
      {
        st->st_mtime = now.tv_sec;
        /* microseconds -> nanoseconds */
        st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Last resort: whole seconds only. */
  st->st_mtime = time (NULL);
}
142
/* Special (negative) values that files[f].desc can hold instead of a
   real file descriptor. */
#define NONEXISTENT (-1)  /* nonexistent file */
#define UNOPENED (-2)     /* file not opened (yet), e.g. a directory */

/* An errno value E is stored in a descriptor slot as -3 - E, which
   keeps it apart from the two markers above.  The mapping is its own
   inverse, hence the two macros share the same formula. */
#define ERRNO_ENCODE(e) (-3 - (e))
#define ERRNO_DECODE(desc) (-3 - (desc))

/* Nonzero if the stat data of files[f] describe a directory. */
#define DIR_P(f) (S_ISDIR (files[f].stat.st_mode) != 0)
150
151 static
open_files(const char * name0,const char * name1)152 int open_files (const char* name0, const char* name1)
153 {
154 register int f;
155 int status = EXIT_SUCCESS;
156 bool same_files;
157
158 if (!name0 || !name1)
159 return EXIT_TROUBLE;
160
161 memset (files, 0, sizeof files);
162 files[0].desc = UNOPENED;
163 files[1].desc = UNOPENED;
164 files[0].name = name0;
165 files[1].name = name1;
166
167 /* Stat the files. */
168
169 for (f = 0; f < 2; f++)
170 {
171 if ((f) && file_name_cmp (files[f].name, files[0].name) == 0)
172 {
173 files[f].desc = files[0].desc;
174 files[f].stat = files[0].stat;
175 }
176 else if (strcmp (files[f].name, "-") == 0)
177 {
178 files[f].desc = STDIN_FILENO;
179 if (fstat (STDIN_FILENO, &files[f].stat) != 0)
180 files[f].desc = ERRNO_ENCODE (errno);
181 else
182 {
183 if (S_ISREG (files[f].stat.st_mode))
184 {
185 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
186 if (pos < 0)
187 files[f].desc = ERRNO_ENCODE (errno);
188 else
189 files[f].stat.st_size =
190 MAX (0, files[f].stat.st_size - pos);
191 }
192
193 /* POSIX 1003.1-2001 requires current time for
194 stdin. */
195 set_mtime_to_now (&files[f].stat);
196 }
197 }
198 else if (stat (files[f].name, &files[f].stat) != 0)
199 files[f].desc = ERRNO_ENCODE (errno);
200 }
201
202 for (f = 0; f < 2; f++)
203 {
204 int e = ERRNO_DECODE (files[f].desc);
205 if (0 <= e)
206 {
207 errno = e;
208 perror_with_name (files[f].name);
209 status = EXIT_TROUBLE;
210 }
211 }
212
213 if (status != EXIT_SUCCESS)
214 /* One of the files should exist but does not. */
215 return status;
216 else if (DIR_P (0) | DIR_P (1))
217 return EXIT_TROUBLE;
218 else
219 {
220 /* Both exist and neither is a directory. */
221 /* Are they the same file ? */
222 same_files
223 = (files[0].desc != NONEXISTENT
224 && files[1].desc != NONEXISTENT
225 && 0 < same_file (&files[0].stat, &files[1].stat)
226 && same_file_attributes (&files[0].stat,
227 &files[1].stat));
228
229 /* Open the files and record their descriptors. */
230
231 if (files[0].desc == UNOPENED)
232 if ((files[0].desc = open (files[0].name, O_RDONLY, 0)) < 0)
233 {
234 perror_with_name (files[0].name);
235 status = EXIT_TROUBLE;
236 }
237 if (files[1].desc == UNOPENED)
238 {
239 if ((same_files))
240 files[1].desc = files[0].desc;
241 else if ((files[1].desc = open (files[1].name, O_RDONLY, 0))
242 < 0)
243 {
244 perror_with_name (files[1].name);
245 status = EXIT_TROUBLE;
246 }
247 }
248
249 #if HAVE_SETMODE_DOS
250 if (binary)
251 for (f = 0; f < 2; f++)
252 if (0 <= files[f].desc)
253 set_binary_mode (files[f].desc, 1);
254 #endif
255 return status;
256 }
257 }
258
259 static
compare_files(const argslist * list,int * is_same_physical_file)260 int compare_files (const argslist* list, int* is_same_physical_file)
261 {
262 if ((files[0].desc != NONEXISTENT
263 && files[1].desc != NONEXISTENT
264 && 0 < same_file (&files[0].stat, &files[1].stat)
265 && same_file_attributes (&files[0].stat,
266 &files[1].stat)))
267 {
268 /* The two named files are actually the same physical file.
269 We know they are identical without actually reading them. */
270 *is_same_physical_file = 1;
271 return 0;
272 }
273 else
274 {
275 int status = diff_2_files (files, list);
276 /*
277 STATUS is 0 if no changes have been found,
278 1 in case of detected changes, -1 if either
279 file is binary.
280 */
281
282 *is_same_physical_file = 0;
283 return status;
284 }
285 }
286
287 static
rewind_files(void)288 int rewind_files (void)
289 {
290 off_t pos0, pos1;
291 int status = EXIT_SUCCESS;
292
293 if ((pos0 = lseek(files[0].desc, (off_t) 0, SEEK_SET)) < 0)
294 {
295 perror_with_name (files[0].name);
296 status = EXIT_TROUBLE;
297 }
298 if ((pos1 = lseek(files[1].desc, (off_t) 0, SEEK_SET)) < 0)
299 {
300 perror_with_name (files[1].name);
301 status = EXIT_TROUBLE;
302 }
303 return status;
304 }
305
306 static
set_file_pointers(FILE ** fpp1,FILE ** fpp2)307 int set_file_pointers (FILE** fpp1, FILE** fpp2)
308 {
309 int status = EXIT_SUCCESS;
310
311 if ( !(*fpp1 = fdopen (files[0].desc, "r")) )
312 {
313 perror_with_name (files[0].name);
314 status = EXIT_TROUBLE;
315 }
316 if ( !(*fpp2 = fdopen (files[1].desc, "r")) )
317 {
318 perror_with_name (files[1].name);
319 status = EXIT_TROUBLE;
320 }
321 return status;
322 }
323
324 static
close_files(void)325 int close_files (void)
326 {
327 /* Close the file descriptors. */
328
329 if (0 <= files[0].desc && close (files[0].desc) != 0)
330 {
331 perror_with_name (files[0].name);
332 return EXIT_TROUBLE;
333 }
334 if (0 <= files[1].desc && files[0].desc != files[1].desc
335 && close (files[1].desc) != 0)
336 {
337 perror_with_name (files[1].name);
338 return EXIT_TROUBLE;
339 }
340 return EXIT_SUCCESS;
341 }
342
343 static
isLocationDefined(difference_location loc)344 int isLocationDefined (difference_location loc)
345 {
346 return (loc.lineno1 + loc.fieldno1 + loc.lineno2 + loc.fieldno2 > 0
347 ? 1 : 0);
348 }
349
350 static
print_statistics(const argslist * list,statlist * statres)351 void print_statistics (const argslist* list, statlist* statres)
352 {
353 Real qm_abserr, qm_relerr;
354
355 #ifdef USE_GMP
356 initR (&qm_abserr);
357 initR (&qm_relerr);
358 #endif /* USE_GMP */
359 if (list->flag > 0)
360 {
361 fputs (_("\n In the computation of the following quantities\n only the errors with positive sign are considered:\n"),
362 stdout);
363 fputs (_(" differences due to numeric fields of the second file that are\n less than the corresponding fields in the first file are neglected\n\n"),
364 stdout);
365 }
366 if (list->flag < 0)
367 {
368 fputs (_("\n In the computation of the following quantities\n only the errors with negative sign are considered:\n"),
369 stdout);
370 fputs (_(" differences due to numeric fields of the second file that are\n greater than the corresponding fields in the first file are neglected\n\n"),
371 stdout);
372 }
373 if ( statres->Ndisperr == 0 )
374 {
375 if ( statres->Nentries == 0 )
376 fputs (_("\nNo numeric comparison has been done\n"),
377 stdout);
378 else
379 printf(ngettext (
380 "\nOne numeric comparison has been done and\nthe resulting numeric difference is negligible\n",
381 "\n%d numeric comparisons have been done and\nthe resulting numeric differences are all negligible\n",
382 statres->Nentries), statres->Nentries);
383 }
384 else if ( statres->Ndisperr == statres->Nentries )
385 {
386 printf(ngettext (
387 "\nOne numeric comparison has been done and\nhas produced an outcome beyond the tolerance threshold\n",
388 "\n%d numeric comparisons have been done, all of them\nhave produced an outcome beyond the tolerance threshold\n",
389 statres->Nentries), statres->Nentries);
390 }
391 else
392 {
393 /* Case 0 < LIST->NDISPERR < LIST->NENTRIES */
394 printf (ngettext (
395 "\nOne numeric comparison has been done,\n",
396 "\n%d numeric comparisons have been done,\n",
397 statres->Nentries), statres->Nentries);
398
399 printf (ngettext (
400 "only one numeric comparison has produced an outcome\nbeyond the tolerance threshold\n",
401 "%d numeric comparisons have produced an outcome\nbeyond the tolerance threshold\n",
402 statres->Ndisperr), statres->Ndisperr);
403 }
404
405 fputs (_("\nLargest absolute error in the set of the major numerical differences:\n"),
406 stdout);
407 printno (statres->Labserr, DEF_LIM);
408 fputs (_("\nCorresponding relative error:\n"), stdout);
409 printno (statres->Crelerr, DEF_LIM);
410 if ( (isLocationDefined (statres->Labserr_location)) )
411 {
412 printf (_("\nFirst occurrence (#line, #field) in the first file: %lu, %lu\n"),
413 statres->Labserr_location.lineno1, statres->Labserr_location.fieldno1+1);
414 printf (_("First occurrence (#line, #field) in the second file: %lu, %lu\n"),
415 statres->Labserr_location.lineno2, statres->Labserr_location.fieldno2+1);
416 }
417
418 fputs (_("\nLargest relative error in the set of the major numerical differences:\n"),
419 stdout);
420 printno (statres->Lrelerr, DEF_LIM);
421 fputs (_("\nCorresponding absolute error:\n"), stdout);
422 printno (statres->Cabserr, DEF_LIM);
423 if ( (isLocationDefined (statres->Rabserr_location)) )
424 {
425 printf (_("\nFirst occurrence (#line, #field) in the first file: %lu, %lu\n"),
426 statres->Rabserr_location.lineno1, statres->Rabserr_location.fieldno1+1);
427 printf (_("First occurrence (#line, #field) in the second file: %lu, %lu\n"),
428 statres->Rabserr_location.lineno2, statres->Rabserr_location.fieldno2+1);
429 }
430
431 fputs (_("\n\nSum of all absolute errors:\n"),
432 stdout);
433 printno (statres->N1abserr, DEF_LIM);
434 fputs (_("\nSum of the major absolute errors:\n"),
435 stdout);
436 printno (statres->N1disperr, DEF_LIM);
437 /* Arithmetic means */
438 divide_by_int (&statres->N1abserr, statres->Nentries, list->iscale);
439 divide_by_int (&statres->N1disperr, statres->Ndisperr, list->iscale);
440 fputs (_("\nArithmetic mean of all absolute errors:\n"),
441 stdout);
442 printno (statres->N1abserr, DEF_LIM);
443 fputs (_("\nArithmetic mean of the major absolute errors:\n"),
444 stdout);
445 printno (statres->N1disperr, DEF_LIM);
446
447 /* 2-norms and quadratic means of the errors */
448 copyR (&qm_abserr, statres->N2abserr);
449 divide_by_int (&qm_abserr, statres->Nentries, list->iscale);
450 square_root (&qm_abserr, list->iscale);
451 square_root (&statres->N2abserr, list->iscale);
452 fputs (_("\nSquare root of the sum of the squares of all absolute errors:\n"),
453 stdout);
454 printno (statres->N2abserr, DEF_LIM);
455 fputs (_("\nQuadratic mean of all absolute errors:\n"),
456 stdout);
457 printno (qm_abserr, DEF_LIM);
458
459 copyR (&qm_relerr, statres->N2disperr);
460 divide_by_int (&qm_relerr, statres->Ndisperr, list->iscale);
461 square_root (&qm_relerr, list->iscale);
462 square_root (&statres->N2disperr, list->iscale);
463 fputs (_("\nSquare root of the sum of the squares\nof the major absolute errors:\n"),
464 stdout);
465 printno (statres->N2disperr, DEF_LIM);
466 fputs (_("\nQuadratic mean of the major absolute errors:\n"),
467 stdout);
468 printno (qm_relerr, DEF_LIM);
469 putchar ('\n');
470 delR (&qm_relerr);
471 delR (&qm_abserr);
472 }
473
474 char **def_ifs = NULL;
475
476 static
clean_memory(argslist * pList,statlist * pRes)477 void clean_memory (argslist* pList, statlist* pRes)
478 {
479 emptyBitVector (&pList->optmask);
480 delete_string_vector (def_ifs);
481 dismiss_mpa_support (pList, pRes);
482 if ((pList->ifs1))
483 delete_string_vector (pList->ifs1);
484 if ((pList->ifs2))
485 delete_string_vector (pList->ifs2);
486 }
487
main(int argc,char * argv[])488 int main (int argc, char* argv[])
489 {
490 argslist list;
491 statlist statres;
492 int pHelp, pVersion;
493
494 #ifdef ENABLE_NLS
495 setlocale (LC_CTYPE, "");
496 setlocale (LC_MESSAGES, "");
497 #endif
498 bindtextdomain (PACKAGE, LOCALEDIR);
499 textdomain (PACKAGE);
500
501 def_ifs = ssplit (DEF_IFS, I_DEF_SEP);
502 if (!def_ifs)
503 {
504 fprintf (stderr, _("*** %s: memory exhausted\n"), PACKAGE);
505 return -1;
506 }
507
508 /* This code was used to discover the reason of a bug */
509 /*
510 #ifdef __USE_FILE_OFFSET64
511 printf ("\n %s: FILE OFFSET 64 in use, sizeof (struct stat) = %u\n", __FILE__,
512 sizeof (struct stat));
513 #else
514 printf ("\n %s: FILE OFFSET 64 NOT in use, sizeof (struct stat) = %u\n", __FILE__,
515 sizeof (struct stat));
516 #endif
517 */
518
519 load_defaults (&list, &statres);
520 init_mpa_support (&list, &statres);
521 init_flags ();
522
523 if ( setargs (argc, argv, &list) != 0 )
524 {
525 clean_memory (&list, &statres);
526 return -1;
527 }
528 pHelp = getBitAtPosition (&list.optmask, _H_MASK) == BIT_ON;
529 pVersion = getBitAtPosition (&list.optmask, _V_MASK) == BIT_ON;
530 if ((pHelp) || (pVersion))
531 {
532 if ((pVersion))
533 print_version(PACKAGE);
534 if ((pHelp))
535 print_help(PACKAGE);
536 clean_memory (&list, &statres);
537 return (argc > 2 ? -1 : 0);
538 }
539 else
540 {
541 int test = 0, ident_files = 0;
542 FILE *fp1, *fp2;
543 int qm = getBitAtPosition (&list.optmask, _Q_MASK);
544
545 if ( open_files (list.file1, list.file2) != EXIT_SUCCESS )
546 {
547 clean_memory (&list, &statres);
548 return EXIT_TROUBLE;
549 }
550 if (getBitAtPosition (&list.optmask, _F_MASK) == BIT_ON ||
551 getBitAtPosition (&list.optmask, _Z_MASK) == BIT_ON ||
552 getBitAtPosition (&list.optmask, _SZ_MASK) == BIT_ON )
553 test = compare_files (&list, &ident_files);
554
555 if (test < 0)
556 {
557 fputs (_("\n*** The requested comparison cannot be performed:\n"), stdout);
558 printf (_("*** At least one between \"%s\" and \"%s\" is a binary file\n"),
559 list.file1, list.file2);
560 clean_memory (&list, &statres);
561 close_files ();
562 return EXIT_TROUBLE;
563 }
564
565 if ( getBitAtPosition (&list.optmask, _F_MASK) == BIT_OFF &&
566 !ident_files )
567 {
568 if ( rewind_files () != EXIT_SUCCESS ||
569 set_file_pointers (&fp1, &fp2) != EXIT_SUCCESS )
570 {
571 clean_memory (&list, &statres);
572 close_files ();
573 return EXIT_TROUBLE;
574 }
575 test = cmp_files (fp1, fp2, &list, &statres);
576 if (getBitAtPosition (&list.optmask, _SS_MASK) == BIT_ON &&
577 test <= 1)
578 print_statistics (&list, &statres);
579 }
580 if (test == 0 && !qm)
581 {
582 if (getBitAtPosition (&list.optmask, _F_MASK) == BIT_ON)
583 printf (_("\n+++ Files \"%s\" and \"%s\" have the same structure\n"),
584 list.file1, list.file2);
585 else
586 printf (_("\n+++ Files \"%s\" and \"%s\" are equal\n"),
587 list.file1, list.file2);
588 }
589 if (test == 1 && !qm)
590 printf (_("\n+++ File \"%s\" differs from file \"%s\"\n"),
591 list.file1, list.file2);
592 erase_flags ();
593 clean_memory (&list, &statres);
594 close_files ();
595 return test;
596 }
597 }
598