1 /*
2     Numdiff - compare putatively similar files,
3     ignoring small numeric differences
4     Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017  Ivano Primi  <ivprimi@libero.it>
5 
6     This program is free software: you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation, either version 3 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 */
19 
/* Keep this inclusion at the very beginning: including it later */
/* causes problems with the symbol __USE_FILE_OFFSET64           */
22 #include"numdiff.h"
23 #include"linesplit.h"
24 #include<stdio.h>
25 #include<stdlib.h> /* for free() */
26 #include<string.h>
27 #if HAVE_GETTIMEOFDAY
28 #include<sys/time.h>
29 #endif
30 #ifdef ENABLE_NLS
31 #include<locale.h>
32 #endif
33 
34 /* See cmpfns.c */
35 extern int cmp_files (FILE* pf1, FILE* pf2, const argslist* argl, statlist* statres);
36 
37 /* See options.c */
38 extern void print_version (const char* progname);
39 extern void print_help (const char* progname);
40 extern int setargs (int argc, char* argv[], argslist *list);
41 
42 static
load_defaults(argslist * list,statlist * statres)43 void load_defaults (argslist * list, statlist* statres)
44 {
45   int i;
46 
47   binary = 0;
48   suppress_common_lines = 0;
49   ignore_white_space = IGNORE_NO_WHITE_SPACE;
50   expand_tabs = 0;
51   speed_large_files = 0;
52   program_name = PACKAGE;
53 
54   list->optmask = newBitVector (MAX_NUMDIFF_OPTIONS);
55   list->output_mode = OUTMODE_NORMAL;
56   for (i=0; i < FIELDMASK_SIZE; i++)
57     {
58       list->ghostmask1[i] = list->ghostmask2[i] = 0x0;
59       list->tblurmask1[i] = list->tblurmask2[i] = 0x0;
60       list->pblurmask1[i] = list->pblurmask2[i] = 0x0;
61     }
62   list->relerr_formula = CLASSIC_FORMULA;
63   statres->Labserr_location.lineno1 = statres->Labserr_location.fieldno1 = 0;
64   statres->Labserr_location.lineno2 = statres->Labserr_location.fieldno2 = 0;
65   statres->Rabserr_location.lineno1 = statres->Rabserr_location.fieldno1 = 0;
66   statres->Rabserr_location.lineno2 = statres->Rabserr_location.fieldno2 = 0;
67   statres->Nentries = statres->Ndisperr = 0;
68   list->flag = 0;
69   list->ifs1 = list->ifs2 = NULL;
70   list->iscale = ISCALE;
71   list->nf1.dp = DP;
72   list->nf1.thsep = THSEP;
73   list->nf1.grouping = GROUPING;
74   list->nf1.pos_sign = POS_SIGN;
75   list->nf1.neg_sign = NEG_SIGN;
76   list->nf1.ech = ECH;
77   list->nf1.iu = IU;
78   list->file1 = list->file2 = NULL;
79   list->nf2 = list->nf1;
80   list->nf1.currency = get_separating_string (CURRENCY);
81   list->nf2.currency = get_separating_string (CURRENCY);
82 }
83 
84 static
init_mpa_support(argslist * list,statlist * statres)85 void init_mpa_support (argslist* list, statlist* statres)
86 {
87   init_mpa(list->iscale);
88   initR (&statres->Labserr);
89   initR (&statres->Crelerr);
90   initR (&statres->Lrelerr);
91   initR (&statres->Cabserr);
92   initR (&statres->N1abserr);
93   initR (&statres->N1disperr);
94   initR (&statres->N2abserr);
95   initR (&statres->N2disperr);
96   list->maxabserr = thrlist_new ();
97   list->maxrelerr = thrlist_new ();
98 }
99 
100 static
dismiss_mpa_support(argslist * list,statlist * statres)101 void dismiss_mpa_support (argslist* list, statlist* statres)
102 {
103   delR (&statres->Labserr);
104   delR (&statres->Crelerr);
105   delR (&statres->Lrelerr);
106   delR (&statres->Cabserr);
107   delR (&statres->N1abserr);
108   delR (&statres->N1disperr);
109   delR (&statres->N2abserr);
110   delR (&statres->N2disperr);
111   thrlist_dispose (&list->maxabserr);
112   thrlist_dispose (&list->maxrelerr);
113   end_mpa();
114 }
115 
/*
  Store the current time in the modification time of *ST.

  When ST_MTIM_NSEC is defined a sub-second clock is tried first:
  clock_gettime() if available, then gettimeofday().  If neither is
  available, or both fail, time() is used and only the seconds are set.
*/
static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  /* Best resolution: fill the whole timestamp in one call */
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, NULL) == 0)
      {
        st->st_mtime = now.tv_sec;
        /* microseconds -> nanoseconds */
        st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Last resort: resolution of one second */
  time (&st->st_mtime);
}
142 
/*
  Special values stored in files[f].desc in place of a real descriptor.
  Real descriptors are >= 0; an errno value E recorded while inspecting
  a file is stored as -3 - E, so that it cannot clash with the first
  two markers.
*/
#define NONEXISTENT (-1)   /* the file does not exist */
#define UNOPENED (-2)      /* the file has not been opened (e.g. directory) */
#define ERRNO_ENCODE(err) (-3 - (err))     /* errno value -> marker */

/* marker -> errno value; the result is negative for anything that is
   not an encoded errno value */
#define ERRNO_DECODE(desc) (-3 - (desc))

/* True if the F-th entry of files[] is a directory */
#define DIR_P(f) (S_ISDIR (files[f].stat.st_mode) != 0)
150 
151 static
open_files(const char * name0,const char * name1)152 int open_files (const char* name0, const char* name1)
153 {
154   register int f;
155   int status = EXIT_SUCCESS;
156   bool same_files;
157 
158   if (!name0 || !name1)
159     return EXIT_TROUBLE;
160 
161   memset (files, 0, sizeof files);
162   files[0].desc = UNOPENED;
163   files[1].desc = UNOPENED;
164   files[0].name = name0;
165   files[1].name = name1;
166 
167   /* Stat the files.  */
168 
169   for (f = 0; f < 2; f++)
170     {
171       if ((f) && file_name_cmp (files[f].name, files[0].name) == 0)
172 	{
173 	  files[f].desc = files[0].desc;
174 	  files[f].stat = files[0].stat;
175 	}
176       else if (strcmp (files[f].name, "-") == 0)
177 	{
178 	  files[f].desc = STDIN_FILENO;
179 	  if (fstat (STDIN_FILENO, &files[f].stat) != 0)
180 	    files[f].desc = ERRNO_ENCODE (errno);
181 	  else
182 	    {
183 	      if (S_ISREG (files[f].stat.st_mode))
184 		{
185 		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
186 		  if (pos < 0)
187 		    files[f].desc = ERRNO_ENCODE (errno);
188 		  else
189 		    files[f].stat.st_size =
190 		      MAX (0, files[f].stat.st_size - pos);
191 		}
192 
193 	      /* POSIX 1003.1-2001 requires current time for
194 		 stdin.  */
195 	      set_mtime_to_now (&files[f].stat);
196 	    }
197 	}
198       else if (stat (files[f].name, &files[f].stat) != 0)
199 	files[f].desc = ERRNO_ENCODE (errno);
200     }
201 
202   for (f = 0; f < 2; f++)
203     {
204       int e = ERRNO_DECODE (files[f].desc);
205       if (0 <= e)
206 	{
207 	  errno = e;
208 	  perror_with_name (files[f].name);
209 	  status = EXIT_TROUBLE;
210 	}
211     }
212 
213   if (status != EXIT_SUCCESS)
214     /* One of the files should exist but does not.  */
215     return status;
216   else if (DIR_P (0) | DIR_P (1))
217     return EXIT_TROUBLE;
218   else
219     {
220       /* Both exist and neither is a directory.  */
221       /* Are they the same file ?                */
222       same_files
223 	= (files[0].desc != NONEXISTENT
224 	   && files[1].desc != NONEXISTENT
225 	   && 0 < same_file (&files[0].stat, &files[1].stat)
226 	   && same_file_attributes (&files[0].stat,
227 				    &files[1].stat));
228 
229       /* Open the files and record their descriptors.  */
230 
231       if (files[0].desc == UNOPENED)
232 	if ((files[0].desc = open (files[0].name, O_RDONLY, 0)) < 0)
233 	  {
234 	    perror_with_name (files[0].name);
235 	    status = EXIT_TROUBLE;
236 	  }
237       if (files[1].desc == UNOPENED)
238 	{
239 	  if ((same_files))
240 	    files[1].desc = files[0].desc;
241 	  else if ((files[1].desc = open (files[1].name, O_RDONLY, 0))
242 		   < 0)
243 	    {
244 	      perror_with_name (files[1].name);
245 	      status = EXIT_TROUBLE;
246 	    }
247 	}
248 
249 #if HAVE_SETMODE_DOS
250       if (binary)
251 	for (f = 0; f < 2; f++)
252 	  if (0 <= files[f].desc)
253 	    set_binary_mode (files[f].desc, 1);
254 #endif
255       return status;
256     }
257 }
258 
259 static
compare_files(const argslist * list,int * is_same_physical_file)260 int compare_files (const argslist* list, int* is_same_physical_file)
261 {
262   if ((files[0].desc != NONEXISTENT
263        && files[1].desc != NONEXISTENT
264        && 0 < same_file (&files[0].stat, &files[1].stat)
265        && same_file_attributes (&files[0].stat,
266 				   &files[1].stat)))
267     {
268       /* The two named files are actually the same physical file.
269 	 We know they are identical without actually reading them.  */
270       *is_same_physical_file = 1;
271       return 0;
272     }
273   else
274     {
275       int status = diff_2_files (files, list);
276       /*
277          STATUS is 0 if no changes have been found,
278          1 in case of detected changes, -1 if either
279          file is binary.
280       */
281 
282       *is_same_physical_file = 0;
283       return status;
284     }
285 }
286 
287 static
rewind_files(void)288 int rewind_files (void)
289 {
290   off_t pos0, pos1;
291   int status = EXIT_SUCCESS;
292 
293   if ((pos0 = lseek(files[0].desc, (off_t) 0, SEEK_SET)) < 0)
294     {
295       perror_with_name (files[0].name);
296       status = EXIT_TROUBLE;
297     }
298   if ((pos1 = lseek(files[1].desc, (off_t) 0, SEEK_SET)) < 0)
299     {
300       perror_with_name (files[1].name);
301       status = EXIT_TROUBLE;
302     }
303   return status;
304 }
305 
306 static
set_file_pointers(FILE ** fpp1,FILE ** fpp2)307 int set_file_pointers (FILE** fpp1, FILE** fpp2)
308 {
309   int status = EXIT_SUCCESS;
310 
311   if ( !(*fpp1 = fdopen (files[0].desc, "r")) )
312     {
313       perror_with_name (files[0].name);
314       status = EXIT_TROUBLE;
315     }
316   if ( !(*fpp2 = fdopen (files[1].desc, "r")) )
317     {
318       perror_with_name (files[1].name);
319       status = EXIT_TROUBLE;
320     }
321   return status;
322 }
323 
324 static
close_files(void)325 int close_files (void)
326 {
327   /* Close the file descriptors.  */
328 
329   if (0 <= files[0].desc && close (files[0].desc) != 0)
330     {
331       perror_with_name (files[0].name);
332       return EXIT_TROUBLE;
333     }
334   if (0 <= files[1].desc && files[0].desc != files[1].desc
335       && close (files[1].desc) != 0)
336     {
337       perror_with_name (files[1].name);
338       return EXIT_TROUBLE;
339     }
340   return EXIT_SUCCESS;
341 }
342 
343 static
isLocationDefined(difference_location loc)344 int isLocationDefined (difference_location loc)
345 {
346   return (loc.lineno1 + loc.fieldno1 + loc.lineno2 + loc.fieldno2 > 0
347 	  ? 1 : 0);
348 }
349 
350 static
print_statistics(const argslist * list,statlist * statres)351 void print_statistics (const argslist* list, statlist* statres)
352 {
353   Real qm_abserr, qm_relerr;
354 
355 #ifdef USE_GMP
356   initR (&qm_abserr);
357   initR (&qm_relerr);
358 #endif /* USE_GMP */
359   if (list->flag > 0)
360     {
361       fputs (_("\n  In the computation of the following quantities\n  only the errors with positive sign are considered:\n"),
362 	     stdout);
363       fputs (_("  differences due to numeric fields of the second file that are\n  less than the corresponding fields in the first file are neglected\n\n"),
364 	     stdout);
365     }
366   if (list->flag < 0)
367     {
368       fputs (_("\n  In the computation of the following quantities\n  only the errors with negative sign are considered:\n"),
369 	     stdout);
370       fputs (_("  differences due to numeric fields of the second file that are\n  greater than the corresponding fields in the first file are neglected\n\n"),
371 	     stdout);
372     }
373   if ( statres->Ndisperr == 0 )
374     {
375       if ( statres->Nentries == 0 )
376 	fputs (_("\nNo numeric comparison has been done\n"),
377 	       stdout);
378       else
379 	printf(ngettext (
380 			 "\nOne numeric comparison has been done and\nthe resulting numeric difference is negligible\n",
381 			 "\n%d numeric comparisons have been done and\nthe resulting numeric differences are all negligible\n",
382 			 statres->Nentries), statres->Nentries);
383     }
384   else if ( statres->Ndisperr == statres->Nentries )
385     {
386       printf(ngettext (
387 		       "\nOne numeric comparison has been done and\nhas produced an outcome beyond the tolerance threshold\n",
388 		       "\n%d numeric comparisons have been done, all of them\nhave produced an outcome beyond the tolerance threshold\n",
389 		       statres->Nentries), statres->Nentries);
390     }
391   else
392     {
393       /* Case  0 < LIST->NDISPERR < LIST->NENTRIES */
394       printf (ngettext (
395 			"\nOne numeric comparison has been done,\n",
396 			"\n%d numeric comparisons have been done,\n",
397 			statres->Nentries), statres->Nentries);
398 
399       printf (ngettext (
400 			"only one numeric comparison has produced an outcome\nbeyond the tolerance threshold\n",
401 			"%d numeric comparisons have produced an outcome\nbeyond the tolerance threshold\n",
402 			statres->Ndisperr), statres->Ndisperr);
403     }
404 
405   fputs (_("\nLargest absolute error in the set of the major numerical differences:\n"),
406 	 stdout);
407   printno (statres->Labserr, DEF_LIM);
408   fputs (_("\nCorresponding relative error:\n"), stdout);
409   printno (statres->Crelerr, DEF_LIM);
410   if ( (isLocationDefined (statres->Labserr_location)) )
411     {
412       printf (_("\nFirst occurrence (#line, #field) in the  first file: %lu, %lu\n"),
413 	      statres->Labserr_location.lineno1, statres->Labserr_location.fieldno1+1);
414       printf (_("First occurrence (#line, #field) in the second file: %lu, %lu\n"),
415 	      statres->Labserr_location.lineno2, statres->Labserr_location.fieldno2+1);
416     }
417 
418   fputs (_("\nLargest relative error in the set of the major numerical differences:\n"),
419 	 stdout);
420   printno (statres->Lrelerr, DEF_LIM);
421   fputs (_("\nCorresponding absolute error:\n"), stdout);
422   printno (statres->Cabserr, DEF_LIM);
423   if ( (isLocationDefined (statres->Rabserr_location)) )
424     {
425       printf (_("\nFirst occurrence (#line, #field) in the  first file: %lu, %lu\n"),
426 	      statres->Rabserr_location.lineno1, statres->Rabserr_location.fieldno1+1);
427       printf (_("First occurrence (#line, #field) in the second file: %lu, %lu\n"),
428 	      statres->Rabserr_location.lineno2, statres->Rabserr_location.fieldno2+1);
429     }
430 
431   fputs (_("\n\nSum of all absolute errors:\n"),
432 	 stdout);
433   printno (statres->N1abserr, DEF_LIM);
434   fputs (_("\nSum of the major absolute errors:\n"),
435 	 stdout);
436   printno (statres->N1disperr, DEF_LIM);
437   /* Arithmetic means */
438   divide_by_int (&statres->N1abserr, statres->Nentries, list->iscale);
439   divide_by_int (&statres->N1disperr, statres->Ndisperr, list->iscale);
440   fputs (_("\nArithmetic mean of all absolute errors:\n"),
441 	 stdout);
442   printno (statres->N1abserr, DEF_LIM);
443   fputs (_("\nArithmetic mean of the major absolute errors:\n"),
444 	 stdout);
445   printno (statres->N1disperr, DEF_LIM);
446 
447   /* 2-norms and quadratic means of the errors */
448   copyR (&qm_abserr, statres->N2abserr);
449   divide_by_int (&qm_abserr, statres->Nentries, list->iscale);
450   square_root (&qm_abserr, list->iscale);
451   square_root (&statres->N2abserr, list->iscale);
452   fputs (_("\nSquare root of the sum of the squares of all absolute errors:\n"),
453 	 stdout);
454   printno (statres->N2abserr, DEF_LIM);
455   fputs (_("\nQuadratic mean of all absolute errors:\n"),
456 	 stdout);
457   printno (qm_abserr, DEF_LIM);
458 
459   copyR (&qm_relerr, statres->N2disperr);
460   divide_by_int (&qm_relerr, statres->Ndisperr, list->iscale);
461   square_root (&qm_relerr, list->iscale);
462   square_root (&statres->N2disperr, list->iscale);
463   fputs (_("\nSquare root of the sum of the squares\nof the major absolute errors:\n"),
464 	 stdout);
465   printno (statres->N2disperr, DEF_LIM);
466   fputs (_("\nQuadratic mean of the major absolute errors:\n"),
467 	 stdout);
468   printno (qm_relerr, DEF_LIM);
469   putchar ('\n');
470   delR (&qm_relerr);
471   delR (&qm_abserr);
472 }
473 
474 char **def_ifs = NULL;
475 
476 static
clean_memory(argslist * pList,statlist * pRes)477 void clean_memory (argslist* pList, statlist* pRes)
478 {
479   emptyBitVector (&pList->optmask);
480   delete_string_vector (def_ifs);
481   dismiss_mpa_support (pList, pRes);
482   if ((pList->ifs1))
483     delete_string_vector (pList->ifs1);
484   if ((pList->ifs2))
485     delete_string_vector (pList->ifs2);
486 }
487 
main(int argc,char * argv[])488 int main (int argc, char* argv[])
489 {
490   argslist list;
491   statlist statres;
492   int pHelp, pVersion;
493 
494 #ifdef ENABLE_NLS
495   setlocale (LC_CTYPE, "");
496   setlocale (LC_MESSAGES, "");
497 #endif
498   bindtextdomain (PACKAGE, LOCALEDIR);
499   textdomain (PACKAGE);
500 
501   def_ifs = ssplit (DEF_IFS, I_DEF_SEP);
502   if (!def_ifs)
503     {
504       fprintf (stderr, _("***  %s: memory exhausted\n"), PACKAGE);
505       return -1;
506     }
507 
508   /* This code was used to discover the reason of a bug */
509   /*
510     #ifdef __USE_FILE_OFFSET64
511     printf ("\n %s: FILE OFFSET 64 in use, sizeof (struct stat) = %u\n", __FILE__,
512     sizeof (struct stat));
513     #else
514     printf ("\n %s: FILE OFFSET 64 NOT in use, sizeof (struct stat) = %u\n", __FILE__,
515     sizeof (struct stat));
516     #endif
517   */
518 
519   load_defaults (&list, &statres);
520   init_mpa_support (&list, &statres);
521   init_flags ();
522 
523   if ( setargs (argc, argv, &list) != 0 )
524     {
525       clean_memory (&list, &statres);
526       return -1;
527     }
528   pHelp = getBitAtPosition (&list.optmask, _H_MASK) == BIT_ON;
529   pVersion = getBitAtPosition (&list.optmask, _V_MASK) == BIT_ON;
530   if ((pHelp) || (pVersion))
531     {
532       if ((pVersion))
533 	print_version(PACKAGE);
534       if ((pHelp))
535 	print_help(PACKAGE);
536       clean_memory (&list, &statres);
537       return (argc > 2 ? -1 : 0);
538     }
539   else
540     {
541       int test = 0, ident_files = 0;
542       FILE *fp1, *fp2;
543       int qm = getBitAtPosition (&list.optmask, _Q_MASK);
544 
545       if ( open_files (list.file1, list.file2) != EXIT_SUCCESS )
546         {
547 	  clean_memory (&list, &statres);
548           return EXIT_TROUBLE;
549         }
550       if (getBitAtPosition (&list.optmask, _F_MASK) == BIT_ON ||
551 	  getBitAtPosition (&list.optmask, _Z_MASK) == BIT_ON ||
552 	  getBitAtPosition (&list.optmask, _SZ_MASK) == BIT_ON )
553 	test = compare_files (&list, &ident_files);
554 
555       if (test < 0)
556         {
557           fputs (_("\n***  The requested comparison cannot be performed:\n"), stdout);
558 	  printf (_("***  At least one between \"%s\" and \"%s\" is a binary file\n"),
559                   list.file1, list.file2);
560 	  clean_memory (&list, &statres);
561 	  close_files ();
562           return EXIT_TROUBLE;
563         }
564 
565       if ( getBitAtPosition (&list.optmask, _F_MASK) == BIT_OFF &&
566 	   !ident_files )
567 	{
568 	  if ( rewind_files () != EXIT_SUCCESS ||
569 	       set_file_pointers (&fp1, &fp2) != EXIT_SUCCESS )
570             {
571 	      clean_memory (&list, &statres);
572               close_files ();
573               return EXIT_TROUBLE;
574             }
575 	  test = cmp_files (fp1, fp2, &list, &statres);
576 	  if (getBitAtPosition (&list.optmask, _SS_MASK) == BIT_ON &&
577 	      test <= 1)
578 	    print_statistics (&list, &statres);
579 	}
580       if (test == 0 && !qm)
581 	{
582 	  if (getBitAtPosition (&list.optmask, _F_MASK) == BIT_ON)
583 	    printf (_("\n+++  Files \"%s\" and \"%s\" have the same structure\n"),
584 		    list.file1, list.file2);
585 	  else
586 	    printf (_("\n+++  Files \"%s\" and \"%s\" are equal\n"),
587 		    list.file1, list.file2);
588 	}
589       if (test == 1 && !qm)
590 	printf (_("\n+++  File \"%s\" differs from file \"%s\"\n"),
591 		list.file1, list.file2);
592       erase_flags ();
593       clean_memory (&list, &statres);
594       close_files ();
595       return test;
596     }
597 }
598