1 //	cssutil.c - utility for munging css files, version X0.1
2 
3 // Copyright 2009 William S. Yerazunis.
4 // This file is under GPLv3, as described in COPYING.
5 
6 //  include some standard files
7 #include "crm114_sysincludes.h"
8 
9 //  include any local crm114 configuration file
10 #include "crm114_config.h"
11 
12 //  include the crm114 data structures file
13 #include "crm114_structs.h"
14 
15 //  and include the routine declarations file
16 #include "crm114.h"
17 
main(int argc,char ** argv)18 int main (int argc, char **argv)
19 {
20 
21   long i,j,k;    //  some random counters, when we need a loop
22   long hfsize, hfsize1, hfsize2;
23 
24   long f1, f2;
25   long sim, diff, dom1, dom2, hclash, kclash;
26 
27   {
28     struct stat statbuf;    //  filestat buffer
29     FEATUREBUCKET_STRUCT *h1, *h2;              //  the text of the hash file
30     //   filename is argv [1]
31     //             and stat it to get it's length
32     if(!argv[1] || !argv[2])
33       {
34         fprintf (stdout, "Usage: cssdiff <cssfile1> <cssfile2>\n");
35         return (EXIT_SUCCESS);
36       };
37     //             quick check- does the first file even exist?
38     k = stat (argv[1], &statbuf);
39     if (k != 0)
40       {
41 	fprintf (stderr, "\n CSS file '%s' not found. \n", argv[1]);
42 	exit (EXIT_FAILURE);
43       };
44     //
45     hfsize = statbuf.st_size;
46     //         mmap the hash file into memory so we can bitwhack it
47     h1 = (FEATUREBUCKET_STRUCT *) crm_mmap_file (argv[1],
48 						 0, hfsize,
49 						 PROT_READ | PROT_WRITE,
50 						 MAP_SHARED,
51 						 NULL);
52 
53     if (h1 == MAP_FAILED)
54       {
55 	fprintf (stderr, "\n MMAP failed on file %s\n",
56 		 argv[1]);
57 	exit (EXIT_FAILURE);
58       };
59     hfsize1 = statbuf.st_size / sizeof (FEATUREBUCKET_STRUCT);
60 
61     //
62     //  and repeat the process for the second file:
63     k = stat (argv[2], &statbuf);
64     //             quick check- does the file even exist?
65     if (k != 0)
66       {
67 	fprintf (stderr, "\n.CSS file '%s' not found.\n", argv[2]);
68 	exit (EXIT_FAILURE);
69       };
70 
71     hfsize2 = statbuf.st_size;
72     //         mmap the hash file into memory so we can bitwhack it
73     h2 = (FEATUREBUCKET_STRUCT *) crm_mmap_file (argv[2],
74 						 0, hfsize2,
75 						 PROT_READ | PROT_WRITE,
76 						 MAP_SHARED,
77 						 NULL);
78     if (h2 == MAP_FAILED)
79       {
80 	fprintf (stderr, "\n MMAP failed on file %s\n",
81 		 argv[2]);
82 	exit (EXIT_FAILURE);
83       };
84 
85     hfsize2 = hfsize2 / sizeof (FEATUREBUCKET_STRUCT);
86 
87     fprintf (stderr, "Sparse spectra file %s has %ld bins total\n",
88 	     argv[1], hfsize1);
89 
90 
91     fprintf (stdout, "Sparse spectra file %s has %ld bins total\n",
92 	     argv[2], hfsize2);
93 
94     //
95     //
96     if (hfsize1 != hfsize2)
97       {
98 	fprintf (stderr,
99 		 "\n.CSS files %s, %s :\n lengths differ: %ld vs %ld.\n",
100 		 argv[1],argv[2], hfsize1, hfsize2);
101 	fprintf (stderr, "\n This is not a fatal error, but be warned.\n");
102       };
103 
104     f1 = 0;
105     f2 = 0;
106     sim  = 0;
107     diff = 0;
108     dom1 = 0;
109     dom2 = 0;
110     hclash = 0;
111     kclash = 0;
112     //
113     //   The algorithm - for each file,
114     //                      for each bucket in each file
115     //                          find corresponding bucket in other file
116     //                              increment dom1 or dom2 as appropriate
117     //                              always increment sim and diff
118     //                          end
119     //                      end
120     //                      divide sim and diff by 2, as they are doublecounted
121     //                      print statistics and exit.
122     //
123     // start at 1 - no need to check bin 0 (version).
124     for ( i = 1; i < hfsize1; i++)
125       {
126 	if (   h1[i].key != 0 )
127 	  {
128 	    f1 += h1[i].value;
129 	    k = h1[i].hash % hfsize2;
130 	    if (k == 0)
131 	      k = 1;
132 	    while (h2[k].value != 0 &&
133 		   (h2[k].hash != h1[i].hash
134 		    || h2[k].key != h1[i].key))
135 	      {
136 		k++;
137 		if (k >= hfsize2) k = 1;
138 	      };
139 
140 	    //   Now we've found the corresponding (or vacant) slot in
141 	    //   h2.  Do our tallies...
142 	    j = h1[i].value ;
143 	    if (j > h2[k].value ) j = h2[k].value;
144 	    sim +=  j;
145 
146 	    j = h1[i].value - h2[k].value;
147 	    if (j < 0) j = -j;
148 	    diff += j;
149 
150 	    j = h1[i].value - h2[k].value;
151 	    if (j < 0) j = 0;
152 	    dom1 += j;
153 	  };
154       };
155     //
156     //      And repeat for file 2.
157     for ( i = 1; i < hfsize2; i++)
158       {
159 	if (   h2[i].key != 0 )
160 	  {
161 	    f2 += h2[i].value;
162 	    k = h2[i].hash % hfsize1;
163 	    if (k == 0)
164 		k = 1;
165 	      while (h1[k].value != 0 &&
166 		     (h1[k].hash != h2[i].hash
167 		      || h1[k].key != h2[i].key))
168 		{
169 		  k++;
170 		  if (k >= hfsize1) k = 1;
171 		};
172 
173 	      //   Now we've found the corresponding (or vacant) slot in
174 	      //   h1.  Do our tallies...
175 	      j = h2[i].value ;
176 	      if (j > h1[k].value ) j = h1[k].value;
177 	      sim +=  j;
178 
179 	      j = h1[k].value - h2[i].value;
180 	      if (j < 0) j = -j;
181 	      diff += j;
182 
183 	      j = h2[i].value - h1[k].value;
184 	      if (j < 0) j = 0;
185 	      dom2 += j;
186 	  };
187       };
188 
189     fprintf (stdout, "\n File 1 total features            : %12ld", f1);
190     fprintf (stdout, "\n File 2 total features            : %12ld\n", f2);
191 
192     fprintf (stdout, "\n Similarities between files       : %12ld", sim/2);
193     fprintf (stdout, "\n Differences between files        : %12ld\n", diff/2);
194 
195     fprintf (stdout, "\n File 1 dominates file 2          : %12ld", dom1);
196     fprintf (stdout, "\n File 2 dominates file 1          : %12ld\n", dom2);
197 
198   }
199   return 0;
200 }
201