1 // cssutil.c - utility for munging css files, version X0.1
2
3 // Copyright 2009 William S. Yerazunis.
4 // This file is under GPLv3, as described in COPYING.
5
6 // include some standard files
7 #include "crm114_sysincludes.h"
8
9 // include any local crm114 configuration file
10 #include "crm114_config.h"
11
12 // include the crm114 data structures file
13 #include "crm114_structs.h"
14
15 // and include the routine declarations file
16 #include "crm114.h"
17
main(int argc,char ** argv)18 int main (int argc, char **argv)
19 {
20
21 long i,j,k; // some random counters, when we need a loop
22 long hfsize, hfsize1, hfsize2;
23
24 long f1, f2;
25 long sim, diff, dom1, dom2, hclash, kclash;
26
27 {
28 struct stat statbuf; // filestat buffer
29 FEATUREBUCKET_STRUCT *h1, *h2; // the text of the hash file
30 // filename is argv [1]
31 // and stat it to get it's length
32 if(!argv[1] || !argv[2])
33 {
34 fprintf (stdout, "Usage: cssdiff <cssfile1> <cssfile2>\n");
35 return (EXIT_SUCCESS);
36 };
37 // quick check- does the first file even exist?
38 k = stat (argv[1], &statbuf);
39 if (k != 0)
40 {
41 fprintf (stderr, "\n CSS file '%s' not found. \n", argv[1]);
42 exit (EXIT_FAILURE);
43 };
44 //
45 hfsize = statbuf.st_size;
46 // mmap the hash file into memory so we can bitwhack it
47 h1 = (FEATUREBUCKET_STRUCT *) crm_mmap_file (argv[1],
48 0, hfsize,
49 PROT_READ | PROT_WRITE,
50 MAP_SHARED,
51 NULL);
52
53 if (h1 == MAP_FAILED)
54 {
55 fprintf (stderr, "\n MMAP failed on file %s\n",
56 argv[1]);
57 exit (EXIT_FAILURE);
58 };
59 hfsize1 = statbuf.st_size / sizeof (FEATUREBUCKET_STRUCT);
60
61 //
62 // and repeat the process for the second file:
63 k = stat (argv[2], &statbuf);
64 // quick check- does the file even exist?
65 if (k != 0)
66 {
67 fprintf (stderr, "\n.CSS file '%s' not found.\n", argv[2]);
68 exit (EXIT_FAILURE);
69 };
70
71 hfsize2 = statbuf.st_size;
72 // mmap the hash file into memory so we can bitwhack it
73 h2 = (FEATUREBUCKET_STRUCT *) crm_mmap_file (argv[2],
74 0, hfsize2,
75 PROT_READ | PROT_WRITE,
76 MAP_SHARED,
77 NULL);
78 if (h2 == MAP_FAILED)
79 {
80 fprintf (stderr, "\n MMAP failed on file %s\n",
81 argv[2]);
82 exit (EXIT_FAILURE);
83 };
84
85 hfsize2 = hfsize2 / sizeof (FEATUREBUCKET_STRUCT);
86
87 fprintf (stderr, "Sparse spectra file %s has %ld bins total\n",
88 argv[1], hfsize1);
89
90
91 fprintf (stdout, "Sparse spectra file %s has %ld bins total\n",
92 argv[2], hfsize2);
93
94 //
95 //
96 if (hfsize1 != hfsize2)
97 {
98 fprintf (stderr,
99 "\n.CSS files %s, %s :\n lengths differ: %ld vs %ld.\n",
100 argv[1],argv[2], hfsize1, hfsize2);
101 fprintf (stderr, "\n This is not a fatal error, but be warned.\n");
102 };
103
104 f1 = 0;
105 f2 = 0;
106 sim = 0;
107 diff = 0;
108 dom1 = 0;
109 dom2 = 0;
110 hclash = 0;
111 kclash = 0;
112 //
113 // The algorithm - for each file,
114 // for each bucket in each file
115 // find corresponding bucket in other file
116 // increment dom1 or dom2 as appropriate
117 // always increment sim and diff
118 // end
119 // end
120 // divide sim and diff by 2, as they are doublecounted
121 // print statistics and exit.
122 //
123 // start at 1 - no need to check bin 0 (version).
124 for ( i = 1; i < hfsize1; i++)
125 {
126 if ( h1[i].key != 0 )
127 {
128 f1 += h1[i].value;
129 k = h1[i].hash % hfsize2;
130 if (k == 0)
131 k = 1;
132 while (h2[k].value != 0 &&
133 (h2[k].hash != h1[i].hash
134 || h2[k].key != h1[i].key))
135 {
136 k++;
137 if (k >= hfsize2) k = 1;
138 };
139
140 // Now we've found the corresponding (or vacant) slot in
141 // h2. Do our tallies...
142 j = h1[i].value ;
143 if (j > h2[k].value ) j = h2[k].value;
144 sim += j;
145
146 j = h1[i].value - h2[k].value;
147 if (j < 0) j = -j;
148 diff += j;
149
150 j = h1[i].value - h2[k].value;
151 if (j < 0) j = 0;
152 dom1 += j;
153 };
154 };
155 //
156 // And repeat for file 2.
157 for ( i = 1; i < hfsize2; i++)
158 {
159 if ( h2[i].key != 0 )
160 {
161 f2 += h2[i].value;
162 k = h2[i].hash % hfsize1;
163 if (k == 0)
164 k = 1;
165 while (h1[k].value != 0 &&
166 (h1[k].hash != h2[i].hash
167 || h1[k].key != h2[i].key))
168 {
169 k++;
170 if (k >= hfsize1) k = 1;
171 };
172
173 // Now we've found the corresponding (or vacant) slot in
174 // h1. Do our tallies...
175 j = h2[i].value ;
176 if (j > h1[k].value ) j = h1[k].value;
177 sim += j;
178
179 j = h1[k].value - h2[i].value;
180 if (j < 0) j = -j;
181 diff += j;
182
183 j = h2[i].value - h1[k].value;
184 if (j < 0) j = 0;
185 dom2 += j;
186 };
187 };
188
189 fprintf (stdout, "\n File 1 total features : %12ld", f1);
190 fprintf (stdout, "\n File 2 total features : %12ld\n", f2);
191
192 fprintf (stdout, "\n Similarities between files : %12ld", sim/2);
193 fprintf (stdout, "\n Differences between files : %12ld\n", diff/2);
194
195 fprintf (stdout, "\n File 1 dominates file 2 : %12ld", dom1);
196 fprintf (stdout, "\n File 2 dominates file 1 : %12ld\n", dom2);
197
198 }
199 return 0;
200 }
201