1 // osbf-util.c - utility for munging css files, version X0.1
2
3 // Copyright 2004 Fidelis Assis
4 // Copyright 2004-2009 William S. Yerazunis.
5 // This file is under GPLv3, as described in COPYING.
6
7 // OBS: This program is a modified version of the original cssutil,
8 // specific for the new osbf format. It is not compatible with
9 // the original css format. -- Fidelis Assis
10
11 // include some standard files
12 #include "crm114_sysincludes.h"
13
14 // include any local crm114 configuration file
15 #include "crm114_config.h"
16
17 // include the crm114 data structures file
18 #include "crm114_structs.h"
19
20 // and include the routine declarations file
21 #include "crm114.h"
22
23 #include "crm114_osbf.h"
24
// Version string of this utility; printed by the -v option in main().
char version[] = "1.1";
26
27 void
helptext()28 helptext ()
29 {
30 fprintf (stdout,
31 "osbf-util version %s - generic osbf file utility.\n"
32 "Usage: osbfutil [options]... css-filename\n"
33 " -b - brief; print only summary\n"
34 " -h - print this help\n"
35 " -q - quite mode; no warning messages\n"
36 " -r - report then exit (no menu)\n"
37 " -s css-size - if no css file found, create new\n"
38 " one with this many buckets.\n"
39 " -S css-size - same as -s, but round up to next\n"
40 " 2^n + 1 boundary.\n"
41 " -v - print version and exit\n"
42 " -D - dump css file to stdout in CSV format.\n"
43 " -R csv-file - create and restore css from CSV.\n"
44 " Options -s and -S are ignored when"
45 " restoring.\n", VERSION);
46 }
47
48 int
main(int argc,char ** argv)49 main (int argc, char **argv)
50 {
51
52 long i, k; // some random counters, when we need a loop
53 long v;
54 long sparse_spectrum_file_length = OSBF_DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH;
55 long user_set_css_length = 0;
56 long hfsize;
57 long long sum; // sum of the hits... can be _big_.
58 int brief = 0, quiet = 0, dump = 0, restore = 0;
59 int opt, fields;
60 int report_only = 0;
61
62 long *bcounts;
63 long maxchain;
64 long curchain;
65 long totchain;
66 long fbuckets;
67 long nchains;
68 long ofbins;
69
70 char cmdstr[255];
71 char cssfile[255];
72 char csvfile[255];
73 unsigned char cmdchr[2];
74 char crapchr[2];
75 float cmdval;
76 int zloop, cmdloop, version_index;
77
78 // the following for crm114.h's happiness
79
80 char *newinputbuf;
81 newinputbuf = (char *) &hfsize;
82
83 bcounts = malloc (sizeof (unsigned long) * OSBF_FEATUREBUCKET_VALUE_MAX);
84
85 {
86 struct stat statbuf; // filestat buffer
87 OSBF_FEATURE_HEADER_STRUCT *header; // the header of the hash file
88 OSBF_FEATUREBUCKET_STRUCT *hashes; // the text of the hash file
89
90 // parse cmdline options
91 while ((opt = getopt (argc, argv, "bDhR:rqs:S:v")) != -1)
92 {
93 switch (opt)
94 {
95 case 'b':
96 brief = 1; // brief, no 'bin value ...' lines
97 break;
98 case 'D':
99 dump = 1; // dump css file, no cmd menu
100 break;
101 case 'q':
102 quiet = 1; // quiet mode, no warning messages
103 break;
104 case 'R':
105 {
106 FILE *f;
107 unsigned long key, hash, value;
108 OSBF_FEATURE_HEADER_STRUCT h;
109
110 // count lines to determine the number of buckets and check CSV format
111 if (user_trace)
112 fprintf (stderr, "Opening OSBF file %s for read\n", optarg);
113 if ((f = fopen (optarg, "rb")) != NULL)
114 {
115
116 // try to find the header reading first 2 "buckets"
117 if (fscanf
118 (f, "%lu;%lu;%lu\n", (unsigned long *) h.version,
119 &(h.flags), &(h.buckets_start)) != 3)
120 {
121 fprintf (stderr,
122 "\n %s is not in the right CSV format.\n",
123 optarg);
124 exit (EXIT_FAILURE);
125 }
126 if (*((unsigned long *) h.version) != OSBF_VERSION)
127 {
128 fprintf (stderr,
129 "\n %s is not an OSBF CSV file.\n", optarg);
130 fclose (f);
131 exit (EXIT_FAILURE);
132 }
133 if (fscanf (f, "%lu;%lu;%lu\n", &(h.buckets), &hash, &value)
134 != 3)
135 {
136 fprintf (stderr,
137 "\n %s is not in the right CSV format.\n",
138 optarg);
139 exit (EXIT_FAILURE);
140 }
141
142 // start with -headersize buckets, discounting 2 "buckets" alread read
143 sparse_spectrum_file_length = 2 - h.buckets_start;
144
145 while (!feof (f))
146 if (fscanf (f, "%lu;%lu;%lu\n", &key, &hash, &value) == 3)
147 sparse_spectrum_file_length++;
148 else
149 {
150 fprintf (stderr,
151 "\n %s is not in the right CSV format.\n",
152 optarg);
153 exit (EXIT_FAILURE);
154 }
155 fclose (f);
156
157 // check the number of buckets
158 if (sparse_spectrum_file_length != h.buckets)
159 {
160 fprintf (stderr,
161 "\n Wrong number of buckets! %s is not in the right CSV format.\n",
162 optarg);
163 exit (EXIT_FAILURE);
164 }
165 strcpy (csvfile, optarg);
166 }
167 else
168 {
169 fprintf (stderr,
170 "\n Couldn't open csv file %s; errno=%d.\n",
171 optarg, errno);
172 exit (EXIT_FAILURE);
173 }
174 }
175 restore = 1; // restore css file, no cmd menu
176 break;
177 case 'r':
178 report_only = 1; // print stats only, no cmd menu.
179 break;
180 case 's': // set css size to option value
181 case 'S': // same as above but round up to next 2^n+1
182 if (restore)
183 {
184 fprintf (stderr,
185 "\nOptions -s, -S ignored when restoring.\n");
186 break;
187 }
188 if (sscanf (optarg, "%ld", &sparse_spectrum_file_length))
189 {
190 if (!quiet)
191 fprintf (stderr,
192 "\nOverride css creation length to %ld\n",
193 sparse_spectrum_file_length);
194 user_set_css_length = 1;
195 }
196 else
197 {
198 fprintf (stderr,
199 "On -%c flag: Missing or incomprehensible number of buckets.\n",
200 opt);
201 exit (EXIT_FAILURE);
202 }
203 if (opt == 'S') // round up to next 2^n+1
204 {
205 int k;
206
207 k = (long) floor (log10 (sparse_spectrum_file_length - 1)
208 / log10 (2.0));
209 while ((2 << k) + 1 < sparse_spectrum_file_length)
210 k++;
211 sparse_spectrum_file_length = (2 << k) + 1;
212 user_set_css_length = 1;
213 }
214 break;
215 case 'v':
216 fprintf (stderr, " This is osbf-util, version %s\n", version);
217 fprintf (stderr, " Copyright 2004-2006 William S. Yerazunis.\n");
218 fprintf (stderr,
219 " This software is licensed under the GPL with ABSOLUTELY NO WARRANTY\n");
220 exit (EXIT_SUCCESS);
221 default:
222 helptext ();
223 exit (EXIT_SUCCESS);
224 break;
225 }
226 }
227
228 if (optind < argc)
229 strncpy (cssfile, argv[optind], sizeof (cssfile));
230 else
231 {
232 helptext ();
233 exit (EXIT_SUCCESS);
234 }
235
236 // and stat it to get it's length
237 k = stat (cssfile, &statbuf);
238 // quick check- does the file even exist?
239 if (k == 0)
240 {
241 if (restore)
242 {
243 fprintf (stderr,
244 "\n.CSS file %s exists! Restore operation aborted.\n",
245 cssfile);
246 exit (EXIT_FAILURE);
247 }
248 hfsize = statbuf.st_size;
249 if (!quiet && user_set_css_length)
250 fprintf (stderr,
251 "\n.CSS file %s exists; -s, -S options ignored.\n",
252 cssfile);
253 }
254 else
255 {
256 // file didn't exist... create it
257 if (!quiet && !restore)
258 fprintf (stdout, "\nHad to create .CSS file %s with %lu buckets\n",
259 cssfile, sparse_spectrum_file_length);
260 if (crm_osbf_create_cssfile
261 (cssfile, sparse_spectrum_file_length, OSBF_VERSION, 0,
262 OSBF_CSS_SPECTRA_START) != EXIT_SUCCESS)
263 exit (EXIT_FAILURE);
264 k = stat (cssfile, &statbuf);
265 hfsize = statbuf.st_size;
266 }
267 //
268 // mmap the hash file into memory so we can bitwhack it
269 header = crm_mmap_file ( cssfile,
270 0, hfsize,
271 PROT_READ | PROT_WRITE,
272 MAP_SHARED,
273 NULL);
274 if (header == MAP_FAILED)
275 {
276 fprintf (stderr,
277 "\n Couldn't mmap file %s into memory; errno=%d .\n",
278 cssfile, errno);
279 exit (EXIT_FAILURE);
280 }
281 if (*((unsigned long *) (header->version)) != OSBF_VERSION)
282 {
283 fprintf (stderr,
284 "\n %s is the wrong version. We're expecting a %s css file.\n",
285 cssfile, CSS_version_name[OSBF_VERSION]);
286 crm_munmap_file ((void *) header);
287 exit (EXIT_FAILURE);
288 }
289
290 hashes = (OSBF_FEATUREBUCKET_STRUCT *) header + header->buckets_start;
291 if (hashes == MAP_FAILED)
292 {
293 fprintf (stderr,
294 "\n Couldn't open RW file %s; errno=%d .\n", cssfile, errno);
295 exit (EXIT_FAILURE);
296 }
297 // from now on, hfsize is buckets, not bytes.
298 hfsize = statbuf.st_size / sizeof (OSBF_FEATUREBUCKET_STRUCT);
299
300 if (dump)
301 {
302 /* dump the css file */
303 OSBF_FEATUREBUCKET_STRUCT *bucket;
304 unsigned long *p;
305
306 bucket = (OSBF_FEATUREBUCKET_STRUCT *) header;
307 for (i = 0; i < hfsize; i++)
308 {
309 p = (unsigned long *) &bucket[i];
310 printf ("%lu;%lu;%lu\n", p[0], p[1], p[2]);
311 }
312 }
313
314 if (restore)
315 {
316 FILE *f;
317 OSBF_FEATUREBUCKET_STRUCT *bucket;
318 unsigned long *p;
319
320 // restore the css file - note that if we DIDN'T create
321 // it already, then this will fail.
322 //
323 if ((f = fopen (csvfile, "rb")) == NULL)
324 {
325 fprintf (stderr, "\n Couldn't open csv file %s; errno=%d.\n",
326 csvfile, errno);
327 exit (EXIT_FAILURE);
328 }
329
330 bucket = (OSBF_FEATUREBUCKET_STRUCT *) header;
331 for (i = 0; i < hfsize; i++)
332 {
333 p = (unsigned long *) &bucket[i];
334 dontcare = fscanf (f, "%lu;%lu;%lu\n", &p[0], &p[1], &p[2]);
335 }
336 fclose (f);
337 }
338
339 zloop = 1;
340 while (zloop == 1 && !restore && !dump)
341 {
342 zloop = 0;
343 crm_osbf_packcss (header, 0, header->buckets - 1);
344 sum = 0;
345 maxchain = 0;
346 curchain = 0;
347 totchain = 0;
348 fbuckets = 0;
349 nchains = 0;
350 ofbins = 0;
351 for (i = 0; i < header->buckets; i++)
352 {
353 sum += GET_BUCKET_VALUE(hashes[i]);
354 if (GET_BUCKET_VALUE(hashes[i]) != 0)
355 {
356 fbuckets++;
357 curchain++;
358 if (GET_BUCKET_VALUE(hashes[i]) >= OSBF_FEATUREBUCKET_VALUE_MAX)
359 ofbins++;
360 }
361 else
362 {
363 if (curchain > 0)
364 {
365 totchain += curchain;
366 nchains++;
367 if (curchain > maxchain)
368 maxchain = curchain;
369 curchain = 0;
370 }
371 }
372 }
373
374 version_index = *((unsigned long *) header->version);
375 if (version_index < 0 || version_index > UNKNOWN_VERSION)
376 version_index = UNKNOWN_VERSION;
377 fprintf (stdout, "\n Sparse spectra file %s statistics: \n", cssfile);
378 fprintf (stdout, "\n CSS file version : %12s",
379 CSS_version_name[version_index]);
380 fprintf (stdout, "\n Header size (bytes) : %12ld",
381 header->buckets_start * sizeof (OSBF_FEATUREBUCKET_STRUCT));
382 fprintf (stdout, "\n Bucket size (bytes) : %12lu",
383 (unsigned long)sizeof(OSBF_FEATUREBUCKET_STRUCT));
384 fprintf (stdout, "\n Total available buckets : %12ld",
385 header->buckets);
386 fprintf (stdout, "\n Total buckets in use : %12ld",
387 fbuckets);
388 fprintf (stdout, "\n Number of trainings : %12lu",
389 header->learnings);
390 fprintf (stdout, "\n Total buckets with value >= max : %12ld",
391 ofbins);
392 fprintf (stdout, "\n Total hashed datums in file : %12lld", sum);
393 fprintf (stdout, "\n Average datums per bucket : %12.2f",
394 (fbuckets > 0) ? (sum * 1.0) / (fbuckets * 1.0) : 0);
395 fprintf (stdout, "\n Number of chains : %12ld",
396 nchains);
397 fprintf (stdout, "\n Maximum length of overflow chain : %12ld",
398 maxchain);
399 fprintf (stdout, "\n Average length of overflow chain : %12.2f",
400 nchains > 0 ? (totchain * 1.0) / (nchains * 1.0) : 0);
401 fprintf (stdout, "\n Average packing density : %12.2f\n",
402 (fbuckets * 1.0) / (header->buckets * 1.0));
403 for (i = 0; i < OSBF_FEATUREBUCKET_VALUE_MAX; i++)
404 bcounts[i] = 0;
405 for (v = 0; v < header->buckets; v++)
406 {
407 if (GET_BUCKET_VALUE(hashes[v]) < OSBF_FEATUREBUCKET_VALUE_MAX)
408 bcounts[GET_BUCKET_VALUE(hashes[v])]++;
409 }
410
411 if (!brief)
412 for (i = 0; i < OSBF_FEATUREBUCKET_VALUE_MAX; i++)
413 {
414 if (bcounts[i] > 0)
415 {
416 fprintf (stdout, "\n bin value %8ld found %9ld times",
417 i, bcounts[i]);
418 }
419 }
420
421 fprintf (stdout, "\n");
422 cmdloop = 1;
423 while (!report_only && cmdloop)
424 {
425 // clear command buffer
426 cmdchr[0] = '\0';
427 fprintf (stdout, "Options:\n");
428 fprintf (stdout, " Z n - zero bins at or below a value\n");
429 fprintf (stdout, " S n - subtract a constant from all bins\n");
430 fprintf (stdout, " D n - divide all bins by a constant\n");
431 fprintf (stdout, " R - rescan\n");
432 fprintf (stdout, " P - pack\n");
433 fprintf (stdout, " Q - quit\n");
434 fprintf (stdout, ">>> ");
435 clearerr (stdin);
436 dontcare = fscanf (stdin, "%[^\n]", cmdstr);
437 dontcare = fscanf (stdin, "%c", crapchr);
438 fields = sscanf (cmdstr, "%s %f", cmdchr, &cmdval);
439 if (strlen ( (char *)cmdchr) != 1)
440 {
441 fprintf (stdout, "Unknown command: %s\n", cmdchr);
442 continue;
443 }
444 switch (tolower ((int)cmdchr[0]))
445 {
446 case 'z':
447 if (fields != 2)
448 fprintf (stdout,
449 "Z command requires a numeric argument!\n");
450 else
451 {
452 fprintf (stdout, "Working...");
453 for (i = 0; i < header->buckets; i++)
454 if (GET_BUCKET_VALUE(hashes[i]) <= cmdval)
455 BUCKET_RAW_VALUE(hashes[i]) = 0;
456 fprintf (stdout, "done.\n");
457 }
458 break;
459 case 's':
460 if (fields != 2)
461 fprintf (stdout,
462 "S command requires a numeric argument!\n");
463 else
464 {
465 fprintf (stdout, "Working...");
466 for (i = 0; i < header->buckets; i++)
467 {
468 if (GET_BUCKET_VALUE(hashes[i]) > (int) cmdval)
469 {
470 BUCKET_RAW_VALUE(hashes[i]) =
471 GET_BUCKET_VALUE(hashes[i]) - cmdval;
472 }
473 else
474 {
475 BUCKET_RAW_VALUE(hashes[i]) = 0;
476 }
477 }
478 fprintf (stdout, "done.\n");
479 }
480 break;
481 case 'd':
482 if (fields != 2)
483 fprintf (stdout,
484 "D command requires a numeric argument!\n");
485 else if (cmdval == 0)
486 fprintf (stdout, "You can't divide by zero, nimrod!\n");
487 else
488 {
489 fprintf (stdout, "Working...");
490 for (i = 0; i < header->buckets; i++)
491 BUCKET_RAW_VALUE(hashes[i]) =
492 GET_BUCKET_VALUE(hashes[i]) / cmdval;
493 fprintf (stdout, "done.\n");
494 }
495 break;
496 case 'r':
497 zloop = 1;
498 cmdloop = 0;
499 break;
500 case 'p':
501 fprintf (stdout, "Working...");
502 crm_osbf_packcss (header, 0, header->buckets - 1);
503 zloop = 1;
504 cmdloop = 0;
505 break;
506 case 'q':
507 fprintf (stdout, "Bye! \n");
508 cmdloop = 0;
509 break;
510 default:
511 fprintf (stdout, "Unknown command: %c\n", cmdchr[0]);
512 break;
513 }
514 }
515 }
516 }
517 return 0;
518 }
519