1 //	osbf-util.c - utility for munging css files, version X0.1
2 
3 // Copyright 2004 Fidelis Assis
4 // Copyright 2004-2009 William S. Yerazunis.
5 // This file is under GPLv3, as described in COPYING.
6 
7 //  OBS: This program is a modified version of the original cssutil,
8 //       specific for the new osbf format. It is not compatible with
9 //       the original css format. -- Fidelis Assis
10 
11 //  include some standard files
12 #include "crm114_sysincludes.h"
13 
14 //  include any local crm114 configuration file
15 #include "crm114_config.h"
16 
17 //  include the crm114 data structures file
18 #include "crm114_structs.h"
19 
20 //  and include the routine declarations file
21 #include "crm114.h"
22 
23 #include "crm114_osbf.h"
24 
//  version string of this utility; printed by the -v command line option
char version[] = "1.1";
26 
27 void
helptext()28 helptext ()
29 {
30   fprintf (stdout,
31 	   "osbf-util version %s - generic osbf file utility.\n"
32 	   "Usage: osbfutil [options]... css-filename\n"
33 	   "		-b   - brief; print only summary\n"
34 	   "		-h   - print this help\n"
35 	   "		-q   - quite mode; no warning messages\n"
36 	   "		-r   - report then exit (no menu)\n"
37 	   "		-s css-size  - if no css file found, create new\n"
38 	   "			       one with this many buckets.\n"
39 	   "		-S css-size  - same as -s, but round up to next\n"
40 	   "			       2^n + 1 boundary.\n"
41 	   "		-v   - print version and exit\n"
42 	   "		-D   - dump css file to stdout in CSV format.\n"
43 	   "		-R csv-file  - create and restore css from CSV.\n"
44 	   "		               Options -s and -S are ignored when"
45 	   " restoring.\n", VERSION);
46 }
47 
48 int
main(int argc,char ** argv)49 main (int argc, char **argv)
50 {
51 
52   long i, k;			//  some random counters, when we need a loop
53   long v;
54   long sparse_spectrum_file_length = OSBF_DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH;
55   long user_set_css_length = 0;
56   long hfsize;
57   long long sum;		// sum of the hits... can be _big_.
58   int brief = 0, quiet = 0, dump = 0, restore = 0;
59   int opt, fields;
60   int report_only = 0;
61 
62   long *bcounts;
63   long maxchain;
64   long curchain;
65   long totchain;
66   long fbuckets;
67   long nchains;
68   long ofbins;
69 
70   char cmdstr[255];
71   char cssfile[255];
72   char csvfile[255];
73   unsigned char cmdchr[2];
74   char crapchr[2];
75   float cmdval;
76   int zloop, cmdloop, version_index;
77 
78   //    the following for crm114.h's happiness
79 
80   char *newinputbuf;
81   newinputbuf = (char *) &hfsize;
82 
83   bcounts = malloc (sizeof (unsigned long) * OSBF_FEATUREBUCKET_VALUE_MAX);
84 
85   {
86     struct stat statbuf;	//  filestat buffer
87     OSBF_FEATURE_HEADER_STRUCT *header;	//  the header of the hash file
88     OSBF_FEATUREBUCKET_STRUCT *hashes;	//  the text of the hash file
89 
90     // parse cmdline options
91     while ((opt = getopt (argc, argv, "bDhR:rqs:S:v")) != -1)
92       {
93 	switch (opt)
94 	  {
95 	  case 'b':
96 	    brief = 1;		// brief, no 'bin value ...' lines
97 	    break;
98 	  case 'D':
99 	    dump = 1;		// dump css file, no cmd menu
100 	    break;
101 	  case 'q':
102 	    quiet = 1;		// quiet mode, no warning messages
103 	    break;
104 	  case 'R':
105 	    {
106 	      FILE *f;
107 	      unsigned long key, hash, value;
108 	      OSBF_FEATURE_HEADER_STRUCT h;
109 
110 	      // count lines to determine the number of buckets and check CSV format
111 	      if (user_trace)
112 		fprintf (stderr, "Opening OSBF file %s for read\n", optarg);
113 	      if ((f = fopen (optarg, "rb")) != NULL)
114 		{
115 
116 		  // try to find the header reading first 2 "buckets"
117 		  if (fscanf
118 		      (f, "%lu;%lu;%lu\n", (unsigned long *) h.version,
119 		       &(h.flags), &(h.buckets_start)) != 3)
120 		    {
121 		      fprintf (stderr,
122 			       "\n %s is not in the right CSV format.\n",
123 			       optarg);
124 		      exit (EXIT_FAILURE);
125 		    }
126 		  if (*((unsigned long *) h.version) != OSBF_VERSION)
127 		    {
128 		      fprintf (stderr,
129 			       "\n %s is not an OSBF CSV file.\n", optarg);
130 		      fclose (f);
131 		      exit (EXIT_FAILURE);
132 		    }
133 		  if (fscanf (f, "%lu;%lu;%lu\n", &(h.buckets), &hash, &value)
134 		      != 3)
135 		    {
136 		      fprintf (stderr,
137 			       "\n %s is not in the right CSV format.\n",
138 			       optarg);
139 		      exit (EXIT_FAILURE);
140 		    }
141 
142 		  // start with -headersize buckets, discounting 2 "buckets" alread read
143 		  sparse_spectrum_file_length = 2 - h.buckets_start;
144 
145 		  while (!feof (f))
146 		    if (fscanf (f, "%lu;%lu;%lu\n", &key, &hash, &value) == 3)
147 		      sparse_spectrum_file_length++;
148 		    else
149 		      {
150 			fprintf (stderr,
151 				 "\n %s is not in the right CSV format.\n",
152 				 optarg);
153 			exit (EXIT_FAILURE);
154 		      }
155 		  fclose (f);
156 
157 		  // check the number of buckets
158 		  if (sparse_spectrum_file_length != h.buckets)
159 		    {
160 		      fprintf (stderr,
161 			       "\n Wrong number of buckets! %s is not in the right CSV format.\n",
162 			       optarg);
163 		      exit (EXIT_FAILURE);
164 		    }
165 		  strcpy (csvfile, optarg);
166 		}
167 	      else
168 		{
169 		  fprintf (stderr,
170 			   "\n Couldn't open csv file %s; errno=%d.\n",
171 			   optarg, errno);
172 		  exit (EXIT_FAILURE);
173 		}
174 	    }
175 	    restore = 1;	// restore css file, no cmd menu
176 	    break;
177 	  case 'r':
178 	    report_only = 1;	// print stats only, no cmd menu.
179 	    break;
180 	  case 's':		// set css size to option value
181 	  case 'S':		// same as above but round up to next 2^n+1
182 	    if (restore)
183 	      {
184 		fprintf (stderr,
185 			 "\nOptions -s, -S ignored when restoring.\n");
186 		break;
187 	      }
188 	    if (sscanf (optarg, "%ld", &sparse_spectrum_file_length))
189 	      {
190 		if (!quiet)
191 		  fprintf (stderr,
192 			   "\nOverride css creation length to %ld\n",
193 			   sparse_spectrum_file_length);
194 		user_set_css_length = 1;
195 	      }
196 	    else
197 	      {
198 		fprintf (stderr,
199 			 "On -%c flag: Missing or incomprehensible number of buckets.\n",
200 			 opt);
201 		exit (EXIT_FAILURE);
202 	      }
203 	    if (opt == 'S')	// round up to next 2^n+1
204 	      {
205 		int k;
206 
207 		k = (long) floor (log10 (sparse_spectrum_file_length - 1)
208 				  / log10 (2.0));
209 		while ((2 << k) + 1 < sparse_spectrum_file_length)
210 		  k++;
211 		sparse_spectrum_file_length = (2 << k) + 1;
212 		user_set_css_length = 1;
213 	      }
214 	    break;
215 	  case 'v':
216 	    fprintf (stderr, " This is osbf-util, version %s\n", version);
217 	    fprintf (stderr, " Copyright 2004-2006 William S. Yerazunis.\n");
218 	    fprintf (stderr,
219 		     " This software is licensed under the GPL with ABSOLUTELY NO WARRANTY\n");
220 	    exit (EXIT_SUCCESS);
221 	  default:
222 	    helptext ();
223 	    exit (EXIT_SUCCESS);
224 	    break;
225 	  }
226       }
227 
228     if (optind < argc)
229       strncpy (cssfile, argv[optind], sizeof (cssfile));
230     else
231       {
232 	helptext ();
233 	exit (EXIT_SUCCESS);
234       }
235 
236     //       and stat it to get it's length
237     k = stat (cssfile, &statbuf);
238     //       quick check- does the file even exist?
239     if (k == 0)
240       {
241 	if (restore)
242 	  {
243 	    fprintf (stderr,
244 		     "\n.CSS file %s exists! Restore operation aborted.\n",
245 		     cssfile);
246 	    exit (EXIT_FAILURE);
247 	  }
248 	hfsize = statbuf.st_size;
249 	if (!quiet && user_set_css_length)
250 	  fprintf (stderr,
251 		   "\n.CSS file %s exists; -s, -S options ignored.\n",
252 		   cssfile);
253       }
254     else
255       {
256 	//      file didn't exist... create it
257 	if (!quiet && !restore)
258 	  fprintf (stdout, "\nHad to create .CSS file %s with %lu buckets\n",
259 		   cssfile, sparse_spectrum_file_length);
260 	if (crm_osbf_create_cssfile
261 	    (cssfile, sparse_spectrum_file_length, OSBF_VERSION, 0,
262 	     OSBF_CSS_SPECTRA_START) != EXIT_SUCCESS)
263 	  exit (EXIT_FAILURE);
264 	k = stat (cssfile, &statbuf);
265 	hfsize = statbuf.st_size;
266       }
267     //
268     //   mmap the hash file into memory so we can bitwhack it
269     header = crm_mmap_file ( cssfile,
270 			     0, hfsize,
271 			     PROT_READ | PROT_WRITE,
272 			     MAP_SHARED,
273 			     NULL);
274     if (header == MAP_FAILED)
275       {
276 	fprintf (stderr,
277 		 "\n Couldn't mmap file %s into memory; errno=%d .\n",
278 		 cssfile, errno);
279 	exit (EXIT_FAILURE);
280       }
281     if (*((unsigned long *) (header->version)) != OSBF_VERSION)
282       {
283 	fprintf (stderr,
284 		 "\n %s is the wrong version. We're expecting a %s css file.\n",
285 		 cssfile, CSS_version_name[OSBF_VERSION]);
286 	crm_munmap_file ((void *) header);
287 	exit (EXIT_FAILURE);
288       }
289 
290     hashes = (OSBF_FEATUREBUCKET_STRUCT *) header + header->buckets_start;
291     if (hashes == MAP_FAILED)
292       {
293 	fprintf (stderr,
294 		 "\n Couldn't open RW file %s; errno=%d .\n", cssfile, errno);
295 	exit (EXIT_FAILURE);
296       }
297     //   from now on, hfsize is buckets, not bytes.
298     hfsize = statbuf.st_size / sizeof (OSBF_FEATUREBUCKET_STRUCT);
299 
300     if (dump)
301       {
302 	/* dump the css file */
303 	OSBF_FEATUREBUCKET_STRUCT *bucket;
304 	unsigned long *p;
305 
306 	bucket = (OSBF_FEATUREBUCKET_STRUCT *) header;
307 	for (i = 0; i < hfsize; i++)
308 	  {
309 	    p = (unsigned long *) &bucket[i];
310 	    printf ("%lu;%lu;%lu\n", p[0], p[1], p[2]);
311 	  }
312       }
313 
314     if (restore)
315       {
316 	FILE *f;
317 	OSBF_FEATUREBUCKET_STRUCT *bucket;
318 	unsigned long *p;
319 
320 	// restore the css file  - note that if we DIDN'T create
321 	// it already, then this will fail.
322 	//
323 	if ((f = fopen (csvfile, "rb")) == NULL)
324 	  {
325 	    fprintf (stderr, "\n Couldn't open csv file %s; errno=%d.\n",
326 		     csvfile, errno);
327 	    exit (EXIT_FAILURE);
328 	  }
329 
330 	bucket = (OSBF_FEATUREBUCKET_STRUCT *) header;
331 	for (i = 0; i < hfsize; i++)
332 	  {
333 	    p = (unsigned long *) &bucket[i];
334 	    dontcare = fscanf (f, "%lu;%lu;%lu\n", &p[0], &p[1], &p[2]);
335 	  }
336 	fclose (f);
337       }
338 
339     zloop = 1;
340     while (zloop == 1 && !restore && !dump)
341       {
342 	zloop = 0;
343 	crm_osbf_packcss (header, 0, header->buckets - 1);
344 	sum = 0;
345 	maxchain = 0;
346 	curchain = 0;
347 	totchain = 0;
348 	fbuckets = 0;
349 	nchains = 0;
350 	ofbins = 0;
351 	for (i = 0; i < header->buckets; i++)
352 	  {
353 	    sum += GET_BUCKET_VALUE(hashes[i]);
354 	    if (GET_BUCKET_VALUE(hashes[i]) != 0)
355 	      {
356 		fbuckets++;
357 		curchain++;
358 		if (GET_BUCKET_VALUE(hashes[i]) >= OSBF_FEATUREBUCKET_VALUE_MAX)
359 		  ofbins++;
360 	      }
361 	    else
362 	      {
363 		if (curchain > 0)
364 		  {
365 		    totchain += curchain;
366 		    nchains++;
367 	            if (curchain > maxchain)
368 	              maxchain = curchain;
369 		    curchain = 0;
370 		  }
371 	      }
372 	  }
373 
374 	version_index = *((unsigned long *) header->version);
375 	if (version_index < 0 || version_index > UNKNOWN_VERSION)
376 	  version_index = UNKNOWN_VERSION;
377 	fprintf (stdout, "\n Sparse spectra file %s statistics: \n", cssfile);
378 	fprintf (stdout, "\n CSS file version                 : %12s",
379 		 CSS_version_name[version_index]);
380 	fprintf (stdout, "\n Header size (bytes)              : %12ld",
381 		 header->buckets_start * sizeof (OSBF_FEATUREBUCKET_STRUCT));
382 	fprintf (stdout, "\n Bucket size (bytes)              : %12lu",
383 		 (unsigned long)sizeof(OSBF_FEATUREBUCKET_STRUCT));
384 	fprintf (stdout, "\n Total available buckets          : %12ld",
385 		 header->buckets);
386 	fprintf (stdout, "\n Total buckets in use             : %12ld",
387 		 fbuckets);
388 	fprintf (stdout, "\n Number of trainings              : %12lu",
389 		 header->learnings);
390 	fprintf (stdout, "\n Total buckets with value >= max  : %12ld",
391 		 ofbins);
392 	fprintf (stdout, "\n Total hashed datums in file      : %12lld", sum);
393 	fprintf (stdout, "\n Average datums per bucket        : %12.2f",
394 		 (fbuckets > 0) ? (sum * 1.0) / (fbuckets * 1.0) : 0);
395 	fprintf (stdout, "\n Number of chains                 : %12ld",
396 		 nchains);
397 	fprintf (stdout, "\n Maximum length of overflow chain : %12ld",
398 		 maxchain);
399 	fprintf (stdout, "\n Average length of overflow chain : %12.2f",
400 		 nchains > 0 ? (totchain * 1.0) / (nchains * 1.0) : 0);
401 	fprintf (stdout, "\n Average packing density          : %12.2f\n",
402 		 (fbuckets * 1.0) / (header->buckets * 1.0));
403 	for (i = 0; i < OSBF_FEATUREBUCKET_VALUE_MAX; i++)
404 	  bcounts[i] = 0;
405 	for (v = 0; v < header->buckets; v++)
406 	  {
407 	    if (GET_BUCKET_VALUE(hashes[v]) < OSBF_FEATUREBUCKET_VALUE_MAX)
408 	      bcounts[GET_BUCKET_VALUE(hashes[v])]++;
409 	  }
410 
411 	if (!brief)
412 	  for (i = 0; i < OSBF_FEATUREBUCKET_VALUE_MAX; i++)
413 	    {
414 	      if (bcounts[i] > 0)
415 		{
416 		  fprintf (stdout, "\n bin value %8ld found %9ld times",
417 			   i, bcounts[i]);
418 		}
419 	    }
420 
421 	fprintf (stdout, "\n");
422 	cmdloop = 1;
423 	while (!report_only && cmdloop)
424 	  {
425 	    // clear command buffer
426 	    cmdchr[0] = '\0';
427 	    fprintf (stdout, "Options:\n");
428 	    fprintf (stdout, "   Z n - zero bins at or below a value\n");
429 	    fprintf (stdout, "   S n - subtract a constant from all bins\n");
430 	    fprintf (stdout, "   D n - divide all bins by a constant\n");
431 	    fprintf (stdout, "   R - rescan\n");
432 	    fprintf (stdout, "   P - pack\n");
433 	    fprintf (stdout, "   Q - quit\n");
434 	    fprintf (stdout, ">>> ");
435 	    clearerr (stdin);
436 	    dontcare = fscanf (stdin, "%[^\n]", cmdstr);
437 	    dontcare = fscanf (stdin, "%c", crapchr);
438 	    fields = sscanf (cmdstr, "%s %f", cmdchr, &cmdval);
439 	    if (strlen ( (char *)cmdchr) != 1)
440 	      {
441 		fprintf (stdout, "Unknown command: %s\n", cmdchr);
442 		continue;
443 	      }
444 	    switch (tolower ((int)cmdchr[0]))
445 	      {
446 	      case 'z':
447 		if (fields != 2)
448 		  fprintf (stdout,
449 			   "Z command requires a numeric argument!\n");
450 		else
451 		  {
452 		    fprintf (stdout, "Working...");
453 		    for (i = 0; i < header->buckets; i++)
454 		      if (GET_BUCKET_VALUE(hashes[i]) <= cmdval)
455 			BUCKET_RAW_VALUE(hashes[i]) = 0;
456 		    fprintf (stdout, "done.\n");
457 		  }
458 		break;
459 	      case 's':
460 		if (fields != 2)
461 		  fprintf (stdout,
462 			   "S command requires a numeric argument!\n");
463 		else
464 		  {
465 		    fprintf (stdout, "Working...");
466 		    for (i = 0; i < header->buckets; i++)
467 		      {
468 			if (GET_BUCKET_VALUE(hashes[i]) > (int) cmdval)
469 			  {
470 			    BUCKET_RAW_VALUE(hashes[i]) =
471 			      GET_BUCKET_VALUE(hashes[i]) - cmdval;
472 			  }
473 			else
474 			  {
475 			    BUCKET_RAW_VALUE(hashes[i]) = 0;
476 			  }
477 		      }
478 		    fprintf (stdout, "done.\n");
479 		  }
480 		break;
481 	      case 'd':
482 		if (fields != 2)
483 		  fprintf (stdout,
484 			   "D command requires a numeric argument!\n");
485 		else if (cmdval == 0)
486 		  fprintf (stdout, "You can't divide by zero, nimrod!\n");
487 		else
488 		  {
489 		    fprintf (stdout, "Working...");
490 		    for (i = 0; i < header->buckets; i++)
491 		      BUCKET_RAW_VALUE(hashes[i]) =
492 			  GET_BUCKET_VALUE(hashes[i]) / cmdval;
493 		    fprintf (stdout, "done.\n");
494 		  }
495 		break;
496 	      case 'r':
497 		zloop = 1;
498 		cmdloop = 0;
499 		break;
500 	      case 'p':
501 		fprintf (stdout, "Working...");
502 		crm_osbf_packcss (header, 0, header->buckets - 1);
503 		zloop = 1;
504 		cmdloop = 0;
505 		break;
506 	      case 'q':
507 		fprintf (stdout, "Bye! \n");
508 		cmdloop = 0;
509 		break;
510 	      default:
511 		fprintf (stdout, "Unknown command: %c\n", cmdchr[0]);
512 		break;
513 	      }
514 	  }
515       }
516   }
517   return 0;
518 }
519