1 // @(#) $Revision: 4.1 $ $Source: /judy/test/manual/StringCompare.c $
2 //=======================================================================
3 // Author Douglas L. Baskins, Jan 2003.
4 // Permission to use this code is freely granted, provided that this
5 // statement is retained.
6 // email - dougbaskins .at, yahoo.com
7 //=======================================================================
8
9 //=======================================================================
10 //
11 // This program will time various ADTs that store and retrieve strings.
12 // Currently there are 7 ADTs implemented:
13
14 /*
15 1) Hash - this is the fastest one when table size matches data size
16 2) JudyHS - 2nd in speed and very scaleable.
17 3) JLHash - this uses a JudyL() array instead of hash table and is
18 very scaleable because the hash table size is ~4 billion.
19 4) JudySL - ordered and requires null terminated strings.
20 5) Ternary - code borrowed from Mr Dobbs, perhaps 2000
21 6) Redblack - code borrowed from J. Zobel, April 2001.
22 7) Splay - code borrowed from J. Zobel, April 2001.
23
24 Note: Splay, Redblack and Ternary methods are not very fast, so they
25 have not been completed and made bug free. I.E. no Delete and Free
26 Array routines. Ternary is fastest retrieve of these three, but uses
27 an extraordinary amount of memory.
28 */
29 //=======================================================================
30 //
31 // Compile:
32 //
33 // cc -O StringCompare.c -lm -lJudy -o StringCompare
34 // -or-
35 // cc -O -DCPUMHZ=1299 StringCompare.c -lm -lJudy -o StringCompare
36
37 /* Notes:
38 1) Use '-DCPUMHZ=1299' in cc line if better clock resolution is desired
39 and it compiles successfully. The 1299 is the cpu MHz : 1298.916 from
40 cat /proc/cpuinfo in a Linux system.
41
42 2) -static will generally get better performance because memcmp(),
43 memcpy() routines are usually slower with shared librarys.
44 */
45
46 // Usage:
47 //
48 // StringCompare -A Hash <options> textfile
49 // StringCompare -A JudyHS <options> textfile
50 // StringCompare -A JLHash <options> textfile
51 // StringCompare -A JudySL <options> textfile
52 // StringCompare -A Ternary <options> textfile
53 // StringCompare -A Redblack <options> textfile
54 // StringCompare -A Splay <options> textfile
55
56 #include <stdlib.h> // malloc(3)
57 #include <unistd.h> // getopt(3)
58 #include <stdio.h> // printf(3)
59 #include <fcntl.h> // open(2)
60 #include <string.h> // memcmp(3), memcpy(3)
61 #include <errno.h> // errno(3)
62 #include <sys/mman.h> // mmap(2)
63 #include <sys/time.h> // gettimeofday(2)
64 #include <math.h> // pow(3)
65 #include <sys/utsname.h> // uname(2)
66 #include <Judy.h> // Judy arrays
67
68 //=======================================================================
69 // D e f i n e: T O T U R N O F F A S S E R T I O N S !!!!!!!!
70 //=======================================================================
71 //#define NDEBUG 1
72 #include <assert.h> // assert(3)
73
74 //=======================================================================
75 // G L O B A L D A T A
76 //=======================================================================
77
78 int foolflag = 0; // fool compiler from optimizing
79 static Word_t gStored = 0; // number of strings inserted
80 static Word_t gChainln = 0; // links traversed during RETRIVE
81
82 static Word_t PtsPdec = 40; // default measurement points per decade
83
84 #define INFSTRGS 1000000000 // 1 billion strings is infinity
85
86 static Word_t nStrg = INFSTRGS; // infinity -- measure all strings
87 static Word_t TValues = 100000; // max measure points for RETRIVE tests
88 static int pFlag = 0; // pre-fault hash table pages into RAM
89 static int rFlag = 0; // do not randomize input file
90 static int aFlag = 0; // word align string buffers
91 static int DFlag = 0; // do the delete measurement
92 static int CFlag = 0; // build sequential Get buffers
93 static Word_t aCount = 0; // Count of missaligned string buffers
94
95 // define the maximum length of a string allowed
96 #define MAXSTRLEN (100000)
97 static int MLength = MAXSTRLEN;
98 static Word_t HTblsz; // 1M default hash table size
99 static int fileidx; // argv[fileidx] == file string
100
101 // for saving input string data
102 typedef struct STRING_
103 {
104 int dt_strlen;
105 uint8_t *dt_string;
106
107 } dt_t , *Pdt_t;
108
109 static Pdt_t PdtS_ = NULL; // memory for Cache access Gets
110 static uint8_t *Strbuf_ = NULL;
111 static Word_t Strsiz_ = 0;
112
113 // Roundup BYTES to an even number of words
114
115 /*
116 On Linux 2.6.3-4mdkenterprise (Mandrake 10.0 Community) printing (even
117 to a file) makes the timings inaccurate. So, use -L2 or greater to
118 average (actually save min times) and print results after all tests are
119 completed.
120 */
121 #define Printf if (Pass == 0) printf
122
123 #define ROUNDUPWORD(BYTES) (((BYTES) + sizeof(Word_t) - 1) & (-sizeof(Word_t)))
124 #define BYTES2WORDS(BYTES) (((BYTES) + sizeof(Word_t) - 1) / (sizeof(Word_t)))
125
126 //=======================================================================
127 // T I M I N G M A C R O S
128 //=======================================================================
129
130 static double DeltaUSec; // Global for remembering delta times
131
132 // Some operating systems have get_cycles() in /usr/include/asm/timex.h
133
134 #ifdef CPUMHZ
135
136 // For a 1.34 nS clock cycle processor (750Mhz)
137
138 #define CPUSPEED (1.0 / (CPUMHZ))
139
140 #include <asm/timex.h>
141
142 #define TIMER_vars(T) cycles_t __TVBeg_##T
143
144 #define STARTTm(T) __TVBeg_##T = get_cycles()
145
146 #define ENDTm(D,T) { (D) = (double)(get_cycles() - __TVBeg_##T) * CPUSPEED; }
147
148 #else // ! CPUMHZ
149
150 #define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T
151
152 #define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)
153
154 #define ENDTm(D,T) \
155 { \
156 gettimeofday(&__TVEnd_##T, NULL); \
157 (D) = (double)(__TVEnd_##T.tv_sec - __TVBeg_##T.tv_sec) * 1E6 + \
158 ((double)(__TVEnd_##T.tv_usec - __TVBeg_##T.tv_usec)); \
159 }
160
161 #endif // ! CPUMHZ
162
163 //=======================================================================
164 // M E M O R Y S I Z E M A C R O S
165 //=======================================================================
166
167 // use mallinfo() instead of sbrk() for memory usage measurements
168 // this should include the RAM that was mmap()ed in malloc()
169
170 static Word_t DeltaMem; // for remembering
171
172 // Some mallocs have mallinfo()
173 // #define MALLINFO 1
174
175 #ifdef MALLINFO
176 #include <malloc.h> // mallinfo()
177
178 static struct mallinfo malStart;
179
180 #define STARTmem malStart = mallinfo()
181 #define ENDmem(DELTAMEM) \
182 { \
183 struct mallinfo malEnd = mallinfo(); \
184 /* strange little dance from signed to unsigned to double */ \
185 unsigned int _un_int = malEnd.arena - malStart.arena; \
186 (DELTAMEM) = (double)_un_int; /* to double */ \
187 }
188 #else // NO MALLINFO
189
190 // this usually works for machines with less than 1-2Gb RAM.
191 // (it does NOT include memory ACQUIRED by mmap())
192
193 static char *malStart;
194
195 #define STARTmem (malStart = (char *)sbrk(0))
196 #define ENDmem(DELTAMEM) \
197 { \
198 char *malEnd = (char *)sbrk(0); \
199 (DELTAMEM) = (double)(malEnd - malStart); \
200 }
201 #endif // NO MALLINFO
202
203 //=======================================================================
204 // F I L E O P E N and M A L L O C F A I L M A C R O S
205 //=======================================================================
206
207 #define FILERROR \
208 { \
209 printf("\n !! OOps - Open file error \"%s\": %s (errno = %d)\n", \
210 argv[fileidx], strerror(errno), errno); \
211 fprintf(stderr, " OOps - Open file error \"%s\": %s (errno = %d)\n",\
212 argv[fileidx], strerror(errno), errno); \
213 exit(1); \
214 }
215
216 #define MALLOCERROR \
217 { \
218 printf("\n !! OOps - malloc failed at Line = %d\n", __LINE__); \
219 fprintf(stderr, " OOps - malloc failed at Line = %d\n", __LINE__); \
220 exit(1); \
221 }
222
223 //=======================================================================
224 // This alternate form of JudyMalloc() is used to keep track how much ram is
225 // used on some of the below ADT's
226 //=======================================================================
227
228 // JUDY INCLUDE FILES
229 //#include "Judy.h"
230
231 // ****************************************************************************
232 // J U D Y M A L L O C
233 //
234 // Allocate RAM. This is the single location in Judy code that calls
235 // malloc(3C). Note: JPM accounting occurs at a higher level.
236
237 static Word_t TotalJudyMalloc = 0;
238
239 Word_t
JudyMalloc(Word_t Words)240 JudyMalloc(Word_t Words)
241 {
242 Word_t Addr;
243
244 Addr = (Word_t)malloc(Words * sizeof(Word_t));
245
246 if (Addr)
247 TotalJudyMalloc += Words;
248
249 return (Addr);
250
251 } // JudyMalloc()
252
253 // ****************************************************************************
254 // J U D Y F R E E
255
256 void
JudyFree(void * PWord,Word_t Words)257 JudyFree(void *PWord, Word_t Words)
258 {
259 free(PWord);
260 assert((long)(TotalJudyMalloc - Words) >= 0L);
261
262 TotalJudyMalloc -= Words;
263
264 } // JudyFree()
265
266 // ****************************************************************************
267 // J U D Y M A L L O C
268 //
269 // Higher-level "wrapper" for allocating objects that need not be in RAM,
270 // although at this time they are in fact only in RAM. Later we hope that some
271 // entire subtrees (at a JPM or branch) can be "virtual", so their allocations
272 // and frees should go through this level.
273
274 Word_t
JudyMallocVirtual(Word_t Words)275 JudyMallocVirtual(Word_t Words)
276 {
277 return (JudyMalloc(Words));
278
279 } // JudyMallocVirtual()
280
281 // ****************************************************************************
282 // J U D Y F R E E
283
284 void
JudyFreeVirtual(void * PWord,Word_t Words)285 JudyFreeVirtual(void *PWord, Word_t Words)
286 {
287 JudyFree(PWord, Words);
288
289 } // JudyFreeVirtual()
290
291 //=======================================================================
292 // Routine to get next size of Indexes
293 //=======================================================================
294
295 static int
NextNumb(Word_t * PNumber,double * PDNumb,double DMult,Word_t MaxNumb)296 NextNumb(Word_t *PNumber, // pointer to returned next number
297 double *PDNumb, // Temp double of above
298 double DMult, // Multiplier
299 Word_t MaxNumb) // Max number to return
300 {
301 // Save prev number
302 double PrevPDNumb = *PDNumb;
303 double DDiff;
304
305 // Calc next number >= 1.0 beyond previous
306 do
307 {
308 *PDNumb *= DMult;
309 DDiff = *PDNumb - PrevPDNumb;
310 }
311 while (DDiff < 0.5);
312
313 // Return it in integer format
314 if (DDiff < 100.0)
315 *PNumber += (Word_t)(DDiff + 0.5);
316 else
317 *PNumber = (Word_t)(*PDNumb + 0.5);
318
319 // Verify it did not exceed max number
320 if (*PNumber >= MaxNumb)
321 {
322 *PNumber = MaxNumb; // it did, so return max
323 return (1); // flag it
324 }
325 return (0); // more available
326 }
327
328 //=======================================================================
329 // M E A S U R E M E N T S T R U C T U R E
330 //=======================================================================
331
332 typedef struct _MEASUREMENTS_STRUCT *Pms_t;
333 typedef struct _MEASUREMENTS_STRUCT
334 {
335 Word_t ms_delta; // number of points in current group
336 double ms_Bytes; // average allocated memory/per string
337 double ms_mininsert; // Min Retrive number
338 double ms_minretrive; // Min Retrive number
339 } ms_t;
340
341 static Pms_t Pms; // array of MEASUREMENTS_STRUCT
342
343 // Method type
344
345 typedef enum
346 {
347 M_invalid,
348 M_Print,
349 M_Hash,
350 M_JLHash,
351 M_JudySL,
352 M_JudyHS,
353 M_Splay,
354 M_Redblack,
355 M_Ternary
356 } Method_t;
357
358 //=======================================================================
359 // R a n d o m i z e i n p u t s t r i n g s
360 //=======================================================================
361
362 static void
Randomize(Pdt_t Pstrstr,Word_t Len)363 Randomize(Pdt_t Pstrstr, Word_t Len)
364 {
365 Word_t ii;
366
367 // swap the "random" index with the sequential one
368 for (ii = 1; ii < Len; ii++)
369 {
370 dt_t dttemp;
371 Word_t swapii;
372
373 // get "random" index
374 swapii = (Word_t)rand() % Len;
375
376 // and swap
377 dttemp = Pstrstr[ii];
378 Pstrstr[ii] = Pstrstr[swapii];
379 Pstrstr[swapii] = dttemp;
380 }
381 }
382
383 //=======================================================================
384 // B u i l d s e q u e n c i a l s t r i n g b u f f e r
385 //=======================================================================
386
387 Pdt_t
BuildSeqBuf(Pdt_t Pstrstr,Word_t Len)388 BuildSeqBuf(Pdt_t Pstrstr, Word_t Len)
389 {
390 Word_t SumStrings = 0;
391 Word_t ii;
392 Word_t Strlen;
393 uint8_t *string;
394
395 assert(Len <= TValues);
396
397 // calculate how much memory needed for strings
398 for (ii = 0; ii < Len; ii++)
399 {
400 Strlen = Pstrstr[ii].dt_strlen;
401 if (aFlag)
402 SumStrings += ROUNDUPWORD(Strlen + 1);
403 else
404 SumStrings += Strlen + 1;
405 }
406 // check if old string buffer is big enough
407 if (SumStrings > Strsiz_)
408 {
409 if (Strbuf_)
410 free(Strbuf_);
411 else
412 SumStrings += SumStrings / 5; // bump 20%
413
414 Strbuf_ = (uint8_t *) malloc(SumStrings);
415 if (Strbuf_ == NULL)
416 MALLOCERROR;
417 Strsiz_ = SumStrings;
418 }
419 for (ii = 0, string = Strbuf_; ii < Len; ii++)
420 {
421 Strlen = Pstrstr[ii].dt_strlen;
422
423 PdtS_[ii].dt_strlen = Strlen;
424 PdtS_[ii].dt_string = string;
425
426 memcpy(string, Pstrstr[ii].dt_string, Strlen + 1);
427
428 if (aFlag)
429 string += ROUNDUPWORD(Strlen + 1);
430 else
431 string += Strlen + 1;
432 }
433 return (PdtS_);
434 }
435
436 //=======================================================================
437 // H A S H M E T H O D S T R U C T U R E S
438 //=======================================================================
439
440 // These structures are used in Hash() and JLHash() ADTs
441
442 // for storing length of string
443
444 // Hash chain structure (varible length depending on string)
445 // static part of the length
446 #define HSTRUCTOVD (sizeof(hrec_t) - sizeof(int))
447 typedef struct HASHREC_ *Phrec_t;
448 typedef struct HASHREC_
449 {
450 Phrec_t hr_Next; // collision chain link pointer
451 Word_t hr_Value; // Data associated with string
452 int hr_Strlen; // length of string 2 billion max
453 uint8_t hr_String[sizeof(int)]; // string is allocated with struct
454
455 } hrec_t;
456
457 // hash head structure to keep hash array information
458 typedef struct HASHINFO_
459 {
460 Pvoid_t hi_Htbl; // Hash table
461 Word_t hi_tblsize; // Hash table size (Words)
462 Word_t hi_TotalWords; // Hash array total words
463 Word_t hi_Pop1; // Hash array total population
464
465 } hinfo_t, *Phinfo_t;
466
467 // size in words of the header structure
468 #define HASHHEADSZ (sizeof(hinfo_t) / sizeof(Word_t))
469
470 //=======================================================================
471 // H A S H A L G O R I T H M
472 //=======================================================================
473 //
474 // For CPUs with a slow mod (%) use table size a power of 2. A test is
475 // made to see if the SIZE is a power of 2, and if so an .AND.(&) is used
476 // instead of a .MOD.(%) to trim the hash return size. Note: a SIZE == 0,
477 // results in no trimming of hash return size.
478
479 #define HASHSTR(STRING,LENGTH,SIZE) \
480 ((SIZE) == ((SIZE) & -(SIZE))) ? \
481 (HashStr(STRING, LENGTH) & ((SIZE) -1)) : \
482 (HashStr(STRING, LENGTH) % (SIZE))
483
484 // String hash function. Hash string to a unsigned int (uint32_t) This
485 // one needs a fast 32 bit mpy, which is often very slow on older(RISC)
486 // machines. If you are sure you will not over populate the hash table,
487 // then a poorer/faster hash algorithm should be used. Replace with your
488 // own, milage may vary. This program measures the speed, whether used
489 // or not.
490
491 static uint32_t
HashStr(void * Str,Word_t Len)492 HashStr(void *Str, Word_t Len)
493 {
494 uint32_t A = 31415;
495 uint32_t hashv = Len;
496 uint8_t *k = (uint8_t *) Str;
497
498 while (Len--)
499 {
500 hashv = (A * hashv) + *k++;
501 A *= 27183;
502 }
503 return (hashv);
504 }
505
506 //=======================================================================
507 // S T O R E and R E T R I V E R O U T I N E S
508 //=======================================================================
509
510 //=======================================================================
511 // H A S H M E T H O D U S I N G J U D Y L A S H A S H T A B L E
512 //=======================================================================
513
514 PWord_t
JLHashGet(Pvoid_t JLHash,uint8_t * String,Word_t Strlen)515 JLHashGet(Pvoid_t JLHash, uint8_t * String, Word_t Strlen)
516 {
517 Phinfo_t PHash = (Phinfo_t) JLHash;
518 Phrec_t Phrec, *PPhrec;
519 uint32_t hval;
520
521 if (PHash == NULL)
522 return (NULL);
523
524 // get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
525 hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
526
527 JLG(PPhrec, PHash->hi_Htbl, hval); // use JudyL to get &pointer
528
529 if (PPhrec == NULL)
530 return (NULL); // no table entry
531
532 // search for matching string
533 for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
534 {
535 gChainln++; // Hash chain length
536 if (Phrec->hr_Strlen == Strlen) // length match?
537 {
538 if (memcmp(Phrec->hr_String, String, Strlen) == 0)
539 return (&(Phrec->hr_Value)); // match! pointer to Value
540 }
541 }
542 return (NULL);
543 }
544
545 // Return pointer to struct hrec_t associated with string
546
547 PWord_t
JLHashIns(PPvoid_t PPHash,uint8_t * String,Word_t Strlen,Word_t TblSize)548 JLHashIns(PPvoid_t PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
549 {
550 Phrec_t Phrec, *PPhrec;
551 Phinfo_t PHash;
552 Word_t Len;
553 uint32_t hval;
554
555 PHash = (Phinfo_t) * PPHash; // core-dump if calling error
556 if (PHash == NULL) // if hash table not allocated
557 {
558 // allocate the header
559 PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
560 if (PHash == NULL)
561 MALLOCERROR;
562
563 // Initialize the header struct
564 PHash->hi_tblsize = TblSize;
565 PHash->hi_TotalWords = HASHHEADSZ;
566 PHash->hi_Pop1 = 0; // none yet
567 PHash->hi_Htbl = NULL;
568 *PPHash = (Pvoid_t)PHash; // return header to caller
569 }
570 // get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
571 hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
572
573 // get pointer to hash table entry
574 JLI(PPhrec, PHash->hi_Htbl, hval); // JLI will exit if out of memory
575
576 // search for matching string
577 for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
578 {
579 if (Phrec->hr_Strlen == Strlen) // string length match?
580 {
581 if (memcmp(Phrec->hr_String, String, Strlen) == 0)
582 {
583 return (&(Phrec->hr_Value)); // match! pointer to Value
584 }
585 }
586 }
587
588 // String match not found, so do an insert
589
590 Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
591 Phrec = (Phrec_t) JudyMalloc(Len); // get memory for storing string
592 if (Phrec == NULL)
593 MALLOCERROR;
594
595 PHash->hi_TotalWords += Len; // keep track of total mallocs
596
597 Phrec->hr_Strlen = Strlen; // set string length
598 memcpy(Phrec->hr_String, String, Strlen);
599
600 Phrec->hr_Next = *PPhrec; // pointer to synonym
601 *PPhrec = Phrec; // place new struct in front of list
602 (PHash->hi_Pop1)++; // add one to population
603 Phrec->hr_Value = (Word_t)0; // zero the associated Value
604
605 return (&(Phrec->hr_Value)); // return pointer to Value
606 }
607
608 // Return 1 if successful, else 0
609
610 int
JLHashDel(PPvoid_t PPHash,uint8_t * String,Word_t Strlen)611 JLHashDel(PPvoid_t PPHash, uint8_t * String, Word_t Strlen)
612 {
613 Phrec_t Phrec, *PPhrec, *PPhrec1;
614 Phinfo_t PHash;
615 uint32_t hval;
616
617 // avoid an core dump here
618 if (PPHash == NULL)
619 return (0);
620 PHash = (Phinfo_t) (*PPHash); // get header
621 if (PHash == NULL)
622 return (0); // not found
623
624 // get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
625 hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
626
627 // get pointer hash table entry
628 JLG(PPhrec, PHash->hi_Htbl, hval);
629 if (PPhrec == NULL)
630 return (0); // hash entry not found
631
632 PPhrec1 = PPhrec; // save head hash entry ^
633
634 // search for matching string
635 for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
636 {
637 if (Phrec->hr_Strlen == Strlen) // string length match?
638 {
639 if (memcmp(Phrec->hr_String, String, Strlen) == 0) // string match?
640 {
641 int Rc; // not used
642 Word_t Len;
643
644 *PPhrec = Phrec->hr_Next; // put next in previous
645
646 Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
647 JudyFree(Phrec, Len);
648 PHash->hi_TotalWords -= Len; // ram usage accounting
649
650 (PHash->hi_Pop1)--; // Decrement population
651
652 if (*PPhrec1 == NULL) // no chain left
653 {
654 // delete hash table entry
655 JLD(Rc, PHash->hi_Htbl, hval);
656 assert(Rc == 1);
657 }
658 // If last element, free everything
659 if (PHash->hi_Pop1 == 0)
660 {
661 assert(PHash->hi_TotalWords == HASHHEADSZ);
662
663 JudyFree(PHash, HASHHEADSZ); // the header table
664 *PPHash = NULL; // from caller
665 }
666 return (1); // successful
667 }
668 }
669 PPhrec = &(Phrec->hr_Next); // previous = current
670 }
671 return (0); // string not found
672 }
673
674 // Free the whole JLHash structure
675
676 Word_t
JLHashFreeArray(PPvoid_t PPHash)677 JLHashFreeArray(PPvoid_t PPHash)
678 {
679 Phrec_t Phrec, *PPhrec;
680 Phinfo_t PHash;
681 Word_t DeletedWords, Bytes;
682 Word_t Index = 0; // for First, Next loop
683
684 // avoid an core dump here
685 if (PPHash == NULL)
686 return ((Word_t)0);
687 PHash = (Phinfo_t) (*PPHash); // get header
688 if (PHash == NULL)
689 return ((Word_t)0); // not found
690
691 // get bytes of memory usage in (JudyL) Hash table
692 JLMU(Bytes, PHash->hi_Htbl);
693
694 DeletedWords = HASHHEADSZ; // start with header
695
696 // Get 1st table entry in Hash table
697 JLF(PPhrec, PHash->hi_Htbl, Index);
698
699 // found an entry in hash table?
700 while (PPhrec != NULL)
701 {
702 int Rc; // not used
703 Phrec = *PPhrec;
704
705 // walk the synonym linked list
706 while (Phrec != NULL)
707 {
708 Word_t Len;
709 Phrec_t Phrecfree = Phrec;
710
711 // number of words to free -- struct hrec_t
712 Len = BYTES2WORDS(Phrec->hr_Strlen + HSTRUCTOVD);
713
714 // sum total length of mallocs in words
715 DeletedWords += Len;
716
717 (PHash->hi_Pop1)--; // Decrement population
718
719 // get pointer to next synonym on list
720 Phrec = Phrec->hr_Next;
721
722 // free the struct hrec_t
723 JudyFree(Phrecfree, Len);
724 }
725 // delete hash table entry
726 JLD(Rc, PHash->hi_Htbl, Index);
727 assert(Rc == 1);
728
729 // get next hash table entry
730 JLN(PPhrec, PHash->hi_Htbl, Index);
731 }
732 assert(PHash->hi_TotalWords == DeletedWords);
733 assert(PHash->hi_Pop1 == 0);
734
735 JudyFree(PHash, HASHHEADSZ); // the header table
736 *PPHash = NULL; // set pointer null
737
738 // return total bytes freed
739 return ((DeletedWords * sizeof(Word_t)) + Bytes);
740 }
741
742 //=======================================================================
743 // H A S H M E T H O D
744 //=======================================================================
745
746 PWord_t
HashGet(Phinfo_t PHash,uint8_t * String,Word_t Strlen)747 HashGet(Phinfo_t PHash, uint8_t * String, Word_t Strlen)
748 {
749 Phrec_t Phrec, *Htbl;
750 uint32_t hval;
751
752 // avoid an core dump here
753 if (PHash == NULL)
754 return (NULL);
755
756 // get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
757 hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
758
759 // type Hash table pointer
760 Htbl = (Phrec_t *) PHash->hi_Htbl;
761
762 // search for matching string
763 for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
764 {
765 gChainln++; // Hash chain length
766 if (Phrec->hr_Strlen == Strlen) // length match?
767 {
768 if (memcmp(Phrec->hr_String, String, Strlen) == 0)
769 return (&(Phrec->hr_Value)); // match! pointer to Value
770 }
771 }
772 return (NULL);
773 }
774
775 // Return pointer to struct hrec_t associated with string
776
777 Pvoid_t
HashIns(Phinfo_t * PPHash,uint8_t * String,Word_t Strlen,Word_t TblSize)778 HashIns(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
779 {
780 Phrec_t Phrec, *Htbl;
781 Phinfo_t PHash;
782 Word_t Len;
783 uint32_t hval;
784
785 PHash = *PPHash; // core-dump if calling error
786 if (PHash == NULL) // if hash table not allocated
787 {
788 // allocate the header
789 PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
790 if (PHash == NULL)
791 MALLOCERROR;
792
793 // allocate the hash table
794 PHash->hi_Htbl = (Pvoid_t)JudyMalloc(TblSize);
795 if (PHash->hi_Htbl == NULL)
796 MALLOCERROR;
797
798 // you cant beat this with modern compilers/librarys
799 memset(PHash->hi_Htbl, 0, TblSize * sizeof(Word_t));
800
801 // Initialize the header struct
802 PHash->hi_tblsize = TblSize;
803 PHash->hi_TotalWords = TblSize + HASHHEADSZ;
804 PHash->hi_Pop1 = 0; // none yet
805 *PPHash = PHash; // return header to caller
806 }
807 // get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
808 hval = HASHSTR(String, Strlen, TblSize);
809
810 // type Hash table pointer
811 Htbl = (Phrec_t *) PHash->hi_Htbl;
812
813 // search for matching string in hash table entry
814 for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
815 {
816 if (Phrec->hr_Strlen == Strlen) // string length match?
817 {
818 if (memcmp(Phrec->hr_String, String, Strlen) == 0) // string match?
819 {
820 return (&(Phrec->hr_Value)); // match! pointer to Value
821 }
822 }
823 }
824 // string not found, so do an insert
825 Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
826 Phrec = (Phrec_t) JudyMalloc(Len);
827 if (Phrec == NULL)
828 MALLOCERROR;
829 PHash->hi_TotalWords += Len; // keep track of total mallocs
830
831 Phrec->hr_Strlen = Strlen; // set string length
832 memcpy(Phrec->hr_String, String, Strlen); // copy it
833
834 // place new allocation first in chain
835 Phrec->hr_Next = Htbl[hval];
836 Htbl[hval] = Phrec;
837
838 (PHash->hi_Pop1)++; // add one to population
839 Phrec->hr_Value = (Word_t)0; // zero the associated Value
840
841 return (&(Phrec->hr_Value)); // return pointer to Value
842 }
843
844 // Delete entry in hash array, return 1 if successful, else 0
845
846 int
HashDel(Phinfo_t * PPHash,uint8_t * String,Word_t Strlen)847 HashDel(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen)
848 {
849 Phrec_t Phrec, *PPhrec, *Htbl;
850 Phinfo_t PHash;
851 uint32_t hval;
852
853 // avoid an core dump here
854 if (PPHash == NULL)
855 return (0);
856 PHash = *PPHash; // get header
857 if (PHash == NULL)
858 return (0); // not found
859
860 // get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
861 hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
862
863 // type Hash table pointer
864 Htbl = (Phrec_t *) PHash->hi_Htbl;
865
866 // get pointer hash table entry
867 PPhrec = &Htbl[hval];
868
869 // search for matching string
870 for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
871 {
872 if (Phrec->hr_Strlen == Strlen) // length match?
873 {
874 if (memcmp(Phrec->hr_String, String, Strlen) == 0)
875 {
876 Word_t Len;
877
878 // put next hrec_t in previous hrec_t
879 *PPhrec = Phrec->hr_Next;
880
881 Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
882 JudyFree(Phrec, Len);
883 PHash->hi_TotalWords -= Len;
884 (PHash->hi_Pop1)--; // Decrement population
885
886 // If last element, free everything
887 if (PHash->hi_Pop1 == 0)
888 {
889 assert(PHash->hi_TotalWords ==
890 (HASHHEADSZ + PHash->hi_tblsize));
891
892 JudyFree(Htbl, PHash->hi_tblsize); // hash table
893 JudyFree(PHash, HASHHEADSZ); // header struct
894 *PPHash = NULL; // from caller
895 }
896 return (1); // successful
897 }
898 }
899 PPhrec = &(Phrec->hr_Next); // previous = current
900 }
901 return (0); // not found
902 }
903
904 Word_t
HashFreeArray(Phinfo_t * PPHash)905 HashFreeArray(Phinfo_t * PPHash)
906 {
907 int ii;
908 Phrec_t Phrec, *Htbl;
909 Phinfo_t PHash;
910 Word_t DeletedWords;
911
912 // avoid an core dump here
913 if (PPHash == NULL)
914 return ((Word_t)0);
915 PHash = (Phinfo_t) (*PPHash); // get header
916 if (PHash == NULL)
917 return ((Word_t)0);
918
919 // start accumulator of deleted memory
920 DeletedWords = HASHHEADSZ + PHash->hi_tblsize;
921
922 // type Hash table pointer
923 Htbl = (Phrec_t *) PHash->hi_Htbl;
924
925 // walk thru all table entrys
926 for (ii = 0; ii < PHash->hi_tblsize; ii++)
927 {
928 Phrec = Htbl[ii]; // next hash table entry
929
930 while (Phrec != NULL) // walk the synonym linked list
931 {
932 Word_t Len;
933 Phrec_t Phrecfree;
934
935 // get pointer to next synonym on list
936 Phrecfree = Phrec;
937 Phrec = Phrec->hr_Next;
938
939 (PHash->hi_Pop1)--; // Decrement population
940
941 // number of words to free -- struct hrec_t
942 Len = BYTES2WORDS(Phrecfree->hr_Strlen + HSTRUCTOVD);
943 DeletedWords += Len; // sum words freed
944
945 // free the struct hrec_t
946 JudyFree(Phrecfree, Len);
947 }
948 }
949
950 // and free the hash table
951 JudyFree(Htbl, PHash->hi_tblsize);
952 assert(PHash->hi_TotalWords == DeletedWords);
953 assert(PHash->hi_Pop1 == 0);
954
955 JudyFree(PHash, HASHHEADSZ); // the header table
956 *PPHash = NULL; // set pointer null
957
958 // return total bytes freed
959 return (DeletedWords * sizeof(Word_t));
960 }
961
962 //=======================================================================
963 // S P L A Y M E T H O D
964 //=======================================================================
965
966 /* Author J. Zobel, April 2001.
967 Permission to use this code is freely granted, provided that this
968 statement is retained. */
969
970 #define ROTATEFAC 11
971
972 typedef struct spwordrec
973 {
974 char *word;
975 struct spwordrec *left, *right;
976 struct spwordrec *par;
977 } SPTREEREC;
978
979 typedef struct spansrec
980 {
981 struct spwordrec *root;
982 struct spwordrec *ans;
983 } SPANSREC;
984
985 SPANSREC spans = { 0 };
986
987 #define ONELEVEL(PAR,CURR,DIR,RID) \
988 { \
989 PAR->DIR = CURR->RID; \
990 if(PAR->DIR!=NULL) \
991 PAR->DIR->PAR = PAR; \
992 CURR->RID = PAR; \
993 PAR->PAR = CURR; \
994 CURR->PAR = NULL; \
995 }
996
997 #define ZIGZIG(GPAR,PAR,CURR,DIR,RID) \
998 { \
999 CURR->PAR = GPAR->PAR; \
1000 if (CURR->PAR != NULL) \
1001 { \
1002 if (CURR->PAR->DIR == GPAR) \
1003 CURR->PAR->DIR = CURR; \
1004 else \
1005 CURR->PAR->RID = CURR; \
1006 } \
1007 GPAR->DIR = PAR->RID; \
1008 if (GPAR->DIR != NULL) \
1009 GPAR->DIR->PAR = GPAR; \
1010 PAR->DIR = CURR->RID; \
1011 if (CURR->RID != NULL) \
1012 CURR->RID->PAR = PAR; \
1013 CURR->RID = PAR; \
1014 PAR->PAR = CURR; \
1015 PAR->RID = GPAR; \
1016 GPAR->PAR = PAR; \
1017 }
1018
1019 #define ZIGZAG(GPAR,PAR,CURR,DIR,RID) \
1020 { \
1021 CURR->PAR = GPAR->PAR; \
1022 if (CURR->PAR != NULL) \
1023 { \
1024 if (CURR->PAR->DIR == GPAR) \
1025 CURR->PAR->DIR = CURR; \
1026 else \
1027 CURR->PAR->RID = CURR; \
1028 } \
1029 PAR->RID = CURR->DIR; \
1030 if (PAR->RID != NULL) \
1031 PAR->RID->PAR = PAR; \
1032 GPAR->DIR = CURR->RID; \
1033 if (GPAR->DIR != NULL) \
1034 GPAR->DIR->PAR = GPAR; \
1035 CURR->DIR = PAR; \
1036 PAR->PAR = CURR; \
1037 CURR->RID = GPAR; \
1038 GPAR->PAR = CURR; \
1039 }
1040
1041 int scount = ROTATEFAC;
1042
1043 /* Create a node to hold a word */
1044
1045 static SPTREEREC *
spwcreate(char * word,SPTREEREC * par)1046 spwcreate(char *word, SPTREEREC * par)
1047 {
1048 SPTREEREC *tmp;
1049
1050 tmp = (SPTREEREC *) malloc(sizeof(SPTREEREC));
1051 tmp->word = (char *)malloc(strlen(word) + 1);
1052 strcpy(tmp->word, word);
1053 tmp->left = tmp->right = NULL;
1054
1055 tmp->par = par;
1056
1057 gStored++; // count stored
1058
1059 return (tmp);
1060 }
1061
1062 /* Search for word in a splay tree. If word is found, bring it to
1063 root, possibly intermittently. Structure ans is used to pass
1064 in the root, and to pass back both the new root (which may or
1065 may not be changed) and the looked-for record. */
1066
1067 static void
splaysearch(SPANSREC * ans,char * word)1068 splaysearch(SPANSREC * ans, char *word)
1069 {
1070 SPTREEREC *curr = ans->root, *par, *gpar;
1071 int val;
1072
1073 scount--;
1074
1075 if (ans->root == NULL)
1076 {
1077 ans->ans = NULL;
1078 return;
1079 }
1080 while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1081 {
1082 if (val > 0)
1083 curr = curr->right;
1084 else
1085 curr = curr->left;
1086 }
1087
1088 ans->ans = curr;
1089
1090 if (curr == ans->root)
1091 {
1092 return;
1093 }
1094
1095 if (scount <= 0 && curr != NULL) /* Move node towards root */
1096 {
1097 scount = ROTATEFAC;
1098
1099 while ((par = curr->par) != NULL)
1100 {
1101 if (par->left == curr)
1102 {
1103 if ((gpar = par->par) == NULL)
1104 {
1105 ONELEVEL(par, curr, left, right);
1106 }
1107 else if (gpar->left == par)
1108 {
1109 ZIGZIG(gpar, par, curr, left, right);
1110 }
1111 else
1112 {
1113 ZIGZAG(gpar, par, curr, right, left);
1114 }
1115 }
1116 else
1117 {
1118 if ((gpar = par->par) == NULL)
1119 {
1120 ONELEVEL(par, curr, right, left);
1121 }
1122 else if (gpar->left == par)
1123 {
1124 ZIGZAG(gpar, par, curr, left, right);
1125 }
1126 else
1127 {
1128 ZIGZIG(gpar, par, curr, right, left);
1129 }
1130 }
1131 }
1132 ans->root = curr;
1133 }
1134
1135 return;
1136 }
1137
1138 /* Insert word into a splay tree. If word is already present, bring it to
1139 root, possibly intermittently. Structure ans is used to pass
1140 in the root, and to pass back both the new root (which may or
1141 may not be changed) and the looked-for record. */
1142
1143 static void
splayinsert(SPANSREC * ans,char * word)1144 splayinsert(SPANSREC * ans, char *word)
1145 {
1146 SPTREEREC *curr = ans->root, *par, *gpar, *prev = NULL, *spwcreate();
1147 int val = 0;
1148
1149 scount--;
1150
1151 if (ans->root == NULL)
1152 {
1153 ans->ans = ans->root = spwcreate(word, NULL);
1154 return;
1155 }
1156
1157 while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1158 {
1159 prev = curr;
1160 if (val > 0)
1161 curr = curr->right;
1162 else
1163 curr = curr->left;
1164 }
1165
1166 if (curr == NULL)
1167 {
1168 if (val > 0)
1169 curr = prev->right = spwcreate(word, prev);
1170 else
1171 curr = prev->left = spwcreate(word, prev);
1172 }
1173
1174 ans->ans = curr;
1175
1176 if (scount <= 0) /* Move node towards root */
1177 {
1178 scount = ROTATEFAC;
1179
1180 while ((par = curr->par) != NULL)
1181 {
1182 if (par->left == curr)
1183 {
1184 if ((gpar = par->par) == NULL)
1185 {
1186 ONELEVEL(par, curr, left, right);
1187 }
1188 else if (gpar->left == par)
1189 {
1190 ZIGZIG(gpar, par, curr, left, right);
1191 }
1192 else
1193 {
1194 ZIGZAG(gpar, par, curr, right, left);
1195 }
1196 }
1197 else
1198 {
1199 if ((gpar = par->par) == NULL)
1200 {
1201 ONELEVEL(par, curr, right, left);
1202 }
1203 else if (gpar->left == par)
1204 {
1205 ZIGZAG(gpar, par, curr, left, right);
1206 }
1207 else
1208 {
1209 ZIGZIG(gpar, par, curr, right, left);
1210 }
1211 }
1212 }
1213 ans->root = curr;
1214 }
1215 return;
1216 }
1217
1218 //=======================================================================
1219 // R E D B L A C K M E T H O D
1220 //=======================================================================
1221
1222 /* Author J. Zobel, April 2001.
1223 Permission to use this code is freely granted, provided that this
1224 statement is retained. */
1225
1226 typedef struct rbwordrec
1227 {
1228 char *word;
1229 struct rbwordrec *left, *right;
1230 struct rbwordrec *par;
1231 char colour;
1232 } RBTREEREC;
1233
1234 typedef struct rbansrec
1235 {
1236 struct rbwordrec *root;
1237 struct rbwordrec *ans;
1238 } BRANSREC;
1239
1240 BRANSREC rbans = { 0 };
1241
1242 #define RED 0
1243 #define BLACK 1
1244
1245 /* Find word in a redblack tree */
1246
1247 static void
redblacksearch(BRANSREC * ans,char * word)1248 redblacksearch(BRANSREC * ans, char *word)
1249 {
1250 RBTREEREC *curr = ans->root;
1251 int val;
1252
1253 if (ans->root != NULL)
1254 {
1255 while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1256 {
1257 if (val > 0)
1258 curr = curr->right;
1259 else
1260 curr = curr->left;
1261 }
1262 }
1263
1264 ans->ans = curr;
1265
1266 return;
1267 }
1268
1269 /* Rotate the right child of par upwards */
1270
1271 /* Could be written as a macro, but not really necessary
1272 as it is only called on insertion */
1273
1274 void
leftrotate(BRANSREC * ans,RBTREEREC * par)1275 leftrotate(BRANSREC * ans, RBTREEREC * par)
1276 {
1277 RBTREEREC *curr, *gpar;
1278
1279 if ((curr = par->right) != NULL)
1280 {
1281 par->right = curr->left;
1282 if (curr->left != NULL)
1283 curr->left->par = par;
1284 curr->par = par->par;
1285 if ((gpar = par->par) == NULL)
1286 ans->root = curr;
1287 else
1288 {
1289 if (par == gpar->left)
1290 gpar->left = curr;
1291 else
1292 gpar->right = curr;
1293 }
1294 curr->left = par;
1295 par->par = curr;
1296 }
1297 }
1298
1299 /* Rotate the left child of par upwards */
1300 void
rightrotate(BRANSREC * ans,RBTREEREC * par)1301 rightrotate(BRANSREC * ans, RBTREEREC * par)
1302 {
1303 RBTREEREC *curr, *gpar;
1304
1305 if ((curr = par->left) != NULL)
1306 {
1307 par->left = curr->right;
1308 if (curr->right != NULL)
1309 curr->right->par = par;
1310 curr->par = par->par;
1311 if ((gpar = par->par) == NULL)
1312 ans->root = curr;
1313 else
1314 {
1315 if (par == gpar->left)
1316 gpar->left = curr;
1317 else
1318 gpar->right = curr;
1319 }
1320 curr->right = par;
1321 par->par = curr;
1322 }
1323 }
1324
1325 /* Create a node to hold a word */
1326
1327 RBTREEREC *
rbwcreate(char * word,RBTREEREC * par)1328 rbwcreate(char *word, RBTREEREC * par)
1329 {
1330 RBTREEREC *tmp;
1331
1332 tmp = (RBTREEREC *) malloc(sizeof(RBTREEREC));
1333 tmp->word = (char *)malloc(strlen(word) + 1);
1334 strcpy(tmp->word, word);
1335 tmp->left = tmp->right = NULL;
1336
1337 tmp->par = par;
1338
1339 gStored++; // count stored
1340
1341 return (tmp);
1342 }
1343
1344 /* Insert word into a redblack tree */
1345
1346 void
redblackinsert(BRANSREC * ans,char * word)1347 redblackinsert(BRANSREC * ans, char *word)
1348 {
1349 RBTREEREC *curr = ans->root, *par, *gpar, *prev = NULL, *rbwcreate();
1350 int val = 0;
1351
1352 if (ans->root == NULL)
1353 {
1354 ans->ans = ans->root = rbwcreate(word, NULL);
1355 return;
1356 }
1357 while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1358 {
1359 prev = curr;
1360 if (val > 0)
1361 curr = curr->right;
1362 else
1363 curr = curr->left;
1364 }
1365
1366 ans->ans = curr;
1367
1368 if (curr == NULL)
1369 /* Insert a new node, rotate up if necessary */
1370 {
1371 if (val > 0)
1372 curr = prev->right = rbwcreate(word, prev);
1373 else
1374 curr = prev->left = rbwcreate(word, prev);
1375
1376 curr->colour = RED;
1377 while ((par = curr->par) != NULL
1378 && (gpar = par->par) != NULL && curr->par->colour == RED)
1379 {
1380 if (par == gpar->left)
1381 {
1382 if (gpar->right != NULL && gpar->right->colour == RED)
1383 {
1384 par->colour = BLACK;
1385 gpar->right->colour = BLACK;
1386 gpar->colour = RED;
1387 curr = gpar;
1388 }
1389 else
1390 {
1391 if (curr == par->right)
1392 {
1393 curr = par;
1394 leftrotate(ans, curr);
1395 par = curr->par;
1396 }
1397 par->colour = BLACK;
1398 if ((gpar = par->par) != NULL)
1399 {
1400 gpar->colour = RED;
1401 rightrotate(ans, gpar);
1402 }
1403 }
1404 }
1405 else
1406 {
1407 if (gpar->left != NULL && gpar->left->colour == RED)
1408 {
1409 par->colour = BLACK;
1410 gpar->left->colour = BLACK;
1411 gpar->colour = RED;
1412 curr = gpar;
1413 }
1414 else
1415 {
1416 if (curr == par->left)
1417 {
1418 curr = par;
1419 rightrotate(ans, curr);
1420 par = curr->par;
1421 }
1422 par->colour = BLACK;
1423 if ((gpar = par->par) != NULL)
1424 {
1425 gpar->colour = RED;
1426 leftrotate(ans, gpar);
1427 }
1428 }
1429 }
1430 }
1431 if (curr->par == NULL)
1432 ans->root = curr;
1433 ans->root->colour = BLACK;
1434 }
1435
1436 return;
1437 }
1438
1439 //=======================================================================
1440 // T E R N A R Y M E T H O D
1441 //=======================================================================
1442
1443 typedef struct tnode *Tptr;
1444 typedef struct tnode
1445 {
1446 uint8_t splitchar;
1447 Tptr lokid, eqkid, hikid;
1448 }
1449 Tnode;
1450
1451 void
TernaryIns(Tptr * p,uint8_t * s,int Len)1452 TernaryIns(Tptr * p, uint8_t * s, int Len)
1453 {
1454 int d;
1455 uint8_t *instr = s;
1456 Tptr pp;
1457 while ((pp = *p) != NULL)
1458 {
1459 if ((d = *s - pp->splitchar) == 0)
1460 {
1461 if (*s++ == 0)
1462 {
1463 printf("Oops duplicate Ternary string %s\n", instr);
1464 return;
1465 }
1466 p = &(pp->eqkid);
1467 }
1468 else if (d < 0)
1469 p = &(pp->lokid);
1470 else
1471 p = &(pp->hikid);
1472 }
1473 for (;;)
1474 {
1475 *p = (Tptr) malloc(sizeof(Tnode));
1476 pp = *p;
1477 pp->splitchar = *s;
1478 pp->lokid = pp->eqkid = pp->hikid = 0;
1479 if (*s++ == 0)
1480 {
1481 pp->eqkid = (Tptr) instr;
1482 gStored++; // number of strings stored
1483 return;
1484 }
1485 p = &(pp->eqkid);
1486 }
1487 }
1488
1489 int
TernaryGet(Tptr p,uint8_t * s,int Len)1490 TernaryGet(Tptr p, uint8_t * s, int Len)
1491 {
1492 while (p)
1493 {
1494 if (*s < p->splitchar)
1495 p = p->lokid;
1496 else if (*s == p->splitchar)
1497 {
1498 if (*s++ == 0)
1499 return 1;
1500 p = p->eqkid;
1501 }
1502 else
1503 p = p->hikid;
1504 }
1505 return 0;
1506 }
1507
1508 //=======================================================================
1509 // M E A S U R E A D T S P E E D and M E M O R Y U S A G E
1510 //=======================================================================
1511
1512 //Word_t TotalJudyMalloc;
1513
1514 #define GETSTRING(PCurStr, Strlen)
1515
1516 int
main(int argc,char * argv[])1517 main(int argc, char *argv[])
1518 {
1519 TIMER_vars(tm); // declare timer variables
1520 FILE *fid; // to read file.
1521 int Chr; // char read from fgetc
1522 Pdt_t Pdt, Pdts; // array of lengths and pointers to str
1523 uint8_t *PCurStr; // Current string pointer
1524 Word_t LineCnt; // line counter
1525 int Strlen; // = strlen();
1526 Word_t StrTot; // Total len of strings
1527 Word_t StrNumb; // current line number
1528 Word_t ReadLin; // strings to read
1529 double Mult; // multiplier between groups
1530 Word_t Groups; // number of measurement groups
1531 Word_t grp; // current group
1532 Pvoid_t JudySL = NULL; // JudySL root pointer
1533 Pvoid_t JudyHS = NULL; // JudyHS root pointer
1534 Pvoid_t JLHash = NULL; // JLHash root pointer
1535 Phinfo_t HRoot = NULL; // hash table root pointer
1536 Tptr Ternary = { 0 }; // Ternary struct root pointer
1537
1538 Method_t Method = M_invalid; // the method to measure
1539 Word_t lines = 0; // to shut up compiler
1540 Word_t Bytes = 0; // Bytes deallocated from FreeArray
1541 Word_t StringMemory; // Bytes allocated for input data
1542 int Pass;
1543 int Passes = 1;
1544
1545 int Opt;
1546 extern char *optarg;
1547 int ErrorFlag = 0;
1548
1549 // un-buffer output
1550 setbuf(stdout, NULL);
1551
1552 //============================================================
1553 // PARSE INPUT PARAMETERS
1554 //============================================================
1555
1556 while ((Opt = getopt(argc, argv, "A:H:L:n:T:P:M:praDC")) != -1)
1557 {
1558 switch (Opt)
1559 {
1560 case 'A':
1561 if (Method != M_invalid)
1562 {
1563 printf("\nOnly ONE '-A<ADT>' is allowed!!!!!!\n");
1564 ErrorFlag++;
1565 break;
1566 }
1567 if (strcmp(optarg, "Print") == 0)
1568 Method = M_Print;
1569 if (strcmp(optarg, "Hash") == 0)
1570 {
1571 Method = M_Hash;
1572 HTblsz = 1LU << 20; // default 1.0+ million
1573 }
1574 if (strcmp(optarg, "JLHash") == 0)
1575 {
1576 Method = M_JLHash;
1577 HTblsz = 0; // max 2^32
1578 }
1579 if (strcmp(optarg, "JudySL") == 0)
1580 Method = M_JudySL;
1581 if (strcmp(optarg, "Splay") == 0)
1582 Method = M_Splay;
1583 if (strcmp(optarg, "Redblack") == 0)
1584 Method = M_Redblack;
1585 if (strcmp(optarg, "JudyHS") == 0)
1586 Method = M_JudyHS;
1587 if (strcmp(optarg, "Ternary") == 0)
1588 Method = M_Ternary;
1589 break;
1590
1591 case 'H': // Size of Hash table
1592 HTblsz = strtoul(optarg, NULL, 0);
1593 break;
1594
1595 case 'L': // Number of Loops
1596 Passes = atoi(optarg);
1597 if (Passes <= 0)
1598 {
1599 printf("\n !! OOps - Number of Loops must be > 0\n");
1600 ErrorFlag++;
1601 }
1602 break;
1603
1604 case 'n': // Max population of arrays
1605 nStrg = strtoul(optarg, NULL, 0); // Size of Linear Array
1606 if (nStrg == 0)
1607 {
1608 printf("\n !! OOps - Number of strings must be > 0\n");
1609 ErrorFlag++;
1610 }
1611 break;
1612
1613 case 'T': // Maximum retrieve tests for timing
1614 TValues = strtoul(optarg, NULL, 0);
1615 break;
1616
1617 case 'P': // measurement points per decade
1618 PtsPdec = strtoul(optarg, NULL, 0);
1619 break;
1620
1621 case 'M': // maximum length of input string
1622 MLength = atoi(optarg);
1623 break;
1624
1625 case 'p': // pre-initialize Hash table
1626 pFlag = 1;
1627 break;
1628
1629 case 'r': // do not randomize input
1630 if (CFlag)
1631 {
1632 printf
1633 ("\n !! OOps '-r' and '-C' flag are mutually exclusive\n");
1634 ErrorFlag++;
1635 break;
1636 }
1637 rFlag = 1;
1638 break;
1639
1640 case 'a': // word align string buffers
1641 aFlag = 1;
1642 break;
1643
1644 case 'D': // do a delete at end
1645 DFlag = 1;
1646 break;
1647
1648 case 'C': // build sequential Get string buffers
1649 if (rFlag)
1650 {
1651 printf
1652 ("\n !! OOps '-C' and '-r' flag are mutually exclusive\n");
1653 ErrorFlag++;
1654 break;
1655 }
1656 CFlag = 1;
1657 break;
1658
1659 default:
1660 ErrorFlag++;
1661 break;
1662 }
1663 }
1664 if (Method == -1)
1665 {
1666 printf
1667 ("\n !! OOps -- '-A <ADT>' I.E. '-AHash' or '-AJudyHS' is a required option\n");
1668 ErrorFlag++;
1669 }
1670
1671 fileidx = optind;
1672 if (optind >= argc)
1673 {
1674 printf("\n !! OOps -- No input file specified\n");
1675 ErrorFlag++;
1676 }
1677
1678 if (ErrorFlag)
1679 {
1680 printf("\n");
1681 printf("$ %s -A<ADT> -n# -H# -P# -T# -p -r -a InputStringFile\n\n",
1682 argv[0]);
1683 printf("Where: ");
1684 printf("'InputStringFile' is text file of strings to use in test\n\n");
1685 printf
1686 ("-A <ADT> is Hash|JLHash|JudySL|JudyHS|Splay|Redblack|Ternary|Print\n");
1687 printf("\n");
1688 printf("-n <#> max number of strings to use in tests (all)\n");
1689 printf("-H <#> is number elements in Hash table\n");
1690 printf("-P <#> number of measurement points per decade (40)\n");
1691 printf
1692 ("-T <#> Change the 'Get' number_of_strings to measure per data point\n");
1693 printf("-D Use 'Delete' routine instead of 'FreeArray' routine\n");
1694 printf("-p pre-zero hash table to fault in all pages\n");
1695 printf("-r Do not randomize Insert and Get order of strings\n");
1696 printf("-C Build contigious string buffers for 'Get' tests\n");
1697 printf("-a Word_t align the start address of input strings\n");
1698 printf("-M <#> Change the maximum 'strlen(String)' of input Strings\n");
1699 printf("\n\n");
1700
1701 exit(1);
1702 }
1703
1704 // calculate max number mask used in hash routine
1705
1706 //============================================================
1707 // PRINT COMMAND NAME + RUN ARGUMENTS
1708 //============================================================
1709
1710 printf("# %s", argv[0]);
1711 if (nStrg != INFSTRGS)
1712 printf(" -n%lu", nStrg);
1713 switch (Method)
1714 {
1715 case M_Hash:
1716 printf(" -A Hash");
1717 break;
1718 case M_JLHash:
1719 printf(" -A JLHash");
1720 break;
1721 case M_JudySL:
1722 printf(" -A JudySL");
1723 break;
1724 case M_JudyHS:
1725 printf(" -A JudyHS");
1726 break;
1727 case M_Splay:
1728 printf(" -A Splay");
1729 break;
1730 case M_Redblack:
1731 printf(" -A Redblack");
1732 break;
1733 case M_Ternary:
1734 printf(" -A Ternary");
1735 break;
1736 default:
1737 break;
1738 }
1739 if (HTblsz)
1740 printf(" -H%lu", HTblsz);
1741 printf(" -P%lu", PtsPdec);
1742 printf(" -L%d", Passes);
1743 if (pFlag)
1744 printf(" -p");
1745 if (rFlag)
1746 printf(" -r");
1747 if (DFlag)
1748 printf(" -D");
1749 if (CFlag)
1750 printf(" -C");
1751 printf(" -M%d", MLength);
1752 printf(" %s", argv[fileidx]);
1753 printf("\n");
1754
1755 // print some header
1756
1757 printf("# This file is in a format to input to 'jbgraph'\n");
1758 printf("# XLABEL Stored\n");
1759 printf("# YLABEL Microseconds / Index\n");
1760 printf("# COLHEAD 1 Total Insert attempts\n");
1761 printf("# COLHEAD 2 Number Gets\n");
1762 printf("# COLHEAD 3 Duplicate strings\n");
1763 printf("# COLHEAD 4 Insert Time (uS)\n");
1764 printf("# COLHEAD 5 Get Time (uS)\n");
1765 printf("# COLHEAD 6 Hash Chain Length\n");
1766 printf("# COLHEAD 7 Average RAM/String\n");
1767
1768 // uname(2) strings describing the machine
1769 {
1770 struct utsname ubuf; // for system name
1771
1772 if (uname(&ubuf) == -1)
1773 printf("# Uname(2) failed\n");
1774 else
1775 printf("# %s %s %s %s %s\n", ubuf.sysname, ubuf.nodename,
1776 ubuf.release, ubuf.version, ubuf.machine);
1777 }
1778 if (sizeof(Word_t) == 8)
1779 printf("# 64 Bit CPU\n");
1780 else if (sizeof(Word_t) == 4)
1781 printf("# 32 Bit CPU\n");
1782 #ifdef CPUMHZ
1783 printf("# Processor speed compiled at %d Mhz\n", CPUMHZ);
1784 #endif // CPUMHZ
1785
1786 if (Method == M_Hash)
1787 printf("# Hash record struct: sizeof(hrec_t) = %d\n", sizeof(hrec_t));
1788 if (Method == M_Ternary)
1789 printf("# Ternary record struct: sizeof(Tnode) = %d\n", sizeof(Tnode));
1790
1791 // OPEN INPUT FILE:
1792
1793 if ((fid = fopen(argv[fileidx], "r")) == NULL)
1794 FILERROR;
1795
1796 for (StrTot = Strlen = LineCnt = 0; (Chr = fgetc(fid)) != EOF;)
1797 {
1798 if (Chr == '\n')
1799 {
1800 if (Strlen) // eat zero length lines
1801 {
1802 if (Strlen > MLength)
1803 Strlen = MLength;
1804 LineCnt++; // increase string count
1805 Strlen++; // add a \0 for JudySL
1806
1807 if (aFlag) // for word alignment
1808 StrTot += ROUNDUPWORD(Strlen);
1809 else
1810 StrTot += Strlen; // memory needed to store strings
1811
1812 if (LineCnt == nStrg) // shorten if required by -n option
1813 break;
1814
1815 Strlen = 0;
1816 }
1817 }
1818 else
1819 {
1820 Strlen++;
1821 }
1822 }
1823 fclose(fid);
1824 fid = NULL;
1825 nStrg = LineCnt; // adj if necessary
1826
1827 // get struct to keep track of the strings
1828 StringMemory = sizeof(dt_t) * nStrg;
1829 Pdt = (Pdt_t) malloc(sizeof(dt_t) * nStrg);
1830 if (Pdt == NULL)
1831 MALLOCERROR;
1832
1833 // get memory to store the strings
1834 StringMemory += StrTot;
1835 PCurStr = (uint8_t *) malloc(StrTot);
1836 if (PCurStr == NULL)
1837 MALLOCERROR;
1838
1839 // BRING FILE INTO RAM, COUNT LINES and CHECK LENGTH
1840
1841 //============================================================
1842 // CALCULATE NUMBER OF MEASUREMENT GROUPS -- points per decade
1843 //============================================================
1844
1845 // Calculate Multiplier for number of points per decade
1846 Mult = pow(10.0, 1.0 / (double)PtsPdec);
1847 {
1848 double sum;
1849 Word_t numb, prevnumb;
1850
1851 // Count number of measurements needed (10K max)
1852 sum = numb = 1;
1853 for (Groups = 2; Groups < 10000; Groups++)
1854 if (NextNumb(&numb, &sum, Mult, nStrg))
1855 break;
1856
1857 // Get memory for measurements
1858 Pms = (Pms_t) calloc(Groups, sizeof(ms_t));
1859 if (Pms == NULL)
1860 MALLOCERROR;
1861
1862 // Now calculate number of Indexes for each measurement point
1863 numb = sum = 1;
1864 prevnumb = 0;
1865 for (grp = 0; grp < Groups; grp++)
1866 {
1867 Pms[grp].ms_delta = numb - prevnumb;
1868 Pms[grp].ms_mininsert = 10000000.0; // infinity
1869 Pms[grp].ms_minretrive = 10000000.0; // infinity
1870 Pms[grp].ms_Bytes = 0.0;
1871
1872 prevnumb = numb;
1873
1874 NextNumb(&numb, &sum, Mult, nStrg);
1875 }
1876 } // Groups = number of sizes
1877
1878 // print remaining header
1879
1880 if (Method == M_Hash)
1881 {
1882 printf("# Allocate Hash table = %lu elements\n", HTblsz);
1883 }
1884 if (Method == M_JLHash)
1885 {
1886 if (HTblsz)
1887 printf("# JLHash table virtual size = %lu\n", HTblsz);
1888 else
1889 printf("# JLHash table virtual size = 4294967296\n");
1890 }
1891
1892 //=======================================================================
1893 // Read text input file into RAM
1894 //=======================================================================
1895
1896 if ((fid = fopen(argv[fileidx], "r")) == NULL)
1897 FILERROR;
1898
1899 for (Strlen = LineCnt = 0; LineCnt < nStrg;)
1900 {
1901 Chr = fgetc(fid);
1902 if (Chr == '\n')
1903 {
1904 if (Strlen) // eat zero length lines
1905 {
1906 if (Strlen > MLength)
1907 Strlen = MLength;
1908 Pdt[LineCnt].dt_string = PCurStr - Strlen;
1909 Pdt[LineCnt].dt_strlen = Strlen;
1910 LineCnt++;
1911
1912 Strlen = 0;
1913 *PCurStr++ = '\0'; // for JudySL
1914 if (aFlag) // for word alignment
1915 PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);
1916
1917 if ((Word_t)PCurStr % sizeof(Word_t))
1918 aCount++;
1919 }
1920 }
1921 else
1922 {
1923 if (Strlen < MLength)
1924 {
1925 Strlen++;
1926 if (Chr == '\0')
1927 Chr = ' '; // for JudySL
1928 *PCurStr++ = (uint8_t) Chr;
1929 }
1930 }
1931 }
1932 fclose(fid);
1933 fid = NULL;
1934 assert(nStrg == LineCnt);
1935
1936 printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
1937 aCount, (double)aCount / (double)LineCnt * 100.0);
1938
1939 printf("# Ram used for input data = %lu bytes\n", StringMemory);
1940
1941 printf("# Average string length = %.1f bytes\n",
1942 (double)(StrTot - LineCnt) / LineCnt);
1943
1944 // Allocate memory for Cached assess to 'Get' (largest delta). This flag
1945 // will put the 'randomized' 'Get' order strings in a sequential buffer.
1946 // Modern processors will 'read ahead' with an access to RAM is sequential
1947 // -- thus saving the 'Get' having to bring the string into cache.
1948 if (CFlag)
1949 {
1950 PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
1951 if (PdtS_ == NULL)
1952 MALLOCERROR;
1953
1954 // now guess how much memory will be needed for the strings
1955 Strsiz_ = ((StrTot / nStrg) * TValues);
1956 Strsiz_ += Strsiz_; // bump %20
1957
1958 Strbuf_ = (uint8_t *) malloc(Strsiz_);
1959 if (Strbuf_ == NULL)
1960 MALLOCERROR;
1961
1962 printf
1963 ("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
1964 Strsiz_);
1965 }
1966
1967 //=======================================================================
1968 // TIME GETSTRING() from Cache (most of the time)
1969 //=======================================================================
1970
1971 STARTTm(tm); // start timer
1972 for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
1973 {
1974 GETSTRING(PCurStr, Strlen);
1975 Strlen = Pdt[LineCnt].dt_strlen;
1976 PCurStr = Pdt[LineCnt].dt_string;
1977
1978 if (strlen(PCurStr) != Strlen) // bring string into Cache
1979 {
1980 // necessary to prevent cc from optimizing out
1981 printf(" !! OOps Bug, wrong string length\n");
1982 exit(1);
1983 }
1984 }
1985 ENDTm(DeltaUSec, tm); // end timer
1986
1987 printf
1988 ("# Access Time = %6.3f uS average per string (mostly from Cache)\n",
1989 DeltaUSec / nStrg);
1990
1991 //=======================================================================
1992 // TIME GETSTRING() + HASHSTR() from Cache (most of the time)
1993 //=======================================================================
1994
1995 STARTTm(tm); // start timer
1996 for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
1997 {
1998 uint32_t hval;
1999 GETSTRING(PCurStr, Strlen);
2000 PCurStr = Pdt[LineCnt].dt_string;
2001 Strlen = Pdt[LineCnt].dt_strlen;
2002 hval = HASHSTR(PCurStr, Strlen, HTblsz);
2003 if (foolflag)
2004 printf("OOps foolflag is set, hval = %d\n", hval);
2005 }
2006 ENDTm(DeltaUSec, tm); // end timer
2007
2008 printf
2009 ("# HashStr() Time = %6.3f uS average per string (mostly from Cache)\n",
2010 DeltaUSec / nStrg);
2011
2012 // randomize the input strings (adjacent strings will not be on same page)
2013
2014 if (rFlag == 0)
2015 {
2016 Randomize(Pdt, nStrg); // Randomize ALL to be stored
2017
2018 //=======================================================================
2019 // TIME GETSTRING() from RAM (most of the time)
2020 //=======================================================================
2021
2022 STARTTm(tm); // start timer
2023 for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
2024 {
2025 GETSTRING(PCurStr, Strlen);
2026 Strlen = Pdt[LineCnt].dt_strlen;
2027 PCurStr = Pdt[LineCnt].dt_string;
2028
2029 if (strlen(PCurStr) != Strlen) // bring string into Cache
2030 {
2031 // necessary to prevent cc from optimizing out
2032 printf(" !! OOps Bug, wrong string length\n");
2033 exit(1);
2034 }
2035 }
2036 ENDTm(DeltaUSec, tm); // end timer
2037
2038 printf
2039 ("# Access Time = %6.3f uS average per string (mostly from RAM)\n",
2040 DeltaUSec / nStrg);
2041
2042 //=======================================================================
2043 // TIME GETSTRING() + HASHSTR() from RAM (most of the time)
2044 //=======================================================================
2045
2046 STARTTm(tm); // start timer
2047 for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
2048 {
2049 uint32_t hval;
2050 GETSTRING(PCurStr, Strlen);
2051 Strlen = Pdt[LineCnt].dt_strlen;
2052 PCurStr = Pdt[LineCnt].dt_string;
2053 hval = HASHSTR(PCurStr, Strlen, HTblsz);
2054 if (foolflag)
2055 printf("OOps foolflag is set, hval = %u\n", hval);
2056 }
2057 ENDTm(DeltaUSec, tm); // end timer
2058
2059 printf
2060 ("# HashStr() Time = %6.3f uS average per string (mostly from RAM)\n",
2061 DeltaUSec / nStrg);
2062 }
2063
2064 //=======================================================================
2065 // Insert, Get and Delete loops
2066 //=======================================================================
2067
2068 for (Pass = 0; Pass < Passes; Pass++)
2069 {
2070 printf("# Pass %d\n", Pass);
2071
2072 // heading of table
2073 Printf
2074 ("# TotInserts DeltaGets DupStrs InsTime GetTime HChainLen Ram/String\n");
2075 gStored = 0; // number of strings inserted
2076 StrNumb = 0; // number of attempted strings inserted
2077
2078 STARTmem; // current malloc() mem usage
2079 for (grp = 0; grp < Groups; grp++)
2080 {
2081 PWord_t PValue;
2082 Word_t Begin = gStored; // remember current STOREed
2083 Word_t Delta = Pms[grp].ms_delta;
2084
2085 switch (Method)
2086 {
2087 case M_Print:
2088 {
2089 STARTTm(tm); // start timer
2090 for (lines = 0; lines < Delta; lines++, StrNumb++)
2091 {
2092 GETSTRING(PCurStr, Strlen);
2093 PCurStr = Pdt[StrNumb].dt_string;
2094 Printf("%s\n", (char *)PCurStr);
2095 }
2096 ENDTm(DeltaUSec, tm); // end timer
2097 break;
2098 }
2099 case M_Hash:
2100 {
2101 STARTTm(tm); // start timer
2102 for (lines = 0; lines < Delta; lines++, StrNumb++)
2103 {
2104 GETSTRING(PCurStr, Strlen);
2105 Strlen = Pdt[StrNumb].dt_strlen;
2106 PCurStr = Pdt[StrNumb].dt_string;
2107
2108 PValue = HashIns(&HRoot, PCurStr, Strlen, HTblsz);
2109 if ((*PValue)++ == 0)
2110 gStored++; // number of strings stored
2111 }
2112 ENDTm(DeltaUSec, tm); // end timer
2113 break;
2114 }
2115 case M_JLHash:
2116 {
2117 STARTTm(tm); // start timer
2118 for (lines = 0; lines < Delta; lines++, StrNumb++)
2119 {
2120 GETSTRING(PCurStr, Strlen);
2121 Strlen = Pdt[StrNumb].dt_strlen;
2122 PCurStr = Pdt[StrNumb].dt_string;
2123 PValue = JLHashIns(&JLHash, PCurStr, Strlen, HTblsz);
2124 if ((*PValue)++ == 0)
2125 gStored++; // number of strings stored
2126 }
2127 ENDTm(DeltaUSec, tm); // end timer
2128 break;
2129 }
2130 case M_JudySL:
2131 {
2132 STARTTm(tm); // start timer
2133 for (lines = 0; lines < Delta; lines++, StrNumb++)
2134 {
2135 GETSTRING(PCurStr, Strlen);
2136 PCurStr = Pdt[StrNumb].dt_string;
2137
2138 JSLI(PValue, JudySL, PCurStr); // insert string
2139 if ((*PValue)++ == 0)
2140 gStored++; // number of strings stored
2141 }
2142 ENDTm(DeltaUSec, tm); // end timer
2143 break;
2144 }
2145 case M_JudyHS:
2146 {
2147 STARTTm(tm); // start timer
2148 for (lines = 0; lines < Delta; lines++, StrNumb++)
2149 {
2150 GETSTRING(PCurStr, Strlen);
2151 Strlen = Pdt[StrNumb].dt_strlen;
2152 PCurStr = Pdt[StrNumb].dt_string;
2153
2154 JHSI(PValue, JudyHS, PCurStr, Strlen); // insert string
2155 if ((*PValue)++ == 0)
2156 gStored++; // number of strings stored
2157 }
2158 ENDTm(DeltaUSec, tm); // end timer
2159 break;
2160 }
2161
2162 // NOTE: the ADT's below here are so slow, that I did not add much effort
2163 // to clean them up. (dlb)
2164
2165 case M_Splay:
2166 {
2167 STARTTm(tm); // start timer
2168 for (lines = 0; lines < Delta; lines++, StrNumb++)
2169 {
2170 GETSTRING(PCurStr, Strlen);
2171 PCurStr = Pdt[StrNumb].dt_string;
2172
2173 splayinsert(&spans, (char *)PCurStr);
2174 }
2175 ENDTm(DeltaUSec, tm); // end timer
2176 break;
2177 }
2178 case M_Redblack:
2179 {
2180 STARTTm(tm); // start timer
2181 for (lines = 0; lines < Delta; lines++, StrNumb++)
2182 {
2183 GETSTRING(PCurStr, Strlen);
2184 PCurStr = Pdt[StrNumb].dt_string;
2185
2186 redblackinsert(&rbans, (char *)PCurStr);
2187 }
2188 ENDTm(DeltaUSec, tm); // end timer
2189 break;
2190 }
2191 case M_Ternary:
2192 {
2193 STARTTm(tm); // start timer
2194 for (lines = 0; lines < Delta; lines++, StrNumb++)
2195 {
2196 GETSTRING(PCurStr, Strlen);
2197 Strlen = Pdt[StrNumb].dt_strlen;
2198 PCurStr = Pdt[StrNumb].dt_string;
2199
2200 TernaryIns(&Ternary, PCurStr, Strlen);
2201 }
2202 ENDTm(DeltaUSec, tm); // end timer
2203 break;
2204 }
2205 default:
2206 assert(0); // cant happen
2207 break;
2208 }
2209 ENDmem(DeltaMem); // current malloc() mem usage
2210
2211 ReadLin = StrNumb; // adjust delta
2212 if (ReadLin > TValues)
2213 ReadLin = TValues;
2214 // if (Delta > TValues)
2215 // ReadLin = Delta; // use the Delta
2216
2217 Printf(" %11lu", StrNumb); // Total stored
2218 Printf(" %10lu", ReadLin); // Number to read back
2219 Begin = gStored - Begin; // actual STORED
2220 assert(lines == Delta);
2221 Printf(" %8lu", Delta - Begin); // Duplicate strings
2222
2223 // Average time per line to store (including duplicate strings)
2224 Mult = DeltaUSec / (double)Delta;
2225
2226 if (Mult < Pms[grp].ms_mininsert)
2227 Pms[grp].ms_mininsert = Mult;
2228
2229 Printf(" %7.3f", Mult);
2230
2231 // Bytes allocated thru malloc()
2232 if (TotalJudyMalloc == 0)
2233 Pms[grp].ms_Bytes = (double)DeltaMem;
2234 else
2235 Pms[grp].ms_Bytes = (double)(TotalJudyMalloc * sizeof(Word_t));
2236
2237 Pms[grp].ms_Bytes /= (double)gStored;
2238
2239 fflush(stdout);
2240
2241 //=======================================================================
2242 // READ BACK LOOP
2243 //=======================================================================
2244
2245 Pdts = Pdt; // Strings to 'Get'
2246 gChainln = 0; // total chain lengths
2247
2248 if (rFlag == 0)
2249 {
2250 Randomize(Pdt, StrNumb); // Randomize ONLY those stored
2251
2252 if (CFlag)
2253 {
2254 // Allocate and make sequencial string buffer
2255 Pdts = BuildSeqBuf(Pdt, ReadLin);
2256 }
2257 }
2258 switch (Method)
2259 {
2260 case M_Print:
2261 break;
2262 case M_Hash:
2263 {
2264 STARTTm(tm); // start timer
2265 for (lines = 0; lines < ReadLin; lines++)
2266 {
2267 GETSTRING(PCurStr, Strlen);
2268 Strlen = Pdts[lines].dt_strlen;
2269 PCurStr = Pdts[lines].dt_string;
2270
2271 PValue = HashGet(HRoot, PCurStr, Strlen); // get string
2272 assert(PValue != NULL);
2273 assert(*PValue > 0);
2274 }
2275 ENDTm(DeltaUSec, tm); // end timer
2276 break;
2277 }
2278 case M_JLHash:
2279 {
2280 STARTTm(tm); // start timer
2281 for (lines = 0; lines < ReadLin; lines++)
2282 {
2283 GETSTRING(PCurStr, Strlen);
2284 Strlen = Pdts[lines].dt_strlen;
2285 PCurStr = Pdts[lines].dt_string;
2286
2287 PValue = JLHashGet(JLHash, PCurStr, Strlen); // get string
2288 assert(PValue != NULL);
2289 assert(*PValue > 0);
2290 }
2291 ENDTm(DeltaUSec, tm); // end timer
2292 break;
2293 }
2294 case M_JudySL:
2295 {
2296 STARTTm(tm); // start timer
2297 for (lines = 0; lines < ReadLin; lines++)
2298 {
2299 GETSTRING(PCurStr, Strlen);
2300 PCurStr = Pdts[lines].dt_string;
2301
2302 JSLG(PValue, JudySL, PCurStr); // get string
2303 assert(PValue != NULL);
2304 assert(*PValue > 0);
2305 }
2306 ENDTm(DeltaUSec, tm); // end timer
2307 break;
2308 }
2309 case M_JudyHS:
2310 {
2311 STARTTm(tm); // start timer
2312 for (lines = 0; lines < ReadLin; lines++)
2313 {
2314 GETSTRING(PCurStr, Strlen);
2315 Strlen = Pdts[lines].dt_strlen;
2316 PCurStr = Pdts[lines].dt_string;
2317
2318 JHSG(PValue, JudyHS, PCurStr, Strlen); // get string
2319 assert(PValue != NULL);
2320 assert(*PValue > 0);
2321 }
2322 ENDTm(DeltaUSec, tm); // end timer
2323 break;
2324 }
2325
2326 // NOTE: the ADT's below here are so slow, that I did not add much effort
2327 // to clean them up. (dlb)
2328
2329 case M_Splay:
2330 {
2331 STARTTm(tm); // start timer
2332 for (lines = 0; lines < ReadLin; lines++)
2333 {
2334 GETSTRING(PCurStr, Strlen);
2335 PCurStr = Pdts[lines].dt_string;
2336
2337 splaysearch(&spans, (char *)PCurStr);
2338 }
2339 ENDTm(DeltaUSec, tm); // end timer
2340 break;
2341 }
2342 case M_Redblack:
2343 {
2344 STARTTm(tm); // start timer
2345 for (lines = 0; lines < ReadLin; lines++)
2346 {
2347 GETSTRING(PCurStr, Strlen);
2348 PCurStr = Pdts[lines].dt_string;
2349
2350 redblacksearch(&rbans, (char *)PCurStr);
2351 }
2352 ENDTm(DeltaUSec, tm); // end timer
2353 break;
2354 }
2355 case M_Ternary:
2356 {
2357 STARTTm(tm); // start timer
2358 for (lines = 0; lines < ReadLin; lines++)
2359 {
2360 GETSTRING(PCurStr, Strlen);
2361 Strlen = Pdts[lines].dt_strlen;
2362 PCurStr = Pdts[lines].dt_string;
2363
2364 if (TernaryGet(Ternary, PCurStr, Strlen) == 0)
2365 {
2366 printf("\n OOps - Ternary Bug at Line = %d\n",
2367 __LINE__);
2368 exit(1);
2369 }
2370 }
2371 ENDTm(DeltaUSec, tm); // end timer
2372 break;
2373 }
2374 default:
2375 assert(0); // cant happen
2376 break;
2377 }
2378 Mult = DeltaUSec / (double)ReadLin;
2379
2380 // save least value
2381 if (Mult < Pms[grp].ms_minretrive)
2382 Pms[grp].ms_minretrive = Mult;
2383
2384 Printf(" %7.3f", Mult); // RETRIVE per string
2385 Printf(" %9.6f", (double)gChainln / (double)ReadLin);
2386
2387 // RAM USED PER STRING TO STORE DATA
2388
2389 Printf(" %13.1f", (double)Pms[grp].ms_Bytes);
2390 Printf("\n");
2391 fflush(stdout);
2392 }
2393 if (Method == M_Print)
2394 exit(0);
2395
2396 Printf("# Total Duplicate strings = %lu\n", nStrg - gStored);
2397
2398 //=======================================================================
2399 // Delete loop
2400 //=======================================================================
2401
2402 DeltaUSec = -1.0; // set deleted flag
2403
2404 if (rFlag == 0)
2405 {
2406 Randomize(Pdt, StrNumb); // Randomize ONLY those stored
2407 }
2408 switch (Method)
2409 {
2410 case M_JudySL:
2411 {
2412 if (DFlag)
2413 {
2414 Printf("# Begin JudySLDel() loop...\n");
2415 STARTTm(tm); // start timer
2416 for (lines = 0; lines < nStrg; lines++)
2417 {
2418 int Rc;
2419 GETSTRING(PCurStr, Strlen);
2420 PCurStr = Pdt[lines].dt_string;
2421 JSLD(Rc, JudySL, PCurStr); // delete string
2422 assert(Rc != JERR);
2423 }
2424 ENDTm(DeltaUSec, tm); // end timer
2425 }
2426 else
2427 {
2428 Printf("# Begin JudySLFreeArray()...\n");
2429 STARTTm(tm); // start timer
2430 JSLFA(Bytes, JudySL);
2431 ENDTm(DeltaUSec, tm); // end timer
2432 }
2433 break;
2434 }
2435 case M_JudyHS:
2436 {
2437 if (DFlag)
2438 {
2439 int Rc;
2440 Printf("# Begin JudyHSDel() loop...");
2441 STARTTm(tm); // start timer
2442 for (lines = 0; lines < nStrg; lines++)
2443 {
2444 GETSTRING(PCurStr, Strlen);
2445 Strlen = Pdt[lines].dt_strlen;
2446 PCurStr = Pdt[lines].dt_string;
2447 JHSD(Rc, JudyHS, PCurStr, Strlen); // Delete string
2448 assert(Rc != JERR);
2449 }
2450 ENDTm(DeltaUSec, tm); // end timer
2451 }
2452 else
2453 {
2454 Printf("# Begin JudyHSFreeArray()...\n");
2455 STARTTm(tm); // start timer
2456 JHSFA(Bytes, JudyHS);
2457 ENDTm(DeltaUSec, tm); // end timer
2458 }
2459 break;
2460 }
2461 case M_Hash:
2462 {
2463 if (DFlag)
2464 {
2465 Printf("# Begin HashDel() loop...\n");
2466 STARTTm(tm); // start timer
2467 for (lines = 0; lines < nStrg; lines++)
2468 {
2469 GETSTRING(PCurStr, Strlen);
2470 Strlen = Pdt[lines].dt_strlen;
2471 PCurStr = Pdt[lines].dt_string;
2472 HashDel(&HRoot, PCurStr, Strlen); // Delete string
2473 }
2474 ENDTm(DeltaUSec, tm); // end timer
2475 }
2476 else
2477 {
2478 Printf("# Begin HashFreeArray()...\n");
2479 STARTTm(tm); // start timer
2480 Bytes = HashFreeArray(&HRoot);
2481 ENDTm(DeltaUSec, tm); // end timer
2482 }
2483 break;
2484 }
2485 case M_JLHash:
2486 {
2487 if (DFlag)
2488 {
2489 Printf("# Begin JLHashDel() loop...\n");
2490 STARTTm(tm); // start timer
2491 for (lines = 0; lines < nStrg; lines++)
2492 {
2493 GETSTRING(PCurStr, Strlen);
2494 Strlen = Pdt[lines].dt_strlen;
2495 PCurStr = Pdt[lines].dt_string;
2496 JLHashDel(&JLHash, PCurStr, Strlen); // Delete string
2497 }
2498 ENDTm(DeltaUSec, tm); // end timer
2499 }
2500 else
2501 {
2502 Printf("# Begin JLHashFreeArray()...\n");
2503 STARTTm(tm); // start timer
2504 Bytes = JLHashFreeArray(&JLHash);
2505 ENDTm(DeltaUSec, tm); // end timer
2506 }
2507 break;
2508 }
2509 default:
2510 printf("# Delete not implemented yet, so quit\n");
2511 Passes = 1; // No, delete, so quit
2512 break;
2513 }
2514 // average time per line to delete (including duplicate strings)
2515 if (Bytes) // Measured freed bytes?
2516 {
2517 Printf("# returned %lu bytes\n", Bytes);
2518 }
2519 if (TotalJudyMalloc) // Any bytes left after free?
2520 {
2521 printf
2522 ("# !!! BUG, %lu bytes not deleted in *Free()\n",
2523 TotalJudyMalloc * sizeof(Word_t));
2524 }
2525 if (DeltaUSec != -1.0) // Measured how long to free?
2526 {
2527 Printf("# Free %lu strings, %0.3f uSecs Ave/string\n",
2528 gStored, DeltaUSec / (double)gStored);
2529 }
2530 Printf("\n");
2531 }
2532
2533 if (Passes != 1)
2534 {
2535 printf("# TotInserts 0 0 InsTime GetTime 0 Ram/String\n");
2536 StrNumb = 0;
2537 for (grp = 0; grp < Groups; grp++)
2538 {
2539 StrNumb += Pms[grp].ms_delta;
2540
2541 printf(" %11lu", StrNumb); // Total stored
2542 printf(" 0 0"); // place holder
2543 printf(" %7.3f", Pms[grp].ms_mininsert);
2544 printf(" %7.3f", Pms[grp].ms_minretrive);
2545 printf(" 0"); // place holder
2546 printf(" %7.3f", Pms[grp].ms_Bytes);
2547 printf("\n");
2548 }
2549 }
2550 exit(0); // done
2551 } // main()
2552