1 // @(#) $Revision: 4.1 $ $Source: /judy/test/manual/StringCompare.c $
2 //=======================================================================
3 //   Author Douglas L. Baskins, Jan 2003.
4 //   Permission to use this code is freely granted, provided that this
5 //   statement is retained.
6 //   email - dougbaskins .at, yahoo.com
7 //=======================================================================
8 
9 //=======================================================================
10 //
11 // This program will time various ADTs that store and retrieve strings.
12 // Currently there are 7 ADTs implemented:
13 
14 /*
15    1) Hash   - this is the fastest one when table size matches data size
16    2) JudyHS - 2nd in speed and very scaleable.
17    3) JLHash - this uses a JudyL() array instead of hash table and is
18         very scaleable because the hash table size is ~4 billion.
19    4) JudySL - ordered and requires null terminated strings.
20    5) Ternary - code borrowed from Mr Dobbs, perhaps 2000
21    6) Redblack - code borrowed from J. Zobel, April 2001.
22    7) Splay  - code borrowed from J. Zobel, April 2001.
23 
24    Note: Splay, Redblack and Ternary methods are not very fast, so they
25    have not been completed and made bug free.  I.E. no Delete and Free
26    Array routines.  Ternary is fastest retrieve of these three, but uses
27    an extraordinary amount of memory.
28 */
29 //=======================================================================
30 //
31 // Compile:
32 //
33 //   cc -O StringCompare.c -lm -lJudy -o StringCompare
34 //           -or-
35 //   cc -O -DCPUMHZ=1299 StringCompare.c -lm -lJudy -o StringCompare
36 
37 /* Notes:
38    1) Use '-DCPUMHZ=1299' in cc line if better clock resolution is desired
39       and it compiles successfully.  The 1299 is the cpu MHz : 1298.916 from
40       cat /proc/cpuinfo in a Linux system.
41 
42    2) -static will generally get better performance because memcmp(),
43     memcpy() routines are usually slower with shared librarys.
44 */
45 
46 // Usage:
47 //
48 //   StringCompare -A Hash     <options>  textfile
49 //   StringCompare -A JudyHS   <options>  textfile
50 //   StringCompare -A JLHash   <options>  textfile
51 //   StringCompare -A JudySL   <options>  textfile
52 //   StringCompare -A Ternary  <options>  textfile
53 //   StringCompare -A Redblack <options>  textfile
54 //   StringCompare -A Splay    <options>  textfile
55 
56 #include <stdlib.h>                     // malloc(3)
57 #include <unistd.h>                     // getopt(3)
58 #include <stdio.h>                      // printf(3)
59 #include <fcntl.h>                      // open(2)
60 #include <string.h>                     // memcmp(3), memcpy(3)
61 #include <errno.h>                      // errno(3)
62 #include <sys/mman.h>                   // mmap(2)
63 #include <sys/time.h>                   // gettimeofday(2)
64 #include <math.h>                       // pow(3)
65 #include <sys/utsname.h>                // uname(2)
66 #include <Judy.h>                       // Judy arrays
67 
68 //=======================================================================
69 //   D e f i n e:    T O   T U R N   O F F   A S S E R T I O N S !!!!!!!!
70 //=======================================================================
71 //#define NDEBUG 1
72 #include <assert.h>                     // assert(3)
73 
74 //=======================================================================
75 //      G L O B A L  D A T A
76 //=======================================================================
77 
78 int       foolflag = 0;                 // fool compiler from optimizing
79 static Word_t gStored = 0;              // number of strings inserted
80 static Word_t gChainln = 0;             // links traversed during RETRIVE
81 
82 static Word_t PtsPdec = 40;             // default measurement points per decade
83 
84 #define INFSTRGS 1000000000             // 1 billion strings is infinity
85 
86 static Word_t nStrg = INFSTRGS;         // infinity -- measure all strings
87 static Word_t TValues = 100000;         // max measure points for RETRIVE tests
88 static int pFlag = 0;                   // pre-fault hash table pages into RAM
89 static int rFlag = 0;                   // do not randomize input file
90 static int aFlag = 0;                   // word align string buffers
91 static int DFlag = 0;                   // do the delete measurement
92 static int CFlag = 0;                   // build sequential Get buffers
93 static Word_t aCount = 0;               // Count of missaligned string buffers
94 
95 //  define the maximum length of a string allowed
96 #define MAXSTRLEN       (100000)
97 static int MLength = MAXSTRLEN;
98 static Word_t HTblsz;                   // 1M default hash table size
99 static int fileidx;                     // argv[fileidx] == file string
100 
101 // for saving input string data
102 typedef struct STRING_
103 {
104     int       dt_strlen;
105     uint8_t  *dt_string;
106 
107 } dt_t   , *Pdt_t;
108 
109 static Pdt_t PdtS_ = NULL;              // memory for Cache access Gets
110 static uint8_t *Strbuf_ = NULL;
111 static Word_t Strsiz_ = 0;
112 
113 // Roundup BYTES to an even number of words
114 
115 /*
116  On Linux 2.6.3-4mdkenterprise (Mandrake 10.0 Community) printing (even
117  to a file) makes the timings inaccurate.  So, use -L2 or greater to
118  average (actually save min times) and print results after all tests are
119  completed.
120 */
121 #define Printf if (Pass == 0) printf
122 
123 #define ROUNDUPWORD(BYTES) (((BYTES) + sizeof(Word_t) - 1) & (-sizeof(Word_t)))
124 #define BYTES2WORDS(BYTES) (((BYTES) + sizeof(Word_t) - 1) / (sizeof(Word_t)))
125 
126 //=======================================================================
127 //      T I M I N G   M A C R O S
128 //=======================================================================
129 
130 static double DeltaUSec;                // Global for remembering delta times
131 
132 // Some operating systems have get_cycles() in /usr/include/asm/timex.h
133 
134 #ifdef  CPUMHZ
135 
136 //  For a 1.34 nS clock cycle processor (750Mhz)
137 
138 #define CPUSPEED      (1.0 / (CPUMHZ))
139 
140 #include <asm/timex.h>
141 
142 #define TIMER_vars(T) cycles_t  __TVBeg_##T
143 
144 #define STARTTm(T) __TVBeg_##T = get_cycles()
145 
146 #define ENDTm(D,T) { (D) = (double)(get_cycles() - __TVBeg_##T) * CPUSPEED; }
147 
148 #else  // ! CPUMHZ
149 
150 #define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T
151 
152 #define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)
153 
154 #define ENDTm(D,T)                                                      \
155 {                                                                       \
156     gettimeofday(&__TVEnd_##T, NULL);                                   \
157     (D) = (double)(__TVEnd_##T.tv_sec  - __TVBeg_##T.tv_sec) * 1E6 +    \
158          ((double)(__TVEnd_##T.tv_usec - __TVBeg_##T.tv_usec));         \
159 }
160 
161 #endif // ! CPUMHZ
162 
163 //=======================================================================
164 //      M E M O R Y   S I Z E   M A C R O S
165 //=======================================================================
166 
167 // use mallinfo() instead of sbrk() for memory usage measurements
168 // this should include the RAM that was mmap()ed in malloc()
169 
170 static Word_t DeltaMem;                 // for remembering
171 
172 // Some mallocs have mallinfo()
173 // #define MALLINFO 1
174 
175 #ifdef MALLINFO
176 #include <malloc.h>                     // mallinfo()
177 
178 static struct mallinfo malStart;
179 
180 #define STARTmem malStart = mallinfo()
181 #define ENDmem(DELTAMEM)                                        \
182 {                                                               \
183     struct mallinfo malEnd = mallinfo();                        \
184 /* strange little dance from signed to unsigned to double */    \
185     unsigned int _un_int = malEnd.arena - malStart.arena;       \
186     (DELTAMEM) = (double)_un_int;      /* to double */          \
187 }
188 #else  // NO MALLINFO
189 
190 // this usually works for machines with less than 1-2Gb RAM.
191 // (it does NOT include memory ACQUIRED by mmap())
192 
193 static char *malStart;
194 
195 #define STARTmem (malStart = (char *)sbrk(0))
196 #define ENDmem(DELTAMEM)                                        \
197 {                                                               \
198     char  *malEnd =  (char *)sbrk(0);                           \
199     (DELTAMEM) = (double)(malEnd - malStart);                   \
200 }
201 #endif // NO MALLINFO
202 
203 //=======================================================================
204 //      F I L E  O P E N  and  M A L L O C  F A I L  M A C R O S
205 //=======================================================================
206 
207 #define FILERROR                                                        \
208 {                                                                       \
209     printf("\n !! OOps - Open file error \"%s\": %s (errno = %d)\n",    \
210             argv[fileidx], strerror(errno), errno);                     \
211     fprintf(stderr, " OOps - Open file error \"%s\": %s (errno = %d)\n",\
212             argv[fileidx], strerror(errno), errno);                     \
213     exit(1);                                                            \
214 }
215 
216 #define MALLOCERROR                                                     \
217 {                                                                       \
218     printf("\n !! OOps - malloc failed at Line = %d\n", __LINE__);      \
219     fprintf(stderr, " OOps - malloc failed at Line = %d\n", __LINE__);  \
220     exit(1);                                                            \
221 }
222 
223 //=======================================================================
224 // This alternate form of JudyMalloc() is used to keep track how much ram is
225 // used on some of the below ADT's
226 //=======================================================================
227 
228 // JUDY INCLUDE FILES
229 //#include "Judy.h"
230 
231 // ****************************************************************************
232 // J U D Y   M A L L O C
233 //
234 // Allocate RAM.  This is the single location in Judy code that calls
235 // malloc(3C).  Note:  JPM accounting occurs at a higher level.
236 
237 static Word_t TotalJudyMalloc = 0;
238 
239 Word_t
JudyMalloc(Word_t Words)240 JudyMalloc(Word_t Words)
241 {
242     Word_t    Addr;
243 
244     Addr = (Word_t)malloc(Words * sizeof(Word_t));
245 
246     if (Addr)
247         TotalJudyMalloc += Words;
248 
249     return (Addr);
250 
251 }                                       // JudyMalloc()
252 
253 // ****************************************************************************
254 // J U D Y   F R E E
255 
256 void
JudyFree(void * PWord,Word_t Words)257 JudyFree(void *PWord, Word_t Words)
258 {
259     free(PWord);
260     assert((long)(TotalJudyMalloc - Words) >= 0L);
261 
262     TotalJudyMalloc -= Words;
263 
264 }                                       // JudyFree()
265 
266 // ****************************************************************************
267 // J U D Y   M A L L O C
268 //
269 // Higher-level "wrapper" for allocating objects that need not be in RAM,
270 // although at this time they are in fact only in RAM.  Later we hope that some
271 // entire subtrees (at a JPM or branch) can be "virtual", so their allocations
272 // and frees should go through this level.
273 
274 Word_t
JudyMallocVirtual(Word_t Words)275 JudyMallocVirtual(Word_t Words)
276 {
277     return (JudyMalloc(Words));
278 
279 }                                       // JudyMallocVirtual()
280 
281 // ****************************************************************************
282 // J U D Y   F R E E
283 
284 void
JudyFreeVirtual(void * PWord,Word_t Words)285 JudyFreeVirtual(void *PWord, Word_t Words)
286 {
287     JudyFree(PWord, Words);
288 
289 }                                       // JudyFreeVirtual()
290 
291 //=======================================================================
292 // Routine to get next size of Indexes
293 //=======================================================================
294 
295 static int
NextNumb(Word_t * PNumber,double * PDNumb,double DMult,Word_t MaxNumb)296 NextNumb(Word_t *PNumber,               // pointer to returned next number
297          double *PDNumb,                // Temp double of above
298          double DMult,                  // Multiplier
299          Word_t MaxNumb)                // Max number to return
300 {
301 //  Save prev number
302     double    PrevPDNumb = *PDNumb;
303     double    DDiff;
304 
305 //  Calc next number >= 1.0 beyond previous
306     do
307     {
308         *PDNumb *= DMult;
309         DDiff = *PDNumb - PrevPDNumb;
310     }
311     while (DDiff < 0.5);
312 
313 //  Return it in integer format
314     if (DDiff < 100.0)
315         *PNumber += (Word_t)(DDiff + 0.5);
316     else
317         *PNumber = (Word_t)(*PDNumb + 0.5);
318 
319 //  Verify it did not exceed max number
320     if (*PNumber >= MaxNumb)
321     {
322         *PNumber = MaxNumb;             // it did, so return max
323         return (1);                     // flag it
324     }
325     return (0);                         // more available
326 }
327 
328 //=======================================================================
329 //      M E A S U R E M E N T  S T R U C T U R E
330 //=======================================================================
331 
332 typedef struct _MEASUREMENTS_STRUCT *Pms_t;
333 typedef struct _MEASUREMENTS_STRUCT
334 {
335     Word_t    ms_delta;                 // number of points in current group
336     double    ms_Bytes;                 // average allocated memory/per string
337     double    ms_mininsert;             // Min Retrive number
338     double    ms_minretrive;            // Min Retrive number
339 } ms_t;
340 
341 static Pms_t Pms;                       // array of MEASUREMENTS_STRUCT
342 
343 // Method type
344 
345 typedef enum
346 {
347     M_invalid,
348     M_Print,
349     M_Hash,
350     M_JLHash,
351     M_JudySL,
352     M_JudyHS,
353     M_Splay,
354     M_Redblack,
355     M_Ternary
356 } Method_t;
357 
358 //=======================================================================
359 //      R a n d o m i z e  i n p u t  s t r i n g s
360 //=======================================================================
361 
362 static void
Randomize(Pdt_t Pstrstr,Word_t Len)363 Randomize(Pdt_t Pstrstr, Word_t Len)
364 {
365     Word_t    ii;
366 
367 // swap the "random" index with the sequential one
368     for (ii = 1; ii < Len; ii++)
369     {
370         dt_t      dttemp;
371         Word_t    swapii;
372 
373 //      get "random" index
374         swapii = (Word_t)rand() % Len;
375 
376 //      and swap
377         dttemp = Pstrstr[ii];
378         Pstrstr[ii] = Pstrstr[swapii];
379         Pstrstr[swapii] = dttemp;
380     }
381 }
382 
383 //=======================================================================
384 //      B u i l d   s e q u e n c i a l   s t r i n g  b u f f e r
385 //=======================================================================
386 
387 Pdt_t
BuildSeqBuf(Pdt_t Pstrstr,Word_t Len)388 BuildSeqBuf(Pdt_t Pstrstr, Word_t Len)
389 {
390     Word_t    SumStrings = 0;
391     Word_t    ii;
392     Word_t    Strlen;
393     uint8_t  *string;
394 
395     assert(Len <= TValues);
396 
397 //  calculate how much memory needed for strings
398     for (ii = 0; ii < Len; ii++)
399     {
400         Strlen = Pstrstr[ii].dt_strlen;
401         if (aFlag)
402             SumStrings += ROUNDUPWORD(Strlen + 1);
403         else
404             SumStrings += Strlen + 1;
405     }
406 //  check if old string buffer is big enough
407     if (SumStrings > Strsiz_)
408     {
409         if (Strbuf_)
410             free(Strbuf_);
411         else
412             SumStrings += SumStrings / 5;       // bump 20%
413 
414         Strbuf_ = (uint8_t *) malloc(SumStrings);
415         if (Strbuf_ == NULL)
416             MALLOCERROR;
417         Strsiz_ = SumStrings;
418     }
419     for (ii = 0, string = Strbuf_; ii < Len; ii++)
420     {
421         Strlen = Pstrstr[ii].dt_strlen;
422 
423         PdtS_[ii].dt_strlen = Strlen;
424         PdtS_[ii].dt_string = string;
425 
426         memcpy(string, Pstrstr[ii].dt_string, Strlen + 1);
427 
428         if (aFlag)
429             string += ROUNDUPWORD(Strlen + 1);
430         else
431             string += Strlen + 1;
432     }
433     return (PdtS_);
434 }
435 
436 //=======================================================================
437 //      H A S H   M E T H O D   S T R U C T U R E S
438 //=======================================================================
439 
440 // These structures are used in Hash() and JLHash() ADTs
441 
442 // for storing length of string
443 
444 // Hash chain structure (varible length depending on string)
445 // static part of the length
446 #define HSTRUCTOVD      (sizeof(hrec_t) - sizeof(int))
447 typedef struct HASHREC_ *Phrec_t;
448 typedef struct HASHREC_
449 {
450     Phrec_t   hr_Next;                  // collision chain link pointer
451     Word_t    hr_Value;                 // Data associated with string
452     int       hr_Strlen;                // length of string 2 billion max
453     uint8_t   hr_String[sizeof(int)];   // string is allocated with struct
454 
455 } hrec_t;
456 
457 //  hash head structure to keep hash array information
458 typedef struct HASHINFO_
459 {
460     Pvoid_t   hi_Htbl;                  // Hash table
461     Word_t    hi_tblsize;               // Hash table size (Words)
462     Word_t    hi_TotalWords;            // Hash array total words
463     Word_t    hi_Pop1;                  // Hash array total population
464 
465 } hinfo_t, *Phinfo_t;
466 
467 // size in words of the header structure
468 #define HASHHEADSZ  (sizeof(hinfo_t) / sizeof(Word_t))
469 
470 //=======================================================================
471 //      H A S H   A L G O R I T H M
472 //=======================================================================
473 //
474 //  For CPUs with a slow mod (%) use table size a power of 2.  A test is
475 //  made to see if the SIZE is a power of 2, and if so an .AND.(&) is used
476 //  instead of a .MOD.(%) to trim the hash return size.  Note: a SIZE == 0,
477 //  results in no trimming of hash return size.
478 
479 #define HASHSTR(STRING,LENGTH,SIZE)                     \
480     ((SIZE) == ((SIZE) & -(SIZE))) ?                    \
481         (HashStr(STRING, LENGTH) & ((SIZE) -1)) :       \
482         (HashStr(STRING, LENGTH) % (SIZE))
483 
484 //  String hash function.  Hash string to a unsigned int (uint32_t) This
485 //  one needs a fast 32 bit mpy, which is often very slow on older(RISC)
486 //  machines.  If you are sure you will not over populate the hash table,
487 //  then a poorer/faster hash algorithm should be used.  Replace with your
488 //  own, milage may vary.  This program measures the speed, whether used
489 //  or not.
490 
491 static uint32_t
HashStr(void * Str,Word_t Len)492 HashStr(void *Str, Word_t Len)
493 {
494     uint32_t  A = 31415;
495     uint32_t  hashv = Len;
496     uint8_t  *k = (uint8_t *) Str;
497 
498     while (Len--)
499     {
500         hashv = (A * hashv) + *k++;
501         A *= 27183;
502     }
503     return (hashv);
504 }
505 
506 //=======================================================================
507 //      S T O R E  and  R E T R I V E  R O U T I N E S
508 //=======================================================================
509 
510 //=======================================================================
511 //      H A S H  M E T H O D  U S I N G  J U D Y L  A S  H A S H  T A B L E
512 //=======================================================================
513 
514 PWord_t
JLHashGet(Pvoid_t JLHash,uint8_t * String,Word_t Strlen)515 JLHashGet(Pvoid_t JLHash, uint8_t * String, Word_t Strlen)
516 {
517     Phinfo_t  PHash = (Phinfo_t) JLHash;
518     Phrec_t   Phrec, *PPhrec;
519     uint32_t  hval;
520 
521     if (PHash == NULL)
522         return (NULL);
523 
524 //  get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
525     hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
526 
527     JLG(PPhrec, PHash->hi_Htbl, hval);  // use JudyL to get &pointer
528 
529     if (PPhrec == NULL)
530         return (NULL);                  // no table entry
531 
532 //  search for matching string
533     for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
534     {
535         gChainln++;                     // Hash chain length
536         if (Phrec->hr_Strlen == Strlen) // length match?
537         {
538             if (memcmp(Phrec->hr_String, String, Strlen) == 0)
539                 return (&(Phrec->hr_Value));    // match! pointer to Value
540         }
541     }
542     return (NULL);
543 }
544 
545 // Return pointer to struct hrec_t associated with string
546 
547 PWord_t
JLHashIns(PPvoid_t PPHash,uint8_t * String,Word_t Strlen,Word_t TblSize)548 JLHashIns(PPvoid_t PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
549 {
550     Phrec_t   Phrec, *PPhrec;
551     Phinfo_t  PHash;
552     Word_t    Len;
553     uint32_t  hval;
554 
555     PHash = (Phinfo_t) * PPHash;        // core-dump if calling error
556     if (PHash == NULL)                  // if hash table not allocated
557     {
558 //      allocate the header
559         PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
560         if (PHash == NULL)
561             MALLOCERROR;
562 
563 //      Initialize the header struct
564         PHash->hi_tblsize = TblSize;
565         PHash->hi_TotalWords = HASHHEADSZ;
566         PHash->hi_Pop1 = 0;             // none yet
567         PHash->hi_Htbl = NULL;
568         *PPHash = (Pvoid_t)PHash;       // return header to caller
569     }
570 //  get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
571     hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
572 
573 //  get pointer to hash table entry
574     JLI(PPhrec, PHash->hi_Htbl, hval);  // JLI will exit if out of memory
575 
576 //  search for matching string
577     for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
578     {
579         if (Phrec->hr_Strlen == Strlen) // string length match?
580         {
581             if (memcmp(Phrec->hr_String, String, Strlen) == 0)
582             {
583                 return (&(Phrec->hr_Value));    // match! pointer to Value
584             }
585         }
586     }
587 
588 //  String match not found, so do an insert
589 
590     Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
591     Phrec = (Phrec_t) JudyMalloc(Len);  // get memory for storing string
592     if (Phrec == NULL)
593         MALLOCERROR;
594 
595     PHash->hi_TotalWords += Len;        // keep track of total mallocs
596 
597     Phrec->hr_Strlen = Strlen;          // set string length
598     memcpy(Phrec->hr_String, String, Strlen);
599 
600     Phrec->hr_Next = *PPhrec;           // pointer to synonym
601     *PPhrec = Phrec;                    // place new struct in front of list
602     (PHash->hi_Pop1)++;                 // add one to population
603     Phrec->hr_Value = (Word_t)0;        // zero the associated Value
604 
605     return (&(Phrec->hr_Value));        // return pointer to Value
606 }
607 
608 // Return 1 if successful, else 0
609 
610 int
JLHashDel(PPvoid_t PPHash,uint8_t * String,Word_t Strlen)611 JLHashDel(PPvoid_t PPHash, uint8_t * String, Word_t Strlen)
612 {
613     Phrec_t   Phrec, *PPhrec, *PPhrec1;
614     Phinfo_t  PHash;
615     uint32_t  hval;
616 
617 //  avoid an core dump here
618     if (PPHash == NULL)
619         return (0);
620     PHash = (Phinfo_t) (*PPHash);       // get header
621     if (PHash == NULL)
622         return (0);                     // not found
623 
624 //  get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
625     hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
626 
627 //  get pointer hash table entry
628     JLG(PPhrec, PHash->hi_Htbl, hval);
629     if (PPhrec == NULL)
630         return (0);                     // hash entry not found
631 
632     PPhrec1 = PPhrec;                   // save head hash entry ^
633 
634 //  search for matching string
635     for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
636     {
637         if (Phrec->hr_Strlen == Strlen) // string length match?
638         {
639             if (memcmp(Phrec->hr_String, String, Strlen) == 0)  // string match?
640             {
641                 int       Rc;           // not used
642                 Word_t    Len;
643 
644                 *PPhrec = Phrec->hr_Next;       // put next in previous
645 
646                 Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
647                 JudyFree(Phrec, Len);
648                 PHash->hi_TotalWords -= Len;    // ram usage accounting
649 
650                 (PHash->hi_Pop1)--;     // Decrement population
651 
652                 if (*PPhrec1 == NULL)   // no chain left
653                 {
654 //                  delete hash table entry
655                     JLD(Rc, PHash->hi_Htbl, hval);
656                     assert(Rc == 1);
657                 }
658 //              If last element, free everything
659                 if (PHash->hi_Pop1 == 0)
660                 {
661                     assert(PHash->hi_TotalWords == HASHHEADSZ);
662 
663                     JudyFree(PHash, HASHHEADSZ);        // the header table
664                     *PPHash = NULL;     // from caller
665                 }
666                 return (1);             // successful
667             }
668         }
669         PPhrec = &(Phrec->hr_Next);     // previous = current
670     }
671     return (0);                         // string not found
672 }
673 
674 // Free the whole JLHash structure
675 
676 Word_t
JLHashFreeArray(PPvoid_t PPHash)677 JLHashFreeArray(PPvoid_t PPHash)
678 {
679     Phrec_t   Phrec, *PPhrec;
680     Phinfo_t  PHash;
681     Word_t    DeletedWords, Bytes;
682     Word_t    Index = 0;                // for First, Next loop
683 
684 //  avoid an core dump here
685     if (PPHash == NULL)
686         return ((Word_t)0);
687     PHash = (Phinfo_t) (*PPHash);       // get header
688     if (PHash == NULL)
689         return ((Word_t)0);             // not found
690 
691 //  get bytes of memory usage in (JudyL) Hash table
692     JLMU(Bytes, PHash->hi_Htbl);
693 
694     DeletedWords = HASHHEADSZ;          // start with header
695 
696 //  Get 1st table entry in Hash table
697     JLF(PPhrec, PHash->hi_Htbl, Index);
698 
699 //  found an entry in hash table?
700     while (PPhrec != NULL)
701     {
702         int       Rc;                   // not used
703         Phrec = *PPhrec;
704 
705 //      walk the synonym linked list
706         while (Phrec != NULL)
707         {
708             Word_t    Len;
709             Phrec_t   Phrecfree = Phrec;
710 
711 //          number of words to free -- struct hrec_t
712             Len = BYTES2WORDS(Phrec->hr_Strlen + HSTRUCTOVD);
713 
714 //          sum total length of mallocs in words
715             DeletedWords += Len;
716 
717             (PHash->hi_Pop1)--;         // Decrement population
718 
719 //          get pointer to next synonym on list
720             Phrec = Phrec->hr_Next;
721 
722 //          free the struct hrec_t
723             JudyFree(Phrecfree, Len);
724         }
725 //      delete hash table entry
726         JLD(Rc, PHash->hi_Htbl, Index);
727         assert(Rc == 1);
728 
729 //      get next hash table entry
730         JLN(PPhrec, PHash->hi_Htbl, Index);
731     }
732     assert(PHash->hi_TotalWords == DeletedWords);
733     assert(PHash->hi_Pop1 == 0);
734 
735     JudyFree(PHash, HASHHEADSZ);        // the header table
736     *PPHash = NULL;                     // set pointer null
737 
738 //  return total bytes freed
739     return ((DeletedWords * sizeof(Word_t)) + Bytes);
740 }
741 
742 //=======================================================================
743 //      H A S H  M E T H O D
744 //=======================================================================
745 
746 PWord_t
HashGet(Phinfo_t PHash,uint8_t * String,Word_t Strlen)747 HashGet(Phinfo_t PHash, uint8_t * String, Word_t Strlen)
748 {
749     Phrec_t   Phrec, *Htbl;
750     uint32_t  hval;
751 
752 //  avoid an core dump here
753     if (PHash == NULL)
754         return (NULL);
755 
756 //  get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
757     hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
758 
759 //  type Hash table pointer
760     Htbl = (Phrec_t *) PHash->hi_Htbl;
761 
762 //  search for matching string
763     for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
764     {
765         gChainln++;                     // Hash chain length
766         if (Phrec->hr_Strlen == Strlen) // length match?
767         {
768             if (memcmp(Phrec->hr_String, String, Strlen) == 0)
769                 return (&(Phrec->hr_Value));    // match! pointer to Value
770         }
771     }
772     return (NULL);
773 }
774 
775 // Return pointer to struct hrec_t associated with string
776 
777 Pvoid_t
HashIns(Phinfo_t * PPHash,uint8_t * String,Word_t Strlen,Word_t TblSize)778 HashIns(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
779 {
780     Phrec_t   Phrec, *Htbl;
781     Phinfo_t  PHash;
782     Word_t    Len;
783     uint32_t  hval;
784 
785     PHash = *PPHash;                    // core-dump if calling error
786     if (PHash == NULL)                  // if hash table not allocated
787     {
788 //      allocate the header
789         PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
790         if (PHash == NULL)
791             MALLOCERROR;
792 
793 //      allocate the hash table
794         PHash->hi_Htbl = (Pvoid_t)JudyMalloc(TblSize);
795         if (PHash->hi_Htbl == NULL)
796             MALLOCERROR;
797 
798 //      you cant beat this with modern compilers/librarys
799         memset(PHash->hi_Htbl, 0, TblSize * sizeof(Word_t));
800 
801 //      Initialize the header struct
802         PHash->hi_tblsize = TblSize;
803         PHash->hi_TotalWords = TblSize + HASHHEADSZ;
804         PHash->hi_Pop1 = 0;             // none yet
805         *PPHash = PHash;                // return header to caller
806     }
807 //  get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
808     hval = HASHSTR(String, Strlen, TblSize);
809 
810 //  type Hash table pointer
811     Htbl = (Phrec_t *) PHash->hi_Htbl;
812 
813 //  search for matching string in hash table entry
814     for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
815     {
816         if (Phrec->hr_Strlen == Strlen) // string length match?
817         {
818             if (memcmp(Phrec->hr_String, String, Strlen) == 0)  // string match?
819             {
820                 return (&(Phrec->hr_Value));    // match! pointer to Value
821             }
822         }
823     }
824 //  string not found, so do an insert
825     Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
826     Phrec = (Phrec_t) JudyMalloc(Len);
827     if (Phrec == NULL)
828         MALLOCERROR;
829     PHash->hi_TotalWords += Len;        // keep track of total mallocs
830 
831     Phrec->hr_Strlen = Strlen;          // set string length
832     memcpy(Phrec->hr_String, String, Strlen);   // copy it
833 
834 //  place new allocation first in chain
835     Phrec->hr_Next = Htbl[hval];
836     Htbl[hval] = Phrec;
837 
838     (PHash->hi_Pop1)++;                 // add one to population
839     Phrec->hr_Value = (Word_t)0;        // zero the associated Value
840 
841     return (&(Phrec->hr_Value));        // return pointer to Value
842 }
843 
844 // Delete entry in hash array, return 1 if successful, else 0
845 
846 int
HashDel(Phinfo_t * PPHash,uint8_t * String,Word_t Strlen)847 HashDel(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen)
848 {
849     Phrec_t   Phrec, *PPhrec, *Htbl;
850     Phinfo_t  PHash;
851     uint32_t  hval;
852 
853 //  avoid an core dump here
854     if (PPHash == NULL)
855         return (0);
856     PHash = *PPHash;                    // get header
857     if (PHash == NULL)
858         return (0);                     // not found
859 
860 //  get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
861     hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
862 
863 //  type Hash table pointer
864     Htbl = (Phrec_t *) PHash->hi_Htbl;
865 
866 //  get pointer hash table entry
867     PPhrec = &Htbl[hval];
868 
869 //  search for matching string
870     for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
871     {
872         if (Phrec->hr_Strlen == Strlen) // length match?
873         {
874             if (memcmp(Phrec->hr_String, String, Strlen) == 0)
875             {
876                 Word_t    Len;
877 
878 //              put next hrec_t in previous hrec_t
879                 *PPhrec = Phrec->hr_Next;
880 
881                 Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
882                 JudyFree(Phrec, Len);
883                 PHash->hi_TotalWords -= Len;
884                 (PHash->hi_Pop1)--;     // Decrement population
885 
886 //              If last element, free everything
887                 if (PHash->hi_Pop1 == 0)
888                 {
889                     assert(PHash->hi_TotalWords ==
890                            (HASHHEADSZ + PHash->hi_tblsize));
891 
892                     JudyFree(Htbl, PHash->hi_tblsize);  // hash table
893                     JudyFree(PHash, HASHHEADSZ);        // header struct
894                     *PPHash = NULL;     // from caller
895                 }
896                 return (1);             // successful
897             }
898         }
899         PPhrec = &(Phrec->hr_Next);     // previous = current
900     }
901     return (0);                         // not found
902 }
903 
904 Word_t
HashFreeArray(Phinfo_t * PPHash)905 HashFreeArray(Phinfo_t * PPHash)
906 {
907     int       ii;
908     Phrec_t   Phrec, *Htbl;
909     Phinfo_t  PHash;
910     Word_t    DeletedWords;
911 
912 //  avoid an core dump here
913     if (PPHash == NULL)
914         return ((Word_t)0);
915     PHash = (Phinfo_t) (*PPHash);       // get header
916     if (PHash == NULL)
917         return ((Word_t)0);
918 
919 //  start accumulator of deleted memory
920     DeletedWords = HASHHEADSZ + PHash->hi_tblsize;
921 
922 //  type Hash table pointer
923     Htbl = (Phrec_t *) PHash->hi_Htbl;
924 
925 //  walk thru all table entrys
926     for (ii = 0; ii < PHash->hi_tblsize; ii++)
927     {
928         Phrec = Htbl[ii];               // next hash table entry
929 
930         while (Phrec != NULL)           // walk the synonym linked list
931         {
932             Word_t    Len;
933             Phrec_t   Phrecfree;
934 
935 //          get pointer to next synonym on list
936             Phrecfree = Phrec;
937             Phrec = Phrec->hr_Next;
938 
939             (PHash->hi_Pop1)--;         // Decrement population
940 
941 //          number of words to free -- struct hrec_t
942             Len = BYTES2WORDS(Phrecfree->hr_Strlen + HSTRUCTOVD);
943             DeletedWords += Len;        // sum words freed
944 
945 //          free the struct hrec_t
946             JudyFree(Phrecfree, Len);
947         }
948     }
949 
950 //  and free the hash table
951     JudyFree(Htbl, PHash->hi_tblsize);
952     assert(PHash->hi_TotalWords == DeletedWords);
953     assert(PHash->hi_Pop1 == 0);
954 
955     JudyFree(PHash, HASHHEADSZ);        // the header table
956     *PPHash = NULL;                     // set pointer null
957 
958 //  return total bytes freed
959     return (DeletedWords * sizeof(Word_t));
960 }
961 
962 //=======================================================================
963 //      S P L A Y   M E T H O D
964 //=======================================================================
965 
966 /* Author J. Zobel, April 2001.
967    Permission to use this code is freely granted, provided that this
968    statement is retained. */
969 
970 #define ROTATEFAC 11
971 
972 typedef struct spwordrec
973 {
974     char     *word;
975     struct spwordrec *left, *right;
976     struct spwordrec *par;
977 } SPTREEREC;
978 
979 typedef struct spansrec
980 {
981     struct spwordrec *root;
982     struct spwordrec *ans;
983 } SPANSREC;
984 
985 SPANSREC  spans = { 0 };
986 
987 #define ONELEVEL(PAR,CURR,DIR,RID)      \
988     {                                   \
989         PAR->DIR = CURR->RID;           \
990         if(PAR->DIR!=NULL)              \
991             PAR->DIR->PAR = PAR;        \
992         CURR->RID = PAR;                \
993         PAR->PAR = CURR;                \
994         CURR->PAR = NULL;               \
995     }
996 
997 #define ZIGZIG(GPAR,PAR,CURR,DIR,RID)   \
998     {                                   \
999         CURR->PAR = GPAR->PAR;          \
1000         if (CURR->PAR != NULL)          \
1001         {                               \
1002             if (CURR->PAR->DIR == GPAR) \
1003                 CURR->PAR->DIR = CURR;  \
1004             else                        \
1005                 CURR->PAR->RID = CURR;  \
1006         }                               \
1007         GPAR->DIR = PAR->RID;           \
1008         if (GPAR->DIR != NULL)          \
1009             GPAR->DIR->PAR = GPAR;      \
1010         PAR->DIR = CURR->RID;           \
1011         if (CURR->RID != NULL)          \
1012             CURR->RID->PAR = PAR;       \
1013         CURR->RID = PAR;                \
1014         PAR->PAR = CURR;                \
1015         PAR->RID = GPAR;                \
1016         GPAR->PAR = PAR;                \
1017     }
1018 
1019 #define ZIGZAG(GPAR,PAR,CURR,DIR,RID)   \
1020     {                                   \
1021         CURR->PAR = GPAR->PAR;          \
1022         if (CURR->PAR != NULL)          \
1023         {                               \
1024             if (CURR->PAR->DIR == GPAR) \
1025                 CURR->PAR->DIR = CURR;  \
1026             else                        \
1027                 CURR->PAR->RID = CURR;  \
1028         }                               \
1029         PAR->RID = CURR->DIR;           \
1030         if (PAR->RID != NULL)           \
1031             PAR->RID->PAR = PAR;        \
1032         GPAR->DIR = CURR->RID;          \
1033         if (GPAR->DIR != NULL)          \
1034             GPAR->DIR->PAR = GPAR;      \
1035         CURR->DIR = PAR;                \
1036         PAR->PAR = CURR;                \
1037         CURR->RID = GPAR;               \
1038         GPAR->PAR = CURR;               \
1039     }
1040 
1041 int       scount = ROTATEFAC;
1042 
1043 /* Create a node to hold a word */
1044 
1045 static SPTREEREC *
spwcreate(char * word,SPTREEREC * par)1046 spwcreate(char *word, SPTREEREC * par)
1047 {
1048     SPTREEREC *tmp;
1049 
1050     tmp = (SPTREEREC *) malloc(sizeof(SPTREEREC));
1051     tmp->word = (char *)malloc(strlen(word) + 1);
1052     strcpy(tmp->word, word);
1053     tmp->left = tmp->right = NULL;
1054 
1055     tmp->par = par;
1056 
1057     gStored++;                          // count stored
1058 
1059     return (tmp);
1060 }
1061 
1062 /* Search for word in a splay tree.  If word is found, bring it to
1063    root, possibly intermittently.  Structure ans is used to pass
1064    in the root, and to pass back both the new root (which may or
1065    may not be changed) and the looked-for record. */
1066 
1067 static void
splaysearch(SPANSREC * ans,char * word)1068 splaysearch(SPANSREC * ans, char *word)
1069 {
1070     SPTREEREC *curr = ans->root, *par, *gpar;
1071     int       val;
1072 
1073     scount--;
1074 
1075     if (ans->root == NULL)
1076     {
1077         ans->ans = NULL;
1078         return;
1079     }
1080     while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1081     {
1082         if (val > 0)
1083             curr = curr->right;
1084         else
1085             curr = curr->left;
1086     }
1087 
1088     ans->ans = curr;
1089 
1090     if (curr == ans->root)
1091     {
1092         return;
1093     }
1094 
1095     if (scount <= 0 && curr != NULL)    /* Move node towards root */
1096     {
1097         scount = ROTATEFAC;
1098 
1099         while ((par = curr->par) != NULL)
1100         {
1101             if (par->left == curr)
1102             {
1103                 if ((gpar = par->par) == NULL)
1104                 {
1105                     ONELEVEL(par, curr, left, right);
1106                 }
1107                 else if (gpar->left == par)
1108                 {
1109                     ZIGZIG(gpar, par, curr, left, right);
1110                 }
1111                 else
1112                 {
1113                     ZIGZAG(gpar, par, curr, right, left);
1114                 }
1115             }
1116             else
1117             {
1118                 if ((gpar = par->par) == NULL)
1119                 {
1120                     ONELEVEL(par, curr, right, left);
1121                 }
1122                 else if (gpar->left == par)
1123                 {
1124                     ZIGZAG(gpar, par, curr, left, right);
1125                 }
1126                 else
1127                 {
1128                     ZIGZIG(gpar, par, curr, right, left);
1129                 }
1130             }
1131         }
1132         ans->root = curr;
1133     }
1134 
1135     return;
1136 }
1137 
1138 /* Insert word into a splay tree.  If word is already present, bring it to
1139    root, possibly intermittently.  Structure ans is used to pass
1140    in the root, and to pass back both the new root (which may or
1141    may not be changed) and the looked-for record. */
1142 
1143 static void
splayinsert(SPANSREC * ans,char * word)1144 splayinsert(SPANSREC * ans, char *word)
1145 {
1146     SPTREEREC *curr = ans->root, *par, *gpar, *prev = NULL, *spwcreate();
1147     int       val = 0;
1148 
1149     scount--;
1150 
1151     if (ans->root == NULL)
1152     {
1153         ans->ans = ans->root = spwcreate(word, NULL);
1154         return;
1155     }
1156 
1157     while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1158     {
1159         prev = curr;
1160         if (val > 0)
1161             curr = curr->right;
1162         else
1163             curr = curr->left;
1164     }
1165 
1166     if (curr == NULL)
1167     {
1168         if (val > 0)
1169             curr = prev->right = spwcreate(word, prev);
1170         else
1171             curr = prev->left = spwcreate(word, prev);
1172     }
1173 
1174     ans->ans = curr;
1175 
1176     if (scount <= 0)                    /* Move node towards root */
1177     {
1178         scount = ROTATEFAC;
1179 
1180         while ((par = curr->par) != NULL)
1181         {
1182             if (par->left == curr)
1183             {
1184                 if ((gpar = par->par) == NULL)
1185                 {
1186                     ONELEVEL(par, curr, left, right);
1187                 }
1188                 else if (gpar->left == par)
1189                 {
1190                     ZIGZIG(gpar, par, curr, left, right);
1191                 }
1192                 else
1193                 {
1194                     ZIGZAG(gpar, par, curr, right, left);
1195                 }
1196             }
1197             else
1198             {
1199                 if ((gpar = par->par) == NULL)
1200                 {
1201                     ONELEVEL(par, curr, right, left);
1202                 }
1203                 else if (gpar->left == par)
1204                 {
1205                     ZIGZAG(gpar, par, curr, left, right);
1206                 }
1207                 else
1208                 {
1209                     ZIGZIG(gpar, par, curr, right, left);
1210                 }
1211             }
1212         }
1213         ans->root = curr;
1214     }
1215     return;
1216 }
1217 
1218 //=======================================================================
1219 //      R E D B L A C K   M E T H O D
1220 //=======================================================================
1221 
1222 /* Author J. Zobel, April 2001.
1223    Permission to use this code is freely granted, provided that this
1224    statement is retained. */
1225 
1226 typedef struct rbwordrec
1227 {
1228     char     *word;
1229     struct rbwordrec *left, *right;
1230     struct rbwordrec *par;
1231     char      colour;
1232 } RBTREEREC;
1233 
1234 typedef struct rbansrec
1235 {
1236     struct rbwordrec *root;
1237     struct rbwordrec *ans;
1238 } BRANSREC;
1239 
1240 BRANSREC  rbans = { 0 };
1241 
1242 #define RED                0
1243 #define BLACK                1
1244 
1245 /* Find word in a redblack tree */
1246 
1247 static void
redblacksearch(BRANSREC * ans,char * word)1248 redblacksearch(BRANSREC * ans, char *word)
1249 {
1250     RBTREEREC *curr = ans->root;
1251     int       val;
1252 
1253     if (ans->root != NULL)
1254     {
1255         while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1256         {
1257             if (val > 0)
1258                 curr = curr->right;
1259             else
1260                 curr = curr->left;
1261         }
1262     }
1263 
1264     ans->ans = curr;
1265 
1266     return;
1267 }
1268 
1269 /* Rotate the right child of par upwards */
1270 
1271 /* Could be written as a macro, but not really necessary
1272 as it is only called on insertion */
1273 
1274 void
leftrotate(BRANSREC * ans,RBTREEREC * par)1275 leftrotate(BRANSREC * ans, RBTREEREC * par)
1276 {
1277     RBTREEREC *curr, *gpar;
1278 
1279     if ((curr = par->right) != NULL)
1280     {
1281         par->right = curr->left;
1282         if (curr->left != NULL)
1283             curr->left->par = par;
1284         curr->par = par->par;
1285         if ((gpar = par->par) == NULL)
1286             ans->root = curr;
1287         else
1288         {
1289             if (par == gpar->left)
1290                 gpar->left = curr;
1291             else
1292                 gpar->right = curr;
1293         }
1294         curr->left = par;
1295         par->par = curr;
1296     }
1297 }
1298 
1299 /* Rotate the left child of par upwards */
1300 void
rightrotate(BRANSREC * ans,RBTREEREC * par)1301 rightrotate(BRANSREC * ans, RBTREEREC * par)
1302 {
1303     RBTREEREC *curr, *gpar;
1304 
1305     if ((curr = par->left) != NULL)
1306     {
1307         par->left = curr->right;
1308         if (curr->right != NULL)
1309             curr->right->par = par;
1310         curr->par = par->par;
1311         if ((gpar = par->par) == NULL)
1312             ans->root = curr;
1313         else
1314         {
1315             if (par == gpar->left)
1316                 gpar->left = curr;
1317             else
1318                 gpar->right = curr;
1319         }
1320         curr->right = par;
1321         par->par = curr;
1322     }
1323 }
1324 
1325 /* Create a node to hold a word */
1326 
1327 RBTREEREC *
rbwcreate(char * word,RBTREEREC * par)1328 rbwcreate(char *word, RBTREEREC * par)
1329 {
1330     RBTREEREC *tmp;
1331 
1332     tmp = (RBTREEREC *) malloc(sizeof(RBTREEREC));
1333     tmp->word = (char *)malloc(strlen(word) + 1);
1334     strcpy(tmp->word, word);
1335     tmp->left = tmp->right = NULL;
1336 
1337     tmp->par = par;
1338 
1339     gStored++;                          // count stored
1340 
1341     return (tmp);
1342 }
1343 
1344 /* Insert word into a redblack tree */
1345 
1346 void
redblackinsert(BRANSREC * ans,char * word)1347 redblackinsert(BRANSREC * ans, char *word)
1348 {
1349     RBTREEREC *curr = ans->root, *par, *gpar, *prev = NULL, *rbwcreate();
1350     int       val = 0;
1351 
1352     if (ans->root == NULL)
1353     {
1354         ans->ans = ans->root = rbwcreate(word, NULL);
1355         return;
1356     }
1357     while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1358     {
1359         prev = curr;
1360         if (val > 0)
1361             curr = curr->right;
1362         else
1363             curr = curr->left;
1364     }
1365 
1366     ans->ans = curr;
1367 
1368     if (curr == NULL)
1369         /* Insert a new node, rotate up if necessary */
1370     {
1371         if (val > 0)
1372             curr = prev->right = rbwcreate(word, prev);
1373         else
1374             curr = prev->left = rbwcreate(word, prev);
1375 
1376         curr->colour = RED;
1377         while ((par = curr->par) != NULL
1378                && (gpar = par->par) != NULL && curr->par->colour == RED)
1379         {
1380             if (par == gpar->left)
1381             {
1382                 if (gpar->right != NULL && gpar->right->colour == RED)
1383                 {
1384                     par->colour = BLACK;
1385                     gpar->right->colour = BLACK;
1386                     gpar->colour = RED;
1387                     curr = gpar;
1388                 }
1389                 else
1390                 {
1391                     if (curr == par->right)
1392                     {
1393                         curr = par;
1394                         leftrotate(ans, curr);
1395                         par = curr->par;
1396                     }
1397                     par->colour = BLACK;
1398                     if ((gpar = par->par) != NULL)
1399                     {
1400                         gpar->colour = RED;
1401                         rightrotate(ans, gpar);
1402                     }
1403                 }
1404             }
1405             else
1406             {
1407                 if (gpar->left != NULL && gpar->left->colour == RED)
1408                 {
1409                     par->colour = BLACK;
1410                     gpar->left->colour = BLACK;
1411                     gpar->colour = RED;
1412                     curr = gpar;
1413                 }
1414                 else
1415                 {
1416                     if (curr == par->left)
1417                     {
1418                         curr = par;
1419                         rightrotate(ans, curr);
1420                         par = curr->par;
1421                     }
1422                     par->colour = BLACK;
1423                     if ((gpar = par->par) != NULL)
1424                     {
1425                         gpar->colour = RED;
1426                         leftrotate(ans, gpar);
1427                     }
1428                 }
1429             }
1430         }
1431         if (curr->par == NULL)
1432             ans->root = curr;
1433         ans->root->colour = BLACK;
1434     }
1435 
1436     return;
1437 }
1438 
1439 //=======================================================================
1440 //      T E R N A R Y   M E T H O D
1441 //=======================================================================
1442 
1443 typedef struct tnode *Tptr;
1444 typedef struct tnode
1445 {
1446     uint8_t   splitchar;
1447     Tptr      lokid, eqkid, hikid;
1448 }
1449 Tnode;
1450 
1451 void
TernaryIns(Tptr * p,uint8_t * s,int Len)1452 TernaryIns(Tptr * p, uint8_t * s, int Len)
1453 {
1454     int       d;
1455     uint8_t  *instr = s;
1456     Tptr      pp;
1457     while ((pp = *p) != NULL)
1458     {
1459         if ((d = *s - pp->splitchar) == 0)
1460         {
1461             if (*s++ == 0)
1462             {
1463                 printf("Oops duplicate Ternary string %s\n", instr);
1464                 return;
1465             }
1466             p = &(pp->eqkid);
1467         }
1468         else if (d < 0)
1469             p = &(pp->lokid);
1470         else
1471             p = &(pp->hikid);
1472     }
1473     for (;;)
1474     {
1475         *p = (Tptr) malloc(sizeof(Tnode));
1476         pp = *p;
1477         pp->splitchar = *s;
1478         pp->lokid = pp->eqkid = pp->hikid = 0;
1479         if (*s++ == 0)
1480         {
1481             pp->eqkid = (Tptr) instr;
1482             gStored++;                  // number of strings stored
1483             return;
1484         }
1485         p = &(pp->eqkid);
1486     }
1487 }
1488 
1489 int
TernaryGet(Tptr p,uint8_t * s,int Len)1490 TernaryGet(Tptr p, uint8_t * s, int Len)
1491 {
1492     while (p)
1493     {
1494         if (*s < p->splitchar)
1495             p = p->lokid;
1496         else if (*s == p->splitchar)
1497         {
1498             if (*s++ == 0)
1499                 return 1;
1500             p = p->eqkid;
1501         }
1502         else
1503             p = p->hikid;
1504     }
1505     return 0;
1506 }
1507 
1508 //=======================================================================
1509 //      M E A S U R E  A D T   S P E E D  and  M E M O R Y  U S A G E
1510 //=======================================================================
1511 
1512 //Word_t    TotalJudyMalloc;
1513 
1514 #define         GETSTRING(PCurStr, Strlen)
1515 
1516 int
main(int argc,char * argv[])1517 main(int argc, char *argv[])
1518 {
1519     TIMER_vars(tm);                     // declare timer variables
1520     FILE     *fid;                      // to read file.
1521     int       Chr;                      // char read from fgetc
1522     Pdt_t     Pdt, Pdts;                // array of lengths and pointers to str
1523     uint8_t  *PCurStr;                  // Current string pointer
1524     Word_t    LineCnt;                  // line counter
1525     int       Strlen;                   // = strlen();
1526     Word_t    StrTot;                   // Total len of strings
1527     Word_t    StrNumb;                  // current line number
1528     Word_t    ReadLin;                  // strings to read
1529     double    Mult;                     // multiplier between groups
1530     Word_t    Groups;                   // number of measurement groups
1531     Word_t    grp;                      // current group
1532     Pvoid_t   JudySL = NULL;            // JudySL root pointer
1533     Pvoid_t   JudyHS = NULL;            // JudyHS root pointer
1534     Pvoid_t   JLHash = NULL;            // JLHash root pointer
1535     Phinfo_t  HRoot = NULL;             // hash table root pointer
1536     Tptr      Ternary = { 0 };          // Ternary struct root pointer
1537 
1538     Method_t  Method = M_invalid;       // the method to measure
1539     Word_t    lines = 0;                // to shut up compiler
1540     Word_t    Bytes = 0;                // Bytes deallocated from FreeArray
1541     Word_t    StringMemory;             // Bytes allocated for input data
1542     int       Pass;
1543     int       Passes = 1;
1544 
1545     int       Opt;
1546     extern char *optarg;
1547     int       ErrorFlag = 0;
1548 
1549 //  un-buffer output
1550     setbuf(stdout, NULL);
1551 
1552 //============================================================
1553 // PARSE INPUT PARAMETERS
1554 //============================================================
1555 
1556     while ((Opt = getopt(argc, argv, "A:H:L:n:T:P:M:praDC")) != -1)
1557     {
1558         switch (Opt)
1559         {
1560         case 'A':
1561             if (Method != M_invalid)
1562             {
1563                 printf("\nOnly ONE '-A<ADT>' is allowed!!!!!!\n");
1564                 ErrorFlag++;
1565                 break;
1566             }
1567             if (strcmp(optarg, "Print") == 0)
1568                 Method = M_Print;
1569             if (strcmp(optarg, "Hash") == 0)
1570             {
1571                 Method = M_Hash;
1572                 HTblsz = 1LU << 20;     // default 1.0+ million
1573             }
1574             if (strcmp(optarg, "JLHash") == 0)
1575             {
1576                 Method = M_JLHash;
1577                 HTblsz = 0;             // max 2^32
1578             }
1579             if (strcmp(optarg, "JudySL") == 0)
1580                 Method = M_JudySL;
1581             if (strcmp(optarg, "Splay") == 0)
1582                 Method = M_Splay;
1583             if (strcmp(optarg, "Redblack") == 0)
1584                 Method = M_Redblack;
1585             if (strcmp(optarg, "JudyHS") == 0)
1586                 Method = M_JudyHS;
1587             if (strcmp(optarg, "Ternary") == 0)
1588                 Method = M_Ternary;
1589             break;
1590 
1591         case 'H':                      // Size of Hash table
1592             HTblsz = strtoul(optarg, NULL, 0);
1593             break;
1594 
1595         case 'L':                      // Number of Loops
1596             Passes = atoi(optarg);
1597             if (Passes <= 0)
1598             {
1599                 printf("\n !! OOps - Number of Loops must be > 0\n");
1600                 ErrorFlag++;
1601             }
1602             break;
1603 
1604         case 'n':                      // Max population of arrays
1605             nStrg = strtoul(optarg, NULL, 0);   // Size of Linear Array
1606             if (nStrg == 0)
1607             {
1608                 printf("\n !! OOps - Number of strings must be > 0\n");
1609                 ErrorFlag++;
1610             }
1611             break;
1612 
1613         case 'T':                      // Maximum retrieve tests for timing
1614             TValues = strtoul(optarg, NULL, 0);
1615             break;
1616 
1617         case 'P':                      // measurement points per decade
1618             PtsPdec = strtoul(optarg, NULL, 0);
1619             break;
1620 
1621         case 'M':                      // maximum length of input string
1622             MLength = atoi(optarg);
1623             break;
1624 
1625         case 'p':                      // pre-initialize Hash table
1626             pFlag = 1;
1627             break;
1628 
1629         case 'r':                      // do not randomize input
1630             if (CFlag)
1631             {
1632                 printf
1633                     ("\n !! OOps '-r' and '-C' flag are mutually exclusive\n");
1634                 ErrorFlag++;
1635                 break;
1636             }
1637             rFlag = 1;
1638             break;
1639 
1640         case 'a':                      // word align string buffers
1641             aFlag = 1;
1642             break;
1643 
1644         case 'D':                      // do a delete at end
1645             DFlag = 1;
1646             break;
1647 
1648         case 'C':                      // build sequential Get string buffers
1649             if (rFlag)
1650             {
1651                 printf
1652                     ("\n !! OOps '-C' and '-r' flag are mutually exclusive\n");
1653                 ErrorFlag++;
1654                 break;
1655             }
1656             CFlag = 1;
1657             break;
1658 
1659         default:
1660             ErrorFlag++;
1661             break;
1662         }
1663     }
1664     if (Method == -1)
1665     {
1666         printf
1667             ("\n !! OOps -- '-A <ADT>' I.E. '-AHash' or '-AJudyHS' is a required option\n");
1668         ErrorFlag++;
1669     }
1670 
1671     fileidx = optind;
1672     if (optind >= argc)
1673     {
1674         printf("\n !! OOps -- No input file specified\n");
1675         ErrorFlag++;
1676     }
1677 
1678     if (ErrorFlag)
1679     {
1680         printf("\n");
1681         printf("$ %s -A<ADT> -n# -H# -P# -T# -p -r -a InputStringFile\n\n",
1682                argv[0]);
1683         printf("Where: ");
1684         printf("'InputStringFile' is text file of strings to use in test\n\n");
1685         printf
1686             ("-A <ADT> is Hash|JLHash|JudySL|JudyHS|Splay|Redblack|Ternary|Print\n");
1687         printf("\n");
1688         printf("-n <#> max number of strings to use in tests (all)\n");
1689         printf("-H <#> is number elements in Hash table\n");
1690         printf("-P <#> number of measurement points per decade (40)\n");
1691         printf
1692             ("-T <#> Change the 'Get' number_of_strings to measure per data point\n");
1693         printf("-D     Use 'Delete' routine instead of 'FreeArray' routine\n");
1694         printf("-p     pre-zero hash table to fault in all pages\n");
1695         printf("-r     Do not randomize Insert and Get order of strings\n");
1696         printf("-C     Build contigious string buffers for 'Get' tests\n");
1697         printf("-a     Word_t align the start address of input strings\n");
1698         printf("-M <#> Change the maximum 'strlen(String)' of input Strings\n");
1699         printf("\n\n");
1700 
1701         exit(1);
1702     }
1703 
1704 //  calculate max number mask used in hash routine
1705 
1706 //============================================================
1707 //  PRINT COMMAND NAME + RUN ARGUMENTS
1708 //============================================================
1709 
1710     printf("# %s", argv[0]);
1711     if (nStrg != INFSTRGS)
1712         printf(" -n%lu", nStrg);
1713     switch (Method)
1714     {
1715     case M_Hash:
1716         printf(" -A Hash");
1717         break;
1718     case M_JLHash:
1719         printf(" -A JLHash");
1720         break;
1721     case M_JudySL:
1722         printf(" -A JudySL");
1723         break;
1724     case M_JudyHS:
1725         printf(" -A JudyHS");
1726         break;
1727     case M_Splay:
1728         printf(" -A Splay");
1729         break;
1730     case M_Redblack:
1731         printf(" -A Redblack");
1732         break;
1733     case M_Ternary:
1734         printf(" -A Ternary");
1735         break;
1736     default:
1737         break;
1738     }
1739     if (HTblsz)
1740         printf(" -H%lu", HTblsz);
1741     printf(" -P%lu", PtsPdec);
1742     printf(" -L%d", Passes);
1743     if (pFlag)
1744         printf(" -p");
1745     if (rFlag)
1746         printf(" -r");
1747     if (DFlag)
1748         printf(" -D");
1749     if (CFlag)
1750         printf(" -C");
1751     printf(" -M%d", MLength);
1752     printf(" %s", argv[fileidx]);
1753     printf("\n");
1754 
1755 //  print some header
1756 
1757     printf("# This file is in a format to input to 'jbgraph'\n");
1758     printf("# XLABEL Stored\n");
1759     printf("# YLABEL Microseconds / Index\n");
1760     printf("# COLHEAD 1 Total Insert attempts\n");
1761     printf("# COLHEAD 2 Number Gets\n");
1762     printf("# COLHEAD 3 Duplicate strings\n");
1763     printf("# COLHEAD 4 Insert Time (uS)\n");
1764     printf("# COLHEAD 5 Get Time (uS)\n");
1765     printf("# COLHEAD 6 Hash Chain Length\n");
1766     printf("# COLHEAD 7 Average RAM/String\n");
1767 
1768 //  uname(2) strings describing the machine
1769     {
1770         struct utsname ubuf;            // for system name
1771 
1772         if (uname(&ubuf) == -1)
1773             printf("# Uname(2) failed\n");
1774         else
1775             printf("# %s %s %s %s %s\n", ubuf.sysname, ubuf.nodename,
1776                    ubuf.release, ubuf.version, ubuf.machine);
1777     }
1778     if (sizeof(Word_t) == 8)
1779         printf("# 64 Bit CPU\n");
1780     else if (sizeof(Word_t) == 4)
1781         printf("# 32 Bit CPU\n");
1782 #ifdef  CPUMHZ
1783     printf("# Processor speed compiled at %d Mhz\n", CPUMHZ);
1784 #endif // CPUMHZ
1785 
1786     if (Method == M_Hash)
1787         printf("# Hash record struct: sizeof(hrec_t) = %d\n", sizeof(hrec_t));
1788     if (Method == M_Ternary)
1789         printf("# Ternary record struct: sizeof(Tnode) = %d\n", sizeof(Tnode));
1790 
1791 //  OPEN INPUT FILE:
1792 
1793     if ((fid = fopen(argv[fileidx], "r")) == NULL)
1794         FILERROR;
1795 
1796     for (StrTot = Strlen = LineCnt = 0; (Chr = fgetc(fid)) != EOF;)
1797     {
1798         if (Chr == '\n')
1799         {
1800             if (Strlen)                 // eat zero length lines
1801             {
1802                 if (Strlen > MLength)
1803                     Strlen = MLength;
1804                 LineCnt++;              // increase string count
1805                 Strlen++;               // add a \0 for JudySL
1806 
1807                 if (aFlag)              // for word alignment
1808                     StrTot += ROUNDUPWORD(Strlen);
1809                 else
1810                     StrTot += Strlen;   // memory needed to store strings
1811 
1812                 if (LineCnt == nStrg)   // shorten if required by -n option
1813                     break;
1814 
1815                 Strlen = 0;
1816             }
1817         }
1818         else
1819         {
1820             Strlen++;
1821         }
1822     }
1823     fclose(fid);
1824     fid = NULL;
1825     nStrg = LineCnt;                    // adj if necessary
1826 
1827 //  get struct to keep track of the strings
1828     StringMemory = sizeof(dt_t) * nStrg;
1829     Pdt = (Pdt_t) malloc(sizeof(dt_t) * nStrg);
1830     if (Pdt == NULL)
1831         MALLOCERROR;
1832 
1833 //  get memory to store the strings
1834     StringMemory += StrTot;
1835     PCurStr = (uint8_t *) malloc(StrTot);
1836     if (PCurStr == NULL)
1837         MALLOCERROR;
1838 
1839 //  BRING FILE INTO RAM, COUNT LINES and CHECK LENGTH
1840 
1841 //============================================================
1842 // CALCULATE NUMBER OF MEASUREMENT GROUPS -- points per decade
1843 //============================================================
1844 
1845 //  Calculate Multiplier for number of points per decade
1846     Mult = pow(10.0, 1.0 / (double)PtsPdec);
1847     {
1848         double    sum;
1849         Word_t    numb, prevnumb;
1850 
1851 //      Count number of measurements needed (10K max)
1852         sum = numb = 1;
1853         for (Groups = 2; Groups < 10000; Groups++)
1854             if (NextNumb(&numb, &sum, Mult, nStrg))
1855                 break;
1856 
1857 //      Get memory for measurements
1858         Pms = (Pms_t) calloc(Groups, sizeof(ms_t));
1859         if (Pms == NULL)
1860             MALLOCERROR;
1861 
1862 //      Now calculate number of Indexes for each measurement point
1863         numb = sum = 1;
1864         prevnumb = 0;
1865         for (grp = 0; grp < Groups; grp++)
1866         {
1867             Pms[grp].ms_delta = numb - prevnumb;
1868             Pms[grp].ms_mininsert = 10000000.0; // infinity
1869             Pms[grp].ms_minretrive = 10000000.0;        // infinity
1870             Pms[grp].ms_Bytes = 0.0;
1871 
1872             prevnumb = numb;
1873 
1874             NextNumb(&numb, &sum, Mult, nStrg);
1875         }
1876     }                                   // Groups = number of sizes
1877 
1878 //  print remaining header
1879 
1880     if (Method == M_Hash)
1881     {
1882         printf("# Allocate Hash table = %lu elements\n", HTblsz);
1883     }
1884     if (Method == M_JLHash)
1885     {
1886         if (HTblsz)
1887             printf("# JLHash table virtual size = %lu\n", HTblsz);
1888         else
1889             printf("# JLHash table virtual size = 4294967296\n");
1890     }
1891 
1892 //=======================================================================
1893 // Read text input file into RAM
1894 //=======================================================================
1895 
1896     if ((fid = fopen(argv[fileidx], "r")) == NULL)
1897         FILERROR;
1898 
1899     for (Strlen = LineCnt = 0; LineCnt < nStrg;)
1900     {
1901         Chr = fgetc(fid);
1902         if (Chr == '\n')
1903         {
1904             if (Strlen)                 // eat zero length lines
1905             {
1906                 if (Strlen > MLength)
1907                     Strlen = MLength;
1908                 Pdt[LineCnt].dt_string = PCurStr - Strlen;
1909                 Pdt[LineCnt].dt_strlen = Strlen;
1910                 LineCnt++;
1911 
1912                 Strlen = 0;
1913                 *PCurStr++ = '\0';      // for JudySL
1914                 if (aFlag)              // for word alignment
1915                     PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);
1916 
1917                 if ((Word_t)PCurStr % sizeof(Word_t))
1918                     aCount++;
1919             }
1920         }
1921         else
1922         {
1923             if (Strlen < MLength)
1924             {
1925                 Strlen++;
1926                 if (Chr == '\0')
1927                     Chr = ' ';          // for JudySL
1928                 *PCurStr++ = (uint8_t) Chr;
1929             }
1930         }
1931     }
1932     fclose(fid);
1933     fid = NULL;
1934     assert(nStrg == LineCnt);
1935 
1936     printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
1937            aCount, (double)aCount / (double)LineCnt * 100.0);
1938 
1939     printf("# Ram used for input data = %lu bytes\n", StringMemory);
1940 
1941     printf("# Average string length = %.1f bytes\n",
1942            (double)(StrTot - LineCnt) / LineCnt);
1943 
1944 // Allocate memory for Cached assess to 'Get' (largest delta). This flag
1945 // will put the 'randomized' 'Get' order strings in a sequential buffer.
1946 // Modern processors will 'read ahead' with an access to RAM is sequential
1947 // -- thus saving the 'Get' having to bring the string into cache.
1948     if (CFlag)
1949     {
1950         PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
1951         if (PdtS_ == NULL)
1952             MALLOCERROR;
1953 
1954 //      now guess how much memory will be needed for the strings
1955         Strsiz_ = ((StrTot / nStrg) * TValues);
1956         Strsiz_ += Strsiz_;             // bump %20
1957 
1958         Strbuf_ = (uint8_t *) malloc(Strsiz_);
1959         if (Strbuf_ == NULL)
1960             MALLOCERROR;
1961 
1962         printf
1963             ("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
1964              Strsiz_);
1965     }
1966 
1967 //=======================================================================
1968 //  TIME GETSTRING() from Cache (most of the time)
1969 //=======================================================================
1970 
1971     STARTTm(tm);                        // start timer
1972     for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
1973     {
1974         GETSTRING(PCurStr, Strlen);
1975         Strlen = Pdt[LineCnt].dt_strlen;
1976         PCurStr = Pdt[LineCnt].dt_string;
1977 
1978         if (strlen(PCurStr) != Strlen)  // bring string into Cache
1979         {
1980 //          necessary to prevent cc from optimizing out
1981             printf(" !! OOps Bug, wrong string length\n");
1982             exit(1);
1983         }
1984     }
1985     ENDTm(DeltaUSec, tm);               // end timer
1986 
1987     printf
1988         ("# Access Time    = %6.3f uS average per string (mostly from Cache)\n",
1989          DeltaUSec / nStrg);
1990 
1991 //=======================================================================
1992 //  TIME GETSTRING() + HASHSTR() from Cache (most of the time)
1993 //=======================================================================
1994 
1995     STARTTm(tm);                        // start timer
1996     for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
1997     {
1998         uint32_t  hval;
1999         GETSTRING(PCurStr, Strlen);
2000         PCurStr = Pdt[LineCnt].dt_string;
2001         Strlen = Pdt[LineCnt].dt_strlen;
2002         hval = HASHSTR(PCurStr, Strlen, HTblsz);
2003         if (foolflag)
2004             printf("OOps foolflag is set, hval = %d\n", hval);
2005     }
2006     ENDTm(DeltaUSec, tm);               // end timer
2007 
2008     printf
2009         ("# HashStr() Time = %6.3f uS average per string (mostly from Cache)\n",
2010          DeltaUSec / nStrg);
2011 
2012 //  randomize the input strings (adjacent strings will not be on same page)
2013 
2014     if (rFlag == 0)
2015     {
2016         Randomize(Pdt, nStrg);          // Randomize ALL to be stored
2017 
2018 //=======================================================================
2019 //  TIME GETSTRING() from RAM (most of the time)
2020 //=======================================================================
2021 
2022         STARTTm(tm);                    // start timer
2023         for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
2024         {
2025             GETSTRING(PCurStr, Strlen);
2026             Strlen = Pdt[LineCnt].dt_strlen;
2027             PCurStr = Pdt[LineCnt].dt_string;
2028 
2029             if (strlen(PCurStr) != Strlen)      // bring string into Cache
2030             {
2031 //              necessary to prevent cc from optimizing out
2032                 printf(" !! OOps Bug, wrong string length\n");
2033                 exit(1);
2034             }
2035         }
2036         ENDTm(DeltaUSec, tm);           // end timer
2037 
2038         printf
2039             ("# Access Time    = %6.3f uS average per string (mostly from RAM)\n",
2040              DeltaUSec / nStrg);
2041 
2042 //=======================================================================
2043 //  TIME GETSTRING() + HASHSTR() from RAM (most of the time)
2044 //=======================================================================
2045 
2046         STARTTm(tm);                    // start timer
2047         for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
2048         {
2049             uint32_t  hval;
2050             GETSTRING(PCurStr, Strlen);
2051             Strlen = Pdt[LineCnt].dt_strlen;
2052             PCurStr = Pdt[LineCnt].dt_string;
2053             hval = HASHSTR(PCurStr, Strlen, HTblsz);
2054             if (foolflag)
2055                 printf("OOps foolflag is set, hval = %u\n", hval);
2056         }
2057         ENDTm(DeltaUSec, tm);           // end timer
2058 
2059         printf
2060             ("# HashStr() Time = %6.3f uS average per string (mostly from RAM)\n",
2061              DeltaUSec / nStrg);
2062     }
2063 
2064 //=======================================================================
2065 //  Insert, Get and Delete loops
2066 //=======================================================================
2067 
2068     for (Pass = 0; Pass < Passes; Pass++)
2069     {
2070         printf("# Pass %d\n", Pass);
2071 
2072 //      heading of table
2073         Printf
2074             ("# TotInserts  DeltaGets  DupStrs InsTime GetTime HChainLen Ram/String\n");
2075         gStored = 0;                    // number of strings inserted
2076         StrNumb = 0;                    // number of attempted strings inserted
2077 
2078         STARTmem;                       // current malloc() mem usage
2079         for (grp = 0; grp < Groups; grp++)
2080         {
2081             PWord_t   PValue;
2082             Word_t    Begin = gStored;  // remember current STOREed
2083             Word_t    Delta = Pms[grp].ms_delta;
2084 
2085             switch (Method)
2086             {
2087             case M_Print:
2088             {
2089                 STARTTm(tm);            // start timer
2090                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2091                 {
2092                     GETSTRING(PCurStr, Strlen);
2093                     PCurStr = Pdt[StrNumb].dt_string;
2094                     Printf("%s\n", (char *)PCurStr);
2095                 }
2096                 ENDTm(DeltaUSec, tm);   // end timer
2097                 break;
2098             }
2099             case M_Hash:
2100             {
2101                 STARTTm(tm);            // start timer
2102                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2103                 {
2104                     GETSTRING(PCurStr, Strlen);
2105                     Strlen = Pdt[StrNumb].dt_strlen;
2106                     PCurStr = Pdt[StrNumb].dt_string;
2107 
2108                     PValue = HashIns(&HRoot, PCurStr, Strlen, HTblsz);
2109                     if ((*PValue)++ == 0)
2110                         gStored++;      // number of strings stored
2111                 }
2112                 ENDTm(DeltaUSec, tm);   // end timer
2113                 break;
2114             }
2115             case M_JLHash:
2116             {
2117                 STARTTm(tm);            // start timer
2118                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2119                 {
2120                     GETSTRING(PCurStr, Strlen);
2121                     Strlen = Pdt[StrNumb].dt_strlen;
2122                     PCurStr = Pdt[StrNumb].dt_string;
2123                     PValue = JLHashIns(&JLHash, PCurStr, Strlen, HTblsz);
2124                     if ((*PValue)++ == 0)
2125                         gStored++;      // number of strings stored
2126                 }
2127                 ENDTm(DeltaUSec, tm);   // end timer
2128                 break;
2129             }
2130             case M_JudySL:
2131             {
2132                 STARTTm(tm);            // start timer
2133                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2134                 {
2135                     GETSTRING(PCurStr, Strlen);
2136                     PCurStr = Pdt[StrNumb].dt_string;
2137 
2138                     JSLI(PValue, JudySL, PCurStr);      // insert string
2139                     if ((*PValue)++ == 0)
2140                         gStored++;      // number of strings stored
2141                 }
2142                 ENDTm(DeltaUSec, tm);   // end timer
2143                 break;
2144             }
2145             case M_JudyHS:
2146             {
2147                 STARTTm(tm);            // start timer
2148                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2149                 {
2150                     GETSTRING(PCurStr, Strlen);
2151                     Strlen = Pdt[StrNumb].dt_strlen;
2152                     PCurStr = Pdt[StrNumb].dt_string;
2153 
2154                     JHSI(PValue, JudyHS, PCurStr, Strlen);      // insert string
2155                     if ((*PValue)++ == 0)
2156                         gStored++;      // number of strings stored
2157                 }
2158                 ENDTm(DeltaUSec, tm);   // end timer
2159                 break;
2160             }
2161 
2162 // NOTE:  the ADT's below here are so slow, that I did not add much effort
2163 // to clean them up. (dlb)
2164 
2165             case M_Splay:
2166             {
2167                 STARTTm(tm);            // start timer
2168                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2169                 {
2170                     GETSTRING(PCurStr, Strlen);
2171                     PCurStr = Pdt[StrNumb].dt_string;
2172 
2173                     splayinsert(&spans, (char *)PCurStr);
2174                 }
2175                 ENDTm(DeltaUSec, tm);   // end timer
2176                 break;
2177             }
2178             case M_Redblack:
2179             {
2180                 STARTTm(tm);            // start timer
2181                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2182                 {
2183                     GETSTRING(PCurStr, Strlen);
2184                     PCurStr = Pdt[StrNumb].dt_string;
2185 
2186                     redblackinsert(&rbans, (char *)PCurStr);
2187                 }
2188                 ENDTm(DeltaUSec, tm);   // end timer
2189                 break;
2190             }
2191             case M_Ternary:
2192             {
2193                 STARTTm(tm);            // start timer
2194                 for (lines = 0; lines < Delta; lines++, StrNumb++)
2195                 {
2196                     GETSTRING(PCurStr, Strlen);
2197                     Strlen = Pdt[StrNumb].dt_strlen;
2198                     PCurStr = Pdt[StrNumb].dt_string;
2199 
2200                     TernaryIns(&Ternary, PCurStr, Strlen);
2201                 }
2202                 ENDTm(DeltaUSec, tm);   // end timer
2203                 break;
2204             }
2205             default:
2206                 assert(0);              // cant happen
2207                 break;
2208             }
2209             ENDmem(DeltaMem);           // current malloc() mem usage
2210 
2211             ReadLin = StrNumb;          // adjust delta
2212             if (ReadLin > TValues)
2213                 ReadLin = TValues;
2214 //            if (Delta > TValues)
2215 //                ReadLin = Delta;        // use the Delta
2216 
2217             Printf(" %11lu", StrNumb);  // Total stored
2218             Printf(" %10lu", ReadLin);  // Number to read back
2219             Begin = gStored - Begin;    // actual STORED
2220             assert(lines == Delta);
2221             Printf(" %8lu", Delta - Begin);     // Duplicate strings
2222 
2223 //          Average time per line to store (including duplicate strings)
2224             Mult = DeltaUSec / (double)Delta;
2225 
2226             if (Mult < Pms[grp].ms_mininsert)
2227                 Pms[grp].ms_mininsert = Mult;
2228 
2229             Printf(" %7.3f", Mult);
2230 
2231 //          Bytes allocated thru malloc()
2232             if (TotalJudyMalloc == 0)
2233                 Pms[grp].ms_Bytes = (double)DeltaMem;
2234             else
2235                 Pms[grp].ms_Bytes = (double)(TotalJudyMalloc * sizeof(Word_t));
2236 
2237             Pms[grp].ms_Bytes /= (double)gStored;
2238 
2239             fflush(stdout);
2240 
2241 //=======================================================================
2242 //  READ BACK LOOP
2243 //=======================================================================
2244 
2245             Pdts = Pdt;                 // Strings to 'Get'
2246             gChainln = 0;               // total chain lengths
2247 
2248             if (rFlag == 0)
2249             {
2250                 Randomize(Pdt, StrNumb);        // Randomize ONLY those stored
2251 
2252                 if (CFlag)
2253                 {
2254 //                  Allocate and make sequencial string buffer
2255                     Pdts = BuildSeqBuf(Pdt, ReadLin);
2256                 }
2257             }
2258             switch (Method)
2259             {
2260             case M_Print:
2261                 break;
2262             case M_Hash:
2263             {
2264                 STARTTm(tm);            // start timer
2265                 for (lines = 0; lines < ReadLin; lines++)
2266                 {
2267                     GETSTRING(PCurStr, Strlen);
2268                     Strlen = Pdts[lines].dt_strlen;
2269                     PCurStr = Pdts[lines].dt_string;
2270 
2271                     PValue = HashGet(HRoot, PCurStr, Strlen);   // get string
2272                     assert(PValue != NULL);
2273                     assert(*PValue > 0);
2274                 }
2275                 ENDTm(DeltaUSec, tm);   // end timer
2276                 break;
2277             }
2278             case M_JLHash:
2279             {
2280                 STARTTm(tm);            // start timer
2281                 for (lines = 0; lines < ReadLin; lines++)
2282                 {
2283                     GETSTRING(PCurStr, Strlen);
2284                     Strlen = Pdts[lines].dt_strlen;
2285                     PCurStr = Pdts[lines].dt_string;
2286 
2287                     PValue = JLHashGet(JLHash, PCurStr, Strlen);        // get string
2288                     assert(PValue != NULL);
2289                     assert(*PValue > 0);
2290                 }
2291                 ENDTm(DeltaUSec, tm);   // end timer
2292                 break;
2293             }
2294             case M_JudySL:
2295             {
2296                 STARTTm(tm);            // start timer
2297                 for (lines = 0; lines < ReadLin; lines++)
2298                 {
2299                     GETSTRING(PCurStr, Strlen);
2300                     PCurStr = Pdts[lines].dt_string;
2301 
2302                     JSLG(PValue, JudySL, PCurStr);      // get string
2303                     assert(PValue != NULL);
2304                     assert(*PValue > 0);
2305                 }
2306                 ENDTm(DeltaUSec, tm);   // end timer
2307                 break;
2308             }
2309             case M_JudyHS:
2310             {
2311                 STARTTm(tm);            // start timer
2312                 for (lines = 0; lines < ReadLin; lines++)
2313                 {
2314                     GETSTRING(PCurStr, Strlen);
2315                     Strlen = Pdts[lines].dt_strlen;
2316                     PCurStr = Pdts[lines].dt_string;
2317 
2318                     JHSG(PValue, JudyHS, PCurStr, Strlen);      // get string
2319                     assert(PValue != NULL);
2320                     assert(*PValue > 0);
2321                 }
2322                 ENDTm(DeltaUSec, tm);   // end timer
2323                 break;
2324             }
2325 
2326 // NOTE:  the ADT's below here are so slow, that I did not add much effort
2327 // to clean them up. (dlb)
2328 
2329             case M_Splay:
2330             {
2331                 STARTTm(tm);            // start timer
2332                 for (lines = 0; lines < ReadLin; lines++)
2333                 {
2334                     GETSTRING(PCurStr, Strlen);
2335                     PCurStr = Pdts[lines].dt_string;
2336 
2337                     splaysearch(&spans, (char *)PCurStr);
2338                 }
2339                 ENDTm(DeltaUSec, tm);   // end timer
2340                 break;
2341             }
2342             case M_Redblack:
2343             {
2344                 STARTTm(tm);            // start timer
2345                 for (lines = 0; lines < ReadLin; lines++)
2346                 {
2347                     GETSTRING(PCurStr, Strlen);
2348                     PCurStr = Pdts[lines].dt_string;
2349 
2350                     redblacksearch(&rbans, (char *)PCurStr);
2351                 }
2352                 ENDTm(DeltaUSec, tm);   // end timer
2353                 break;
2354             }
2355             case M_Ternary:
2356             {
2357                 STARTTm(tm);            // start timer
2358                 for (lines = 0; lines < ReadLin; lines++)
2359                 {
2360                     GETSTRING(PCurStr, Strlen);
2361                     Strlen = Pdts[lines].dt_strlen;
2362                     PCurStr = Pdts[lines].dt_string;
2363 
2364                     if (TernaryGet(Ternary, PCurStr, Strlen) == 0)
2365                     {
2366                         printf("\n OOps - Ternary Bug at Line = %d\n",
2367                                __LINE__);
2368                         exit(1);
2369                     }
2370                 }
2371                 ENDTm(DeltaUSec, tm);   // end timer
2372                 break;
2373             }
2374             default:
2375                 assert(0);              // cant happen
2376                 break;
2377             }
2378             Mult = DeltaUSec / (double)ReadLin;
2379 
2380 //          save least value
2381             if (Mult < Pms[grp].ms_minretrive)
2382                 Pms[grp].ms_minretrive = Mult;
2383 
2384             Printf(" %7.3f", Mult);     // RETRIVE per string
2385             Printf(" %9.6f", (double)gChainln / (double)ReadLin);
2386 
2387 //      RAM USED PER STRING TO STORE DATA
2388 
2389             Printf(" %13.1f", (double)Pms[grp].ms_Bytes);
2390             Printf("\n");
2391             fflush(stdout);
2392         }
2393         if (Method == M_Print)
2394             exit(0);
2395 
2396         Printf("# Total Duplicate strings = %lu\n", nStrg - gStored);
2397 
2398 //=======================================================================
2399 //  Delete loop
2400 //=======================================================================
2401 
2402         DeltaUSec = -1.0;               // set deleted flag
2403 
2404         if (rFlag == 0)
2405         {
2406             Randomize(Pdt, StrNumb);    // Randomize ONLY those stored
2407         }
2408         switch (Method)
2409         {
2410         case M_JudySL:
2411         {
2412             if (DFlag)
2413             {
2414                 Printf("# Begin JudySLDel() loop...\n");
2415                 STARTTm(tm);            // start timer
2416                 for (lines = 0; lines < nStrg; lines++)
2417                 {
2418                     int       Rc;
2419                     GETSTRING(PCurStr, Strlen);
2420                     PCurStr = Pdt[lines].dt_string;
2421                     JSLD(Rc, JudySL, PCurStr);  // delete string
2422                     assert(Rc != JERR);
2423                 }
2424                 ENDTm(DeltaUSec, tm);   // end timer
2425             }
2426             else
2427             {
2428                 Printf("# Begin JudySLFreeArray()...\n");
2429                 STARTTm(tm);            // start timer
2430                 JSLFA(Bytes, JudySL);
2431                 ENDTm(DeltaUSec, tm);   // end timer
2432             }
2433             break;
2434         }
2435         case M_JudyHS:
2436         {
2437             if (DFlag)
2438             {
2439                 int       Rc;
2440                 Printf("# Begin JudyHSDel() loop...");
2441                 STARTTm(tm);            // start timer
2442                 for (lines = 0; lines < nStrg; lines++)
2443                 {
2444                     GETSTRING(PCurStr, Strlen);
2445                     Strlen = Pdt[lines].dt_strlen;
2446                     PCurStr = Pdt[lines].dt_string;
2447                     JHSD(Rc, JudyHS, PCurStr, Strlen);  // Delete string
2448                     assert(Rc != JERR);
2449                 }
2450                 ENDTm(DeltaUSec, tm);   // end timer
2451             }
2452             else
2453             {
2454                 Printf("# Begin JudyHSFreeArray()...\n");
2455                 STARTTm(tm);            // start timer
2456                 JHSFA(Bytes, JudyHS);
2457                 ENDTm(DeltaUSec, tm);   // end timer
2458             }
2459             break;
2460         }
2461         case M_Hash:
2462         {
2463             if (DFlag)
2464             {
2465                 Printf("# Begin HashDel() loop...\n");
2466                 STARTTm(tm);            // start timer
2467                 for (lines = 0; lines < nStrg; lines++)
2468                 {
2469                     GETSTRING(PCurStr, Strlen);
2470                     Strlen = Pdt[lines].dt_strlen;
2471                     PCurStr = Pdt[lines].dt_string;
2472                     HashDel(&HRoot, PCurStr, Strlen);   // Delete string
2473                 }
2474                 ENDTm(DeltaUSec, tm);   // end timer
2475             }
2476             else
2477             {
2478                 Printf("# Begin HashFreeArray()...\n");
2479                 STARTTm(tm);            // start timer
2480                 Bytes = HashFreeArray(&HRoot);
2481                 ENDTm(DeltaUSec, tm);   // end timer
2482             }
2483             break;
2484         }
2485         case M_JLHash:
2486         {
2487             if (DFlag)
2488             {
2489                 Printf("# Begin JLHashDel() loop...\n");
2490                 STARTTm(tm);            // start timer
2491                 for (lines = 0; lines < nStrg; lines++)
2492                 {
2493                     GETSTRING(PCurStr, Strlen);
2494                     Strlen = Pdt[lines].dt_strlen;
2495                     PCurStr = Pdt[lines].dt_string;
2496                     JLHashDel(&JLHash, PCurStr, Strlen);        // Delete string
2497                 }
2498                 ENDTm(DeltaUSec, tm);   // end timer
2499             }
2500             else
2501             {
2502                 Printf("# Begin JLHashFreeArray()...\n");
2503                 STARTTm(tm);            // start timer
2504                 Bytes = JLHashFreeArray(&JLHash);
2505                 ENDTm(DeltaUSec, tm);   // end timer
2506             }
2507             break;
2508         }
2509         default:
2510             printf("# Delete not implemented yet, so quit\n");
2511             Passes = 1;                 // No, delete, so quit
2512             break;
2513         }
2514 //  average time per line to delete (including duplicate strings)
2515         if (Bytes)                      // Measured freed bytes?
2516         {
2517             Printf("#                      returned %lu bytes\n", Bytes);
2518         }
2519         if (TotalJudyMalloc)            // Any bytes left after free?
2520         {
2521             printf
2522                 ("# !!! BUG, %lu bytes not deleted in *Free()\n",
2523                  TotalJudyMalloc * sizeof(Word_t));
2524         }
2525         if (DeltaUSec != -1.0)          // Measured how long to free?
2526         {
2527             Printf("# Free %lu strings, %0.3f uSecs Ave/string\n",
2528                    gStored, DeltaUSec / (double)gStored);
2529         }
2530         Printf("\n");
2531     }
2532 
2533     if (Passes != 1)
2534     {
2535         printf("# TotInserts      0     0 InsTime GetTime      0 Ram/String\n");
2536         StrNumb = 0;
2537         for (grp = 0; grp < Groups; grp++)
2538         {
2539             StrNumb += Pms[grp].ms_delta;
2540 
2541             printf(" %11lu", StrNumb);  // Total stored
2542             printf("      0     0");    // place holder
2543             printf(" %7.3f", Pms[grp].ms_mininsert);
2544             printf(" %7.3f", Pms[grp].ms_minretrive);
2545             printf("      0");          // place holder
2546             printf(" %7.3f", Pms[grp].ms_Bytes);
2547             printf("\n");
2548         }
2549     }
2550     exit(0);                            // done
2551 }                                       // main()
2552