1 //
2 // This program is free software; you can redistribute it and/or modify it
3 // under the term of the GNU Lesser General Public License as published by the
4 // Free Software Foundation; either version 2 of the License, or (at your
5 // option) any later version.
6 //
7 // This program is distributed in the hope that it will be useful, but WITHOUT
8 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
9 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
10 // for more details.
11 //
12 // You should have received a copy of the GNU Lesser General Public License
13 // along with this program; if not, write to the Free Software Foundation,
14 // Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 // _________________
16
17 // JUDY FUNCTIONS FOR STRING INDEXES, where associated values are longs. One
18 // JudySL*() corresponds to each JudyL*() function (with exceptions).
19 //
20 // See the manual entry for details.
21 //
22 // METHOD: Break up each null-terminated Index (string) into chunks of W
23 // bytes, where W is the machines word size, with null-padding in the last
24 // word if necessary. Store strings as a tree of JudyL arrays, that is, array
25 // of array of array... where each level consumes W bytes (one word) as an
26 // index to the JudyL array at that level. Since strings can begin on
27 // arbitrary byte boundaries, copy each chunk of W bytes from Index into a
28 // word-aligned object before using it as a Judy index.
29 //
30 // The JudySL tree also supports "single-index shortcut leaves". A simple
31 // JudySL array (tree of JudyL arrays) would go as many levels deep as the
32 // Index (string) is long, which wastes time and memory when an Index is unique
33 // beyond a certain point. When theres just one Index under a pointer, given
34 // a reliable way to tell that the pointer is not a root pointer to another
35 // JudyL array, it should save a lot of time to instead point to a "leaf"
36 // object, similar to leaves in JudyL arrays.
37 //
38 // TBD: Multi-index leaves, like those in JudyL, are also worth considering,
39 // but their payback for JudySL is less certain. Likewise, shortcut branches
40 // are worth considering too.
41 //
42 // This code uses the Judy.h definitions and Doug Baskins convention of a "P"
43 // prefix for pointers, except no "P" for the first level of char * (strings).
44
45 // IMPORTS:
46
47 #include <string.h> // for strcmp(), strlen(), strcpy()
48 #include <Judy.h>
49
50 #ifndef NDEDUG
51 #define NDEBUG 1
52 #endif
53 #include <assert.h>
54
55 //=======================================================================
56 // Compile:
57 //
58 // cc -O JudyHS.c -c
59 //
60 // Notes:
61 // 1) use -DJU_64BIT for 64 bit compiles (HP, Sun, IPF, Motorola/IBM? etc..)
62 // 2) In gcc version 3.3.1 for a Centrino, -O2 is faster than -O
63 // 3) In gcc version 3.3.2 for a Centrino, -O3 is faster than -O2
64 //=======================================================================
65
66 #define JU_SET_ERRNO(PJERROR, JERRNO) \
67 { \
68 if (PJERROR != (PJError_t)NULL) \
69 { \
70 JU_ERRNO(PJERROR) = (JERRNO); \
71 JU_ERRID(PJERROR) = __LINE__; \
72 } \
73 }
74
75 #define JU_SET_ERRNO_NONNULL(PJERROR, JERRNO) \
76 { \
77 JU_ERRNO(PJERROR) = (JERRNO); \
78 JU_ERRID(PJERROR) = __LINE__; \
79 }
80
81 // SUPPORT FOR HANDLING WORDS:
82
83 #define WORDSIZE (sizeof (Word_t)) // bytes in word = JudyL index.
84 #define WORDS(BYTES) (((BYTES) + WORDSIZE - 1) / WORDSIZE) // round up.
85
86 // To mark a pointer is to a "short cut leaf", set least bit
87
88 #define IS_PSCL(PSCL) (((Word_t) (PSCL)) & JLAP_INVALID)
89 #define CLEAR_PSCL(PSCL) ((Pscl_t)(((Word_t) (PSCL)) & (~JLAP_INVALID)))
90 #define SET_PSCL(PSCL) (((Word_t) (PSCL)) | JLAP_INVALID)
91
92 // MISCELLANEOUS GLOBALS:
93
94 // Get the Index (string) length in bytes, including the trailing \0, which
95 // is an integral part of the string:
96
97 // A string is "in the last word" if a previously-set byte count is at or below
98 // the system word size, or in some cases if the last byte in the (null-padded)
99 // word is null (assume big-endian, including in a register on a little-endian
100 // machine):
101
102 #define LASTWORD_BY_VALUE(WORD) (! ((WORD) & 0xffL))
103
104 #ifdef JU_64BIT
105
106 // copy from 1..7 bytes from string to Word_t and test if \0 bytes
107 //
108 #define COPYSTRINGtoWORD(WORD,STR) \
109 { \
110 do \
111 { \
112 uint8_t chr; \
113 WORD = (Word_t)(STR)[0] << 56; \
114 if (!(WORD)) break; \
115 if (!(chr = (STR)[1])) break; \
116 WORD += ((Word_t)(chr) << 48); \
117 if (!(chr = (STR)[2])) break; \
118 WORD += ((Word_t)(chr) << 40); \
119 if (!(chr = (STR)[3])) break; \
120 WORD += ((Word_t)(chr) << 32); \
121 if (!(chr = (STR)[4])) break; \
122 WORD += ((Word_t)(chr) << 24); \
123 if (!(chr = (STR)[5])) break; \
124 WORD += ((Word_t)(chr) << 16); \
125 if (!(chr = (STR)[6])) break; \
126 WORD += ((Word_t)(chr) << 8) + (STR)[7]; \
127 } while(0); \
128 }
129
130 // copy Word_t from 1..8 bytes to string and test of \0 bytes
131 //
132 #define COPYWORDtoSTRING(STR,WORD) \
133 { \
134 do \
135 { \
136 if (!((STR)[0] = (uint8_t)((WORD) >> 56))) break; \
137 if (!((STR)[1] = (uint8_t)((WORD) >> 48))) break; \
138 if (!((STR)[2] = (uint8_t)((WORD) >> 40))) break; \
139 if (!((STR)[3] = (uint8_t)((WORD) >> 32))) break; \
140 if (!((STR)[4] = (uint8_t)((WORD) >> 24))) break; \
141 if (!((STR)[5] = (uint8_t)((WORD) >> 16))) break; \
142 if (!((STR)[6] = (uint8_t)((WORD) >> 8))) break; \
143 (STR)[7] = (uint8_t)(WORD); \
144 } while(0); \
145 }
146
147 #else // JU_32BIT
148
149 // copy from 1..4 bytes from string to Word_t and test if \0 bytes
150
151 #define COPYSTRINGtoWORD(WORD,STR) \
152 { \
153 do \
154 { \
155 uint8_t chr; \
156 WORD = (STR)[0] << 24; \
157 if (WORD == 0) break; \
158 if (!(chr = (STR)[1])) break; \
159 WORD += (Word_t)(chr << 16); \
160 if (!(chr = (STR)[2])) break; \
161 WORD += (Word_t)(chr << 8) + (STR)[3]; \
162 } while(0); \
163 }
164
165 // copy Word_t from 1..4 bytes to string and test of \0 bytes
166
167 #define COPYWORDtoSTRING(STR,WORD) \
168 { \
169 do \
170 { \
171 if (!((STR)[0] = (uint8_t)((WORD) >> 24))) break; \
172 if (!((STR)[1] = (uint8_t)((WORD) >> 16))) break; \
173 if (!((STR)[2] = (uint8_t)((WORD) >> 8))) break; \
174 (STR)[3] = (uint8_t)(WORD); \
175 } while(0); \
176 }
177 #endif // JU_32BIT
178
179
180 // SUPPORT FOR SINGLE-INDEX SHORTCUT LEAVES:
181
182 typedef struct SHORCUTLEAF
183 {
184 Pvoid_t scl_Pvalue; // callers value area.
185 uint8_t scl_Index[WORDSIZE]; // base Index string.
186 } scl_t , *Pscl_t;
187
188 // overhead of the scl_Pvalue only, the scl_Index is calculate elsewhere
189
190 #define STRUCTOVD (sizeof(scl_t) - WORDSIZE)
191
192 // How big to malloc a shortcut leaf; stringlen should already include the
193 // trailing null char:
194
195 #define SCLSIZE(LEN) (((LEN) + STRUCTOVD + WORDSIZE - 1) / WORDSIZE)
196
197 // string routines, may replace with your own
198 //
199 #define STRCMP(S1,S2) strcmp((void *)(S1), (void *)(S2))
200 #define STRCPY(S1,S2) strcpy((void *)(S1), (void *)(S2))
201 #define STRLEN(S1) (strlen((void *)(S1)) + 1)
202
203
204 // Index and value area for a shortcut leaf, depending on how it matches the
205 // undecoded remainder of the Index, given a Pscl_t that includes type bits
206 // that must be cleared:
207 //
208 // PSCLINDEX() and PSCLVALUE() are also useful when Pscl contains uncleared
209 // TYPE bits.
210 //
211 // Note: SCLCMP() cannot take advantage of knowing the Index length because
212 // the scl_Index length is not pre-known when these macros are used.
213
214 #define PSCLINDEX(PSCL) ((CLEAR_PSCL(PSCL))->scl_Index)
215 #define PSCLVALUE(PSCL) ((CLEAR_PSCL(PSCL))->scl_Pvalue)
216
217 #define SCLCMP(INDEX,PSCL) STRCMP(INDEX, PSCLINDEX(PSCL))
218
219 #define PPSCLVALUE_EQ(INDEX,PSCL) \
220 ((SCLCMP(INDEX, PSCL) == 0) ? &PSCLVALUE(PSCL) : (PPvoid_t)NULL)
221
222 #define PPSCLVALUE_LT(INDEX,PSCL) \
223 ((SCLCMP(INDEX, PSCL) < 0) ? &PSCLVALUE(PSCL) : (PPvoid_t)NULL)
224
225 #define PPSCLVALUE_GT(INDEX,PSCL) \
226 ((SCLCMP(INDEX, PSCL) > 0) ? &PSCLVALUE(PSCL) : (PPvoid_t)NULL)
227
228 // Common in-lined code to append or free a shortcut leaf:
229 //
230 // See header comments about premature return(). Note that malloc() does not
231 // pre-zero the memory, so ensure scl_Pvalue is zeroed, just like a value area
232 // in a JudyL array. Hope strcpy() is fast enough in this context.
233
234 #define APPEND_SCL(PSCL,PPARRAY,INDEX,LEN,PJERROR) \
235 { \
236 if (((PSCL) = (Pscl_t) JudyMalloc(SCLSIZE(LEN))) == (Pscl_t)NULL) \
237 { \
238 JU_SET_ERRNO(PJERROR, JU_ERRNO_NOMEM); \
239 return (PPJERR); \
240 } \
241 *(PPARRAY) = (Pvoid_t)SET_PSCL(PSCL); \
242 ((PSCL)->scl_Pvalue) = (Pvoid_t)NULL; \
243 (void)STRCPY((PSCL)->scl_Index, INDEX); \
244 }
245
246 // "FORWARD" DECLARATIONS:
247
248 static void JudySLModifyErrno(PJError_t PJError,
249 Pcvoid_t PArray, Pcvoid_t PArrayOrig);
250 static int JudySLDelSub(PPvoid_t PPArray, PPvoid_t PPArrayOrig,
251 const uint8_t * Index, Word_t len, PJError_t PJError);
252 static PPvoid_t JudySLPrevSub(Pcvoid_t PArray, uint8_t * Index, int orig,
253 Word_t len, PJError_t PJError);
254 static PPvoid_t JudySLNextSub(Pcvoid_t PArray, uint8_t * Index, int orig,
255 Word_t len, PJError_t PJError);
256
257 // ****************************************************************************
258 // J U D Y S L M O D I F Y E R R N O
259 //
260 // Common code for error translation: When a caller passes an invalid JAP
261 // ("not a JudyL pointer"), OR if the JudySL array is corrupted at a lower
262 // level, various JudyL*() calls return JU_ERRNO_NOTJUDYL. If the caller wants
263 // detailed error info, convert this particular error to JU_ERRNO_NOTJUDYSL if
264 // at the top of the tree, otherwise convert it to JU_ERRNO_CORRUPT, meaning
265 // there was a corruption (the only one even detectable outside JudyL) in the
266 // JudySL tree; but pass through any other errors unaltered.
267
268 static void
JudySLModifyErrno(PJError_t PJError,Pcvoid_t PArray,Pcvoid_t PArrayOrig)269 JudySLModifyErrno(PJError_t PJError, // to modify if non-null.
270 Pcvoid_t PArray, // current JudyL array.
271 Pcvoid_t PArrayOrig // top-of-tree JudyL array.
272 )
273 { // map this Judy errno.
274 if ((PJError != PJE0) && (JU_ERRNO(PJError) == JU_ERRNO_NOTJUDYL))
275 {
276 if (PArray == PArrayOrig) // callers fault.
277 {
278 JU_SET_ERRNO_NONNULL(PJError, JU_ERRNO_NOTJUDYSL);
279 }
280 else // lower level.
281 {
282 JU_SET_ERRNO_NONNULL(PJError, JU_ERRNO_CORRUPT);
283 }
284 }
285 } // JudySLModifyErrno()
286
287 // ****************************************************************************
288 // J U D Y S L G E T
289 //
290 // See comments in file header and below.
291
292 PPvoid_t
JudySLGet(Pcvoid_t PArray,const uint8_t * Index,PJError_t PJError)293 JudySLGet(Pcvoid_t PArray, const uint8_t * Index, PJError_t PJError)
294 {
295 const uint8_t *pos = Index; // place in Index.
296 Word_t indexword; // buffer for aligned copy.
297 PPvoid_t PPValue; // from JudyL array.
298
299 // CHECK FOR CALLER ERROR (NULL POINTER):
300
301 if (Index == (uint8_t *) NULL)
302 {
303 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
304 return (PPJERR);
305 }
306
307 // SEARCH NEXT LEVEL JUDYL ARRAY IN TREE:
308 //
309 // Use or copy each word from the Index string and check for it in the next
310 // level JudyL array in the array tree, but first watch for shortcut leaves.
311 // Upon invalid Index or end of Index (string) in current word, return.
312
313 do // until return.
314 {
315 if (IS_PSCL(PArray)) // a shortcut leaf.
316 return (PPSCLVALUE_EQ(pos, PArray));
317
318 COPYSTRINGtoWORD(indexword, pos); // copy next 4[8] bytes.
319
320 JLG(PPValue, PArray, indexword);
321
322 if ((PPValue == (PPvoid_t) NULL) || LASTWORD_BY_VALUE(indexword))
323 return (PPValue);
324
325 // CONTINUE TO NEXT LEVEL DOWN JUDYL ARRAY TREE:
326 //
327 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
328 // null *PPValue; this is automatically treated here as a dead-end (not a core
329 // dump or assertion; see version 1.25).
330
331 pos += WORDSIZE;
332 PArray = *PPValue; // each value -> next array.
333 } while(1); // forever
334 // NOTREACHED JudySLGet()
335 }
336
337 // ****************************************************************************
338 // J U D Y S L I N S
339 //
340 // See also the comments in JudySLGet(), which is somewhat similar, though
341 // simpler.
342 //
343 // Theory of operation:
344 //
345 // Upon encountering a null pointer in the tree of JudyL arrays, insert a
346 // shortcut leaf -- including directly under a null root pointer for the first
347 // Index in the JudySL array.
348 //
349 // Upon encountering a pre-existing shortcut leaf, if the old Index is equal to
350 // the new one, return the old value area. Otherwise, "carry down" the old
351 // Index until the old and new Indexes diverge, at which point each Index
352 // either terminates in the last JudyL array or a new shortcut leaf is inserted
353 // under it for the Indexs remainder.
354 //
355 // TBD: Running out of memory below the starting point causes a premature
356 // return below (in several places) and leaves a dead-end in the JudySL tree.
357 // Ideally the code here would back this out rather than wasting a little
358 // memory, but in lieu of that, the get, delete, and search functions
359 // understand dead-ends and handle them appropriately.
360
361 PPvoid_t
JudySLIns(PPvoid_t PPArray,const uint8_t * Index,PJError_t PJError)362 JudySLIns(PPvoid_t PPArray, const uint8_t * Index, PJError_t PJError)
363 {
364 PPvoid_t PPArrayOrig = PPArray; // for error reporting.
365 const uint8_t *pos = Index; // place in Index.
366 const uint8_t *pos2 = (uint8_t *) NULL; // old Index (SCL being moved).
367 Word_t len; // bytes remaining.
368
369 // Note: len2 is set when needed and only used when valid, but this is not
370 // clear to gcc -Wall, so initialize it here to avoid a warning:
371
372 Word_t len2 = 0; // for old Index (SCL being moved).
373 Word_t scl2 = 0; // size in words of SCL
374 Word_t indexword; // buffer for aligned copy.
375 Word_t indexword2; // for old Index (SCL being moved).
376 PPvoid_t PPValue; // from JudyL array.
377 PPvoid_t PPValue2; // for old Index (SCL being moved).
378 Pscl_t Pscl = (Pscl_t) NULL; // shortcut leaf.
379 Pscl_t Pscl2; // for old Index (SCL being moved).
380
381 // CHECK FOR CALLER ERROR (NULL POINTERS):
382
383 if (PPArray == (PPvoid_t) NULL)
384 {
385 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY);
386 return (PPJERR);
387 }
388 if (Index == (uint8_t *) NULL)
389 {
390 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
391 return (PPJERR);
392 }
393
394 len = STRLEN(Index); // bytes remaining.
395
396 // APPEND SHORTCUT LEAF:
397 //
398 // If PPArray, which is the root pointer to the first or next JudyL array in
399 // the tree, points to null (no next JudyL array), AND there is no shortcut
400 // leaf being carried down, append a shortcut leaf here for the new Index, no
401 // matter how much of the Index string remains (one or more bytes, including
402 // the trailing \0).
403
404 while (1) // until return.
405 {
406 if (*PPArray == (Pvoid_t)NULL) // no next JudyL array.
407 {
408 if (Pscl == (Pscl_t) NULL) // no SCL being carried down.
409 {
410 APPEND_SCL(Pscl, PPArray, pos, len, PJError); // returns if error.
411 return (&(Pscl->scl_Pvalue));
412 }
413 // else do nothing here; see below.
414 }
415
416 // CARRY DOWN PRE-EXISTING SHORTCUT LEAF:
417 //
418 // When PPArray points to a pre-existing shortcut leaf, if its Index is equal
419 // to the Index to be inserted, meaning no insertion is required, return its
420 // value area; otherwise, "move it aside" and "carry it down" -- replace it
421 // (see below) with one or more levels of JudyL arrays. Moving it aside
422 // initially just means setting Pscl non-null, both as a flag and for later
423 // use, and clearing the pointer to the SCL in the JudyL array.
424
425 else if (IS_PSCL(*PPArray))
426 {
427 assert(Pscl == (Pscl_t) NULL); // no nested SCLs.
428
429 Pscl = CLEAR_PSCL(*PPArray);
430
431 pos2 = Pscl->scl_Index; // note: pos2 is always word-aligned.
432 len2 = STRLEN(pos2); // bytes remaining.
433
434 // first check if string is already inserted
435
436 if ((len == len2) && (STRCMP(pos, pos2) == 0))
437 return (&(Pscl->scl_Pvalue));
438
439 *PPArray = (Pvoid_t)NULL; // disconnect SCL.
440
441 scl2 = SCLSIZE(len2); // save for JudyFree
442
443 // continue with *PPArray now clear, and Pscl, pos2, len2 set.
444 }
445
446 // CHECK IF OLD AND NEW INDEXES DIVERGE IN THE CURRENT INDEX WORD:
447 //
448 // If a shortcut leaf is being carried down and its remaining Index chars now
449 // diverge from the remaining chars of the Index being inserted, that is, if
450 // the next words of each Index differ, "plug in" the old Index here, in a new
451 // JudyL array, before proceeding.
452 //
453 // Note: Call JudyLIns() for the SCL Index word before calling it for the new
454 // Index word, so PPValue remains correct for the latter. (JudyLIns() return
455 // values are not stable across multiple calls.)
456 //
457 // Note: Although pos2 is word-aligned, and a Pscl_t is a whole number of
458 // words in size, pos2 is not certain to be null-padded through a whole word,
459 // so copy it first to an index word for later use.
460 //
461 // See header comments about premature return().
462
463 COPYSTRINGtoWORD(indexword, pos); // copy next 4[8] bytes.
464
465 if (Pscl != (Pscl_t) NULL)
466 {
467 COPYSTRINGtoWORD(indexword2, pos2); // copy next 4[8] bytes.
468
469 if (indexword != indexword2) // SCL and new Indexes diverge.
470 {
471 assert(*PPArray == (Pvoid_t)NULL); // should be new JudyL array.
472
473 // Note: If JudyLIns() returns JU_ERRNO_NOTJUDYL here, *PPArray should not be
474 // modified, so JudySLModifyErrno() can do the right thing.
475
476 if ((PPValue2 = JudyLIns(PPArray, indexword2, PJError))
477 == PPJERR)
478 {
479 JudySLModifyErrno(PJError, *PPArray, *PPArrayOrig);
480 return (PPJERR);
481 }
482
483 assert(PPValue2 != (PPvoid_t) NULL);
484
485 // If the old (SCL) Index terminates here, copy its value directly into the
486 // JudyL value area; otherwise create a new shortcut leaf for it, under
487 // *PPValue2 (skipping the word just inserted), and copy its value to the new
488 // SCL:
489
490 if (len2 <= WORDSIZE)
491 {
492 *((PWord_t)PPValue2) = (Word_t)(Pscl->scl_Pvalue);
493 }
494 else
495 {
496 APPEND_SCL(Pscl2, PPValue2, pos2 + WORDSIZE,
497 len2 - WORDSIZE, PJError);
498 (Pscl2->scl_Pvalue) = Pscl->scl_Pvalue;
499 }
500 // old SCL no longer needed.
501
502 JudyFree((void *)Pscl, scl2);
503
504 Pscl = (Pscl_t) NULL;
505 }
506 }
507
508 // APPEND NEXT LEVEL JUDYL ARRAY TO TREE:
509 //
510 // If a shortcut leaf was carried down and diverged at this level, the code
511 // above already appended the new JudyL array, but the next word of the new
512 // Index still must be inserted in it.
513 //
514 // See header comments about premature return().
515 //
516 // Note: If JudyLIns() returns JU_ERRNO_NOTJUDYL here, *PPArray should not be
517 // modified, so JudySLModifyErrno() can do the right thing.
518
519 if ((PPValue = JudyLIns(PPArray, indexword, PJError)) == PPJERR)
520 {
521 JudySLModifyErrno(PJError, *PPArray, *PPArrayOrig);
522 return (PPJERR);
523 }
524
525 assert(PPValue != (PPvoid_t) NULL);
526
527 // CHECK IF NEW INDEX TERMINATES:
528 //
529 // Note that if it does, and an old SCL was being carried down, it must have
530 // diverged by this point, and is already handled.
531
532 if (len <= WORDSIZE)
533 {
534 assert(Pscl == (Pscl_t) NULL);
535 return (PPValue); // is value for whole Index string.
536 }
537
538 pos += WORDSIZE;
539 len -= WORDSIZE;
540 pos2 += WORDSIZE; // useless unless Pscl is set.
541 len2 -= WORDSIZE;
542
543 PPArray = PPValue; // each value -> next array.
544 } // while.
545 } // NOTREACHED, JudySLIns()
546
547 // ****************************************************************************
548 // J U D Y S L D E L
549 //
550 // See the comments in JudySLGet(), which is somewhat similar.
551 //
552 // Unlike JudySLGet() and JudySLIns(), recurse downward through the tree of
553 // JudyL arrays to find and delete the given Index, if present, and then on the
554 // way back up, any of its parent arrays which ends up empty.
555 //
556 // TECHNICAL NOTES:
557 //
558 // Recursion seems bad, but this allows for an arbitrary-length Index. Also, a
559 // more clever iterative solution that used JudyLCount() (see below) would
560 // still require a function call per tree level, so why not just recurse?
561 //
562 // An earlier version (1.20) used a fixed-size stack, which limited the Index
563 // size. We were going to replace this with using JudyLCount(), in order to
564 // note and return to (read this carefully) the highest level JudyL array with
565 // a count of 1, all of whose descendant JudyL arrays also have a count of 1,
566 // and delete from that point downwards. This solution would traverse the
567 // array tree downward looking to see if the given Index is in the tree, then
568 // if so, delete layers downwards starting below the last one that contains
569 // other Indexes than the one being deleted.
570 //
571 // TBD: To save time coding, and to very likely save time overall during
572 // execution, this function does "lazy deletions", or putting it more nicely,
573 // it allows "hysteresis" in the JudySL tree, when shortcut leafs are present.
574 // It only removes the specified Index, and recursively any empty JudyL arrays
575 // above it, without fully reversing the effects of JudySLIns(). This is
576 // probably OK because any application that calls JudySLDel() is likely to call
577 // JudySLIns() again with the same or a neighbor Index.
578
579 int
JudySLDel(PPvoid_t PPArray,const uint8_t * Index,PJError_t PJError)580 JudySLDel(PPvoid_t PPArray, const uint8_t * Index, PJError_t PJError) // optional, for returning error info.
581 {
582
583 // Check for caller error (null pointer):
584
585 if (PPArray == (PPvoid_t) NULL)
586 {
587 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY);
588 return (JERR);
589 }
590 if (Index == (uint8_t *) NULL)
591 {
592 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
593 return (JERR);
594 }
595
596 // Do the deletion:
597
598 return (JudySLDelSub(PPArray, PPArray, Index, STRLEN(Index), PJError));
599
600 } // JudySLDel()
601
602 // ****************************************************************************
603 // J U D Y S L D E L S U B
604 //
605 // This is the "engine" for JudySLDel() that expects aligned and len to already
606 // be computed (only once). See the header comments for JudySLDel().
607
608 static int
JudySLDelSub(PPvoid_t PPArray,PPvoid_t PPArrayOrig,const uint8_t * Index,Word_t len,PJError_t PJError)609 JudySLDelSub(PPvoid_t PPArray, // in which to delete.
610 PPvoid_t PPArrayOrig, // for error reporting.
611 const uint8_t * Index, // to delete.
612 Word_t len, // bytes remaining.
613 PJError_t PJError) // optional, for returning error info.
614 {
615 Word_t indexword; // next word to find.
616 PPvoid_t PPValue; // from JudyL array.
617 int retcode; // from lower-level call.
618
619 assert(PPArray != (PPvoid_t) NULL);
620 assert(Index != (uint8_t *) NULL);
621
622 // DELETE SHORTCUT LEAF:
623 //
624 // As described above, this can leave an empty JudyL array, or one containing
625 // only a single other Index word -- which could be, but is not, condensed into
626 // a higher-level shortcut leaf. More precisely, at this level it leaves a
627 // temporary "dead end" in the JudySL tree, similar to when running out of
628 // memory during JudySLIns(), and this is somewhat cleaned up by higher
629 // recursions of the same function (see below); but remaining shortcut leaves
630 // for other Indexes are not coalesced.
631
632 if (IS_PSCL(*PPArray))
633 {
634 Pscl_t Pscll = CLEAR_PSCL(*PPArray);
635 Word_t words;
636
637 if (STRCMP(Index, Pscll->scl_Index))
638 return (0); // incorrect index.
639
640 words = SCLSIZE(STRLEN(Pscll->scl_Index));
641 JudyFree((void *)Pscll, words);
642
643 *PPArray = (Pvoid_t)NULL;
644 return (1); // correct index deleted.
645 }
646
647 // DELETE LAST INDEX WORD, FROM CURRENT JUDYL ARRAY:
648 //
649 // When at the end of the full Index, delete the last word, if present, from
650 // the current JudyL array, and return the result all the way up.
651
652 COPYSTRINGtoWORD(indexword, Index); // copy next 4[8] bytes.
653
654 if (len <= WORDSIZE)
655 {
656 if ((retcode = JudyLDel(PPArray, indexword, PJError)) == JERR)
657 {
658 JudySLModifyErrno(PJError, *PPArray, *PPArrayOrig);
659 return (JERR);
660 }
661 return (retcode);
662 }
663
664 // DELETE BELOW NON-LAST INDEX WORD IN CURRENT JUDYL ARRAY:
665 //
666 // If a word before the end of the full Index is present in the current JudyL
667 // array, recurse through its value, which must be a pointer to another JudyL
668 // array, to continue the deletion at the next level. Return the JudyLGet()
669 // return if the Indexs current word is not in the JudyL array, or if no
670 // delete occurs below this level, both of which mean the whole Index is not
671 // currently valid.
672 //
673
674 JLG(PPValue, *PPArray, indexword);
675 if (PPValue == (PPvoid_t) NULL)
676 return (0); // Index not in JudySL array.
677 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
678 // null *PPValue; this is automatically treated as a dead-end (not a core dump
679 // or assertion; see version 1.25).
680 if ((retcode =
681 JudySLDelSub(PPValue, PPArrayOrig, Index + WORDSIZE,
682 len - WORDSIZE, PJError)) != 1)
683 {
684 return (retcode); // no lower-level delete, or error.
685 }
686
687 // DELETE EMPTY JUDYL ARRAY:
688 //
689 // A delete occurred below in the tree. If the child JudyL array became empty,
690 // delete the current Index word from the current JudyL array, which could
691 // empty the current array and null out *PPArray in turn (or pass through an
692 // error). Otherwise simply indicate that a deletion did occur.
693
694 if (*PPValue == (Pvoid_t)NULL)
695 {
696 if ((retcode = JudyLDel(PPArray, indexword, PJError)) == JERR)
697 {
698 JudySLModifyErrno(PJError, *PPArray, *PPArrayOrig);
699 return (JERR);
700 }
701
702 return (retcode);
703 }
704
705 return (1);
706 } // JudySLDelSub()
707
708 // ****************************************************************************
709 // J U D Y S L P R E V
710 //
711 // Recursively traverse the JudySL tree downward using JudyLGet() to look for
712 // each successive index word from Index in the JudyL array at each level. At
713 // the last level for the Index (LASTWORD_BY_LEN()), use JudyLPrev() instead of
714 // JudyLGet(), to exclude the initial Index. If this doesnt result in finding
715 // a previous Index, work back up the tree using JudyLPrev() at each higher
716 // level to search for a previous index word. Upon finding a previous index
717 // word, descend again if/as necessary, this time inclusively, to find and
718 // return the full previous Index.
719 //
720 // Also support shortcut leaves.
721 //
722 // Since this function is recursive and it also needs to know if its still
723 // looking for the original Index (to exclude it at the LASTWORD_BY_LEN()
724 // level) or for the remaining words of the previous Index (inclusive),
725 // actually call a subroutine that takes an additional parameter.
726 //
727 // See also the technical notes in JudySLDel() regarding the use of recursion
728 // rather than iteration.
729
730 PPvoid_t
JudySLPrev(Pcvoid_t PArray,uint8_t * Index,PJError_t PJError)731 JudySLPrev(Pcvoid_t PArray, uint8_t * Index, PJError_t PJError) // optional, for returning error info.
732 {
733 // Check for caller error (null pointer), or empty JudySL array:
734
735 if (Index == (uint8_t *) NULL)
736 {
737 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
738 return (PPJERR);
739 }
740
741 if (PArray == (Pvoid_t)NULL)
742 return ((PPvoid_t) NULL);
743 // Do the search:
744 return (JudySLPrevSub(PArray, Index, /* original = */ 1,
745 STRLEN(Index), PJError));
746 } // JudySLPrev()
747
748 // ****************************************************************************
749 // J U D Y S L P R E V S U B
750 //
751 // This is the "engine" for JudySLPrev() that knows whether its still looking
752 // for the original Index (exclusive) or a neighbor index (inclusive), and that
753 // expects aligned and len to already be computed (only once). See the header
754 // comments for JudySLPrev().
755
756 static PPvoid_t
JudySLPrevSub(Pcvoid_t PArray,uint8_t * Index,int orig,Word_t len,PJError_t PJError)757 JudySLPrevSub(Pcvoid_t PArray, uint8_t * Index, int orig,
758 Word_t len, // bytes remaining.
759 PJError_t PJError) // optional, for returning error info.
760 {
761 Word_t indexword; // next word to find.
762 PPvoid_t PPValue; // from JudyL array.
763 // ORIGINAL SEARCH:
764 //
765 // When at a shortcut leaf, copy its remaining Index (string) chars into Index
766 // and return its value area if the current Index is after (greater than) the
767 // SCLs index; otherwise return null.
768 if (orig)
769 {
770 if (IS_PSCL(PArray))
771 {
772 if ((PPValue = PPSCLVALUE_GT(Index, PArray)) != (PPvoid_t) NULL)
773 (void)STRCPY(Index, PSCLINDEX(PArray));
774 return (PPValue);
775 }
776
777 // If the current Index word:
778 // - is not the last word in Index (end of string),
779 // - exists in the current JudyL array, and,
780 // - a previous Index is found below it, return that Indexs value area.
781
782 COPYSTRINGtoWORD(indexword, Index); // copy next 4[8] bytes.
783 if (len > WORDSIZE) // not at end of Index.
784 {
785 JLG(PPValue, PArray, indexword);
786 if (PPValue != (PPvoid_t) NULL)
787 {
788
789 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
790 // null *PPValue; this is automatically treated as a dead-end (not a core dump
791 // or assertion; see version 1.25):
792
793 PPValue = JudySLPrevSub(*PPValue, Index + WORDSIZE,
794 /* original = */ 1,
795 len - WORDSIZE, PJError);
796 if (PPValue == PPJERR)
797 return (PPJERR); // propagate error.
798 if (PPValue != (PPvoid_t) NULL)
799 return (PPValue); // see above.
800 }
801 }
802
803 // Search for previous index word:
804 //
805 // One of the above conditions is false. Search the current JudyL array for
806 // the Index word, if any, prior to the current index word. If none is found,
807 // return null; otherwise fall through to common later code.
808
809 if ((PPValue = JudyLPrev(PArray, &indexword, PJError)) == PPJERR)
810 {
811 JudySLModifyErrno(PJError, PArray, orig ? PArray : (Pvoid_t)NULL);
812 return (PPJERR);
813 }
814
815 if (PPValue == (PPvoid_t) NULL)
816 return ((PPvoid_t) NULL); // no previous index word.
817 } // if.
818
819 // SUBSEQUENT SEARCH:
820 //
821 // A higher level search already excluded the initial Index, then found a
822 // previous index word, and is now traversing down to determine the rest of the
823 // Index and to obtain its value area. If at a shortcut leaf, return its value
824 // area. Otherwise search the current JudyL array backward from the upper
825 // limit for its last index word. If no index word is found, return null --
826 // should never happen unless the JudySL tree is corrupt; otherwise fall
827 // through to common later code.
828
829 else
830 {
831 if (IS_PSCL(PArray)) // at shortcut leaf.
832 {
833 (void)STRCPY(Index, PSCLINDEX(PArray));
834 return (&PSCLVALUE(PArray));
835 }
836
837 indexword = ~0UL;
838 if ((PPValue = JudyLLast(PArray, &indexword, PJError)) == PPJERR)
839 {
840 JudySLModifyErrno(PJError, PArray, orig ? PArray : (Pvoid_t)NULL);
841 return (PPJERR);
842 }
843
844 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
845 // null *PPValue; this is automatically treated as a dead-end (not a core dump
846 // or assertion; see version 1.25):
847
848 if (PPValue == (PPvoid_t) NULL)
849 return ((PPvoid_t) NULL); // no previous index word.
850 }
851
852 // FOUND PREVIOUS INDEX WORD:
853 //
854 // A previous (if original) or last (if subsequent) index word was located in
855 // the current JudyL array. Store it into the callers Index (string). Then
856 // if the found (previous) Index ends here, return its value area; otherwise do
857 // a subsequent search below this point, which should never fail unless the
858 // JudySL tree is corrupt, but this is detected at a lower level by the above
859 // assertion.
860 //
861 // Note: Treat Index as unaligned, even if it is aligned, to avoid writing
862 // past the end of allocated memory (in case its less than a whole number of
863 // words).
864
865 COPYWORDtoSTRING(Index, indexword); // copy next 4[8] bytes.
866 if (LASTWORD_BY_VALUE(indexword))
867 return (PPValue);
868 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
869 // null *PPValue; this is automatically treated as a dead-end (not a core dump
870 // or assertion; see version 1.25):
871 return (JudySLPrevSub(*PPValue, Index + WORDSIZE, /* original = */ 0,
872 len - WORDSIZE, PJError));
873 } // JudySLPrevSub()
874
875 // ****************************************************************************
876 // J U D Y S L N E X T
877 //
878 // See the comments in JudySLPrev(), which is very similar.
879 //
880 // TBD: Could the two functions call a common engine function with various
881 // subfunctions and other constants specified?
882
883 PPvoid_t
JudySLNext(Pcvoid_t PArray,uint8_t * Index,PJError_t PJError)884 JudySLNext(Pcvoid_t PArray, uint8_t * Index, PJError_t PJError) // optional, for returning error info.
885 {
886 // Check for caller error (null pointer), or empty JudySL array:
887
888 if (Index == (uint8_t *) NULL)
889 {
890 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
891 return (PPJERR);
892 }
893
894 if (PArray == (Pvoid_t)NULL)
895 return ((PPvoid_t) NULL);
896 // Do the search:
897 return (JudySLNextSub(PArray, Index, /* original = */ 1,
898 STRLEN(Index), PJError));
899 } // JudySLNext()
900
901 // ****************************************************************************
902 // J U D Y S L N E X T S U B
903 //
904 // See the comments in JudySLPrevSub(), which is very similar.
905
906 static PPvoid_t
JudySLNextSub(Pcvoid_t PArray,uint8_t * Index,int orig,Word_t len,PJError_t PJError)907 JudySLNextSub(Pcvoid_t PArray, uint8_t * Index, int orig,
908 Word_t len, // bytes remaining.
909 PJError_t PJError) // optional, for returning error info.
910 {
911 Word_t indexword; // next word to find.
912 PPvoid_t PPValue; // from JudyL array.
913 if (orig)
914 {
915 if (IS_PSCL(PArray))
916 {
917 if ((PPValue = PPSCLVALUE_LT(Index, PArray)) != (PPvoid_t) NULL)
918 (void)STRCPY(Index, PSCLINDEX(PArray));
919 return (PPValue);
920 }
921
922 COPYSTRINGtoWORD(indexword, Index); // copy next 4[8] bytes.
923
924 if (len > WORDSIZE) // not at end of Index.
925 {
926 JLG(PPValue, PArray, indexword);
927 if (PPValue != (PPvoid_t) NULL)
928 {
929 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
930 // null *PPValue; this is automatically treated as a dead-end (not a core dump
931 // or assertion; see version 1.25):
932
933 PPValue = JudySLNextSub(*PPValue, Index + WORDSIZE,
934 /* original = */ 1,
935 len - WORDSIZE, PJError);
936 if (PPValue == PPJERR)
937 return (PPJERR); // propagate error.
938 if (PPValue != (PPvoid_t) NULL)
939 return (PPValue); // see above.
940 }
941 }
942
943 if ((PPValue = JudyLNext(PArray, &indexword, PJError)) == PPJERR)
944 {
945 JudySLModifyErrno(PJError, PArray, orig ? PArray : (Pvoid_t)NULL);
946 return (PPJERR);
947 }
948
949 if (PPValue == (PPvoid_t) NULL)
950 return ((PPvoid_t) NULL); // no next index word.
951 }
952 else
953 {
954 if (IS_PSCL(PArray)) // at shortcut leaf.
955 {
956 (void)STRCPY(Index, PSCLINDEX(PArray));
957 return (&PSCLVALUE(PArray));
958 }
959
960 indexword = 0;
961 if ((PPValue = JudyLFirst(PArray, &indexword, PJError)) == PPJERR)
962 {
963 JudySLModifyErrno(PJError, PArray, orig ? PArray : (Pvoid_t)NULL);
964 return (PPJERR);
965 }
966
967 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
968 // null *PPValue; this is automatically treated as a dead-end (not a core dump
969 // or assertion; see version 1.25):
970
971 if (PPValue == (PPvoid_t) NULL)
972 return ((PPvoid_t) NULL); // no next index word.
973 }
974
975 COPYWORDtoSTRING(Index, indexword); // copy next 4[8] bytes
976 if (LASTWORD_BY_VALUE(indexword))
977 return (PPValue);
978 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
979 // null *PPValue; this is automatically treated as a dead-end (not a core dump
980 // or assertion; see version 1.25):
981 return (JudySLNextSub(*PPValue, Index + WORDSIZE, /* original = */ 0,
982 len - WORDSIZE, PJError));
983 } // JudySLNextSub()
984
985 // ****************************************************************************
986 // J U D Y S L F I R S T
987 //
988 // Like JudyLFirst(), do a JudySLGet(), then if necessary a JudySLNext().
989
990 PPvoid_t
JudySLFirst(Pcvoid_t PArray,uint8_t * Index,PJError_t PJError)991 JudySLFirst(Pcvoid_t PArray, uint8_t * Index, PJError_t PJError) // optional, for returning error info.
992 {
993 PPvoid_t PPValue; // from JudyL array.
994 if (Index == (uint8_t *) NULL)
995 {
996 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
997 return (PPJERR);
998 }
999
1000 if ((PPValue = JudySLGet(PArray, Index, PJError)) == PPJERR)
1001 return (PPJERR); // propagate serious error.
1002 if ((PPValue == (PPvoid_t) NULL) // first try failed.
1003 && ((PPValue = JudySLNext(PArray, Index, PJError)) == PPJERR))
1004 {
1005 return (PPJERR); // propagate serious error.
1006 }
1007
1008 return (PPValue);
1009 } // JudySLFirst()
1010
1011 // ****************************************************************************
1012 // J U D Y S L L A S T
1013 //
1014 // Like JudyLLast(), do a JudySLGet(), then if necessary a JudySLPrev().
1015
1016 PPvoid_t
JudySLLast(Pcvoid_t PArray,uint8_t * Index,PJError_t PJError)1017 JudySLLast(Pcvoid_t PArray, uint8_t * Index, PJError_t PJError) // optional, for returning error info.
1018 {
1019 PPvoid_t PPValue; // from JudyL array.
1020 if (Index == (uint8_t *) NULL)
1021 {
1022 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX);
1023 return (PPJERR);
1024 }
1025
1026 if ((PPValue = JudySLGet(PArray, Index, PJError)) == PPJERR)
1027 return (PPJERR); // propagate serious error.
1028 if ((PPValue == (PPvoid_t) NULL) // first try failed.
1029 && ((PPValue = JudySLPrev(PArray, Index, PJError)) == PPJERR))
1030 {
1031 return (PPJERR); // propagate serious error.
1032 }
1033
1034 return (PPValue);
1035 } // JudySLLast()
1036
1037 // ****************************************************************************
1038 // J U D Y S L F R E E A R R A Y
1039 //
1040 // Walk the JudySL tree of JudyL arrays to free each JudyL array, depth-first.
1041 // During the walk, ignore indexes (strings) that end in the current JudyL
1042 // array to be freed. Just recurse through those indexes which do not end,
1043 // that is, those whose associated value areas point to subsidiary JudyL
1044 // arrays, except for those which point to shortcut leaves. Return the total
1045 // bytes freed in all of the JudyL arrays at or below the current level.
1046 //
1047 // Like the JudyLFreeArray() and Judy1FreeArray() code, this is written
1048 // recursively, which is probably fast enough, to allow indexes (strings) of
1049 // arbitrary size. If recursion turns out to be a problem, consider instead
1050 // doing some large, fixed number of iterative descents (like 100) using a
1051 // fixed-size "stack" (array), then recursing upon overflow (relatively
1052 // rarely).
1053
1054 Word_t
JudySLFreeArray(PPvoid_t PPArray,PJError_t PJError)1055 JudySLFreeArray(PPvoid_t PPArray, PJError_t PJError) // optional, for returning error info.
1056 {
1057 PPvoid_t PPArrayOrig = PPArray; // for error reporting.
1058 Word_t indexword = 0; // word just found.
1059 PPvoid_t PPValue; // from Judy array.
1060 Word_t bytes_freed = 0; // bytes freed at this level.
1061 Word_t bytes_total = 0; // bytes freed at all levels.
1062 if (PPArray == (PPvoid_t) NULL)
1063 {
1064 JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY);
1065 return (JERR);
1066 }
1067
1068 // FREE SHORTCUT LEAF:
1069
1070 if (IS_PSCL(*PPArray))
1071 {
1072 Word_t freewords;
1073 Pscl_t Pscl;
1074
1075 Pscl = CLEAR_PSCL(*PPArray);
1076
1077 freewords = SCLSIZE(STRLEN(Pscl->scl_Index));
1078
1079 JudyFree((void *)Pscl, freewords);
1080
1081 *PPArray = (Pvoid_t)NULL;
1082
1083 return (freewords * WORDSIZE);
1084 }
1085
1086 // FREE EACH SUB-ARRAY (DEPTH-FIRST):
1087 //
1088 // If a previous JudySLIns() ran out of memory partway down the tree, it left a
1089 // null *PPValue. This is automatically treated correctly here as a dead-end.
1090 //
1091 // An Index (string) ends in the current word iff the last byte of the
1092 // (null-padded) word is null.
1093
1094 for (PPValue = JudyLFirst(*PPArray, &indexword, PJError);
1095 (PPValue != (PPvoid_t) NULL) && (PPValue != PPJERR);
1096 PPValue = JudyLNext(*PPArray, &indexword, PJError))
1097 {
1098 if (!LASTWORD_BY_VALUE(indexword))
1099 {
1100 if ((bytes_freed = JudySLFreeArray(PPValue, PJError)) == JERR)
1101 return (JERR); // propagate serious error.
1102 bytes_total += bytes_freed;
1103 }
1104 }
1105
1106 // Check for a serious error in a JudyL*() call:
1107
1108 if (PPValue == PPJERR)
1109 {
1110 JudySLModifyErrno(PJError, *PPArray, *PPArrayOrig);
1111 return (JERR);
1112 }
1113
1114 // Now free the current array, which also nulls the pointer:
1115 //
1116 // Note: *PPArray can be null here for a totally null JudySL array =>
1117 // JudyLFreeArray() returns zero.
1118
1119 if ((bytes_freed = JudyLFreeArray(PPArray, PJError)) == JERR)
1120 {
1121 JudySLModifyErrno(PJError, *PPArray, *PPArrayOrig);
1122 return (JERR);
1123 }
1124 return (bytes_total + bytes_freed);
1125 } // JudySLFreeArray()
1126