1 /*
2  * "$Id: mxml-index.c 426 2011-01-01 23:42:17Z mike $"
3  *
4  * Index support code for Mini-XML, a small XML-like file parsing library.
5  *
6  * Copyright 2003-2011 by Michael R Sweet.
7  *
8  * These coded instructions, statements, and computer programs are the
9  * property of Michael R Sweet and are protected by Federal copyright
10  * law.  Distribution and use rights are outlined in the file "COPYING"
11  * which should have been included with this file.  If this file is
12  * missing or damaged, see the license at:
13  *
14  *     http://www.minixml.org/
15  *
16  * Contents:
17  *
18  */
19 
20 /*
21  * Include necessary headers...
22  */
23 
24 #include "mxml-config.h"
25 #include "mxml.h"
26 
27 
28 /*
29  * Sort functions...
30  */
31 
32 static int	index_compare(mxml_index_t *ind, mxml_node_t *first,
33 		              mxml_node_t *second);
34 static int	index_find(mxml_index_t *ind, const char *element,
35 		           const char *value, mxml_node_t *node);
36 static void	index_sort(mxml_index_t *ind, int left, int right);
37 
38 
39 /*
40  * 'mxmlIndexDelete()' - Delete an index.
41  */
42 
43 void
mxmlIndexDelete(mxml_index_t * ind)44 mxmlIndexDelete(mxml_index_t *ind)	/* I - Index to delete */
45 {
46  /*
47   * Range check input..
48   */
49 
50   if (!ind)
51     return;
52 
53  /*
54   * Free memory...
55   */
56 
57   if (ind->attr)
58     free(ind->attr);
59 
60   if (ind->alloc_nodes)
61     free(ind->nodes);
62 
63   free(ind);
64 }
65 
66 
67 /*
68  * 'mxmlIndexEnum()' - Return the next node in the index.
69  *
70  * Nodes are returned in the sorted order of the index.
71  */
72 
73 mxml_node_t *				/* O - Next node or NULL if there is none */
mxmlIndexEnum(mxml_index_t * ind)74 mxmlIndexEnum(mxml_index_t *ind)	/* I - Index to enumerate */
75 {
76  /*
77   * Range check input...
78   */
79 
80   if (!ind)
81     return (NULL);
82 
83  /*
84   * Return the next node...
85   */
86 
87   if (ind->cur_node < ind->num_nodes)
88     return (ind->nodes[ind->cur_node ++]);
89   else
90     return (NULL);
91 }
92 
93 
94 /*
95  * 'mxmlIndexFind()' - Find the next matching node.
96  *
97  * You should call mxmlIndexReset() prior to using this function for
98  * the first time with a particular set of "element" and "value"
99  * strings. Passing NULL for both "element" and "value" is equivalent
100  * to calling mxmlIndexEnum().
101  */
102 
103 mxml_node_t *				/* O - Node or NULL if none found */
mxmlIndexFind(mxml_index_t * ind,const char * element,const char * value)104 mxmlIndexFind(mxml_index_t *ind,	/* I - Index to search */
105               const char   *element,	/* I - Element name to find, if any */
106 	      const char   *value)	/* I - Attribute value, if any */
107 {
108   int		diff,			/* Difference between names */
109 		current,		/* Current entity in search */
110 		first,			/* First entity in search */
111 		last;			/* Last entity in search */
112 
113 
114 #ifdef DEBUG
115   printf("mxmlIndexFind(ind=%p, element=\"%s\", value=\"%s\")\n",
116          ind, element ? element : "(null)", value ? value : "(null)");
117 #endif /* DEBUG */
118 
119  /*
120   * Range check input...
121   */
122 
123   if (!ind || (!ind->attr && value))
124   {
125 #ifdef DEBUG
126     puts("    returning NULL...");
127     printf("    ind->attr=\"%s\"\n", ind->attr ? ind->attr : "(null)");
128 #endif /* DEBUG */
129 
130     return (NULL);
131   }
132 
133  /*
134   * If both element and value are NULL, just enumerate the nodes in the
135   * index...
136   */
137 
138   if (!element && !value)
139     return (mxmlIndexEnum(ind));
140 
141  /*
142   * If there are no nodes in the index, return NULL...
143   */
144 
145   if (!ind->num_nodes)
146   {
147 #ifdef DEBUG
148     puts("    returning NULL...");
149     puts("    no nodes!");
150 #endif /* DEBUG */
151 
152     return (NULL);
153   }
154 
155  /*
156   * If cur_node == 0, then find the first matching node...
157   */
158 
159   if (ind->cur_node == 0)
160   {
161    /*
162     * Find the first node using a modified binary search algorithm...
163     */
164 
165     first = 0;
166     last  = ind->num_nodes - 1;
167 
168 #ifdef DEBUG
169     printf("    find first time, num_nodes=%d...\n", ind->num_nodes);
170 #endif /* DEBUG */
171 
172     while ((last - first) > 1)
173     {
174       current = (first + last) / 2;
175 
176 #ifdef DEBUG
177       printf("    first=%d, last=%d, current=%d\n", first, last, current);
178 #endif /* DEBUG */
179 
180       if ((diff = index_find(ind, element, value, ind->nodes[current])) == 0)
181       {
182        /*
183         * Found a match, move back to find the first...
184 	*/
185 
186 #ifdef DEBUG
187         puts("    match!");
188 #endif /* DEBUG */
189 
190         while (current > 0 &&
191 	       !index_find(ind, element, value, ind->nodes[current - 1]))
192 	  current --;
193 
194 #ifdef DEBUG
195         printf("    returning first match=%d\n", current);
196 #endif /* DEBUG */
197 
198        /*
199         * Return the first match and save the index to the next...
200 	*/
201 
202         ind->cur_node = current + 1;
203 
204 	return (ind->nodes[current]);
205       }
206       else if (diff < 0)
207 	last = current;
208       else
209 	first = current;
210 
211 #ifdef DEBUG
212       printf("    diff=%d\n", diff);
213 #endif /* DEBUG */
214     }
215 
216    /*
217     * If we get this far, then we found exactly 0 or 1 matches...
218     */
219 
220     for (current = first; current <= last; current ++)
221       if (!index_find(ind, element, value, ind->nodes[current]))
222       {
223        /*
224 	* Found exactly one (or possibly two) match...
225 	*/
226 
227 #ifdef DEBUG
228 	printf("    returning only match %d...\n", current);
229 #endif /* DEBUG */
230 
231 	ind->cur_node = current + 1;
232 
233 	return (ind->nodes[current]);
234       }
235 
236    /*
237     * No matches...
238     */
239 
240     ind->cur_node = ind->num_nodes;
241 
242 #ifdef DEBUG
243     puts("    returning NULL...");
244 #endif /* DEBUG */
245 
246     return (NULL);
247   }
248   else if (ind->cur_node < ind->num_nodes &&
249            !index_find(ind, element, value, ind->nodes[ind->cur_node]))
250   {
251    /*
252     * Return the next matching node...
253     */
254 
255 #ifdef DEBUG
256     printf("    returning next match %d...\n", ind->cur_node);
257 #endif /* DEBUG */
258 
259     return (ind->nodes[ind->cur_node ++]);
260   }
261 
262  /*
263   * If we get this far, then we have no matches...
264   */
265 
266   ind->cur_node = ind->num_nodes;
267 
268 #ifdef DEBUG
269   puts("    returning NULL...");
270 #endif /* DEBUG */
271 
272   return (NULL);
273 }
274 
275 
276 /*
277  * 'mxmlIndexGetCount()' - Get the number of nodes in an index.
278  *
279  * @since Mini-XML 2.7@
280  */
281 
282 int					/* I - Number of nodes in index */
mxmlIndexGetCount(mxml_index_t * ind)283 mxmlIndexGetCount(mxml_index_t *ind)	/* I - Index of nodes */
284 {
285  /*
286   * Range check input...
287   */
288 
289   if (!ind)
290     return (0);
291 
292  /*
293   * Return the number of nodes in the index...
294   */
295 
296   return (ind->num_nodes);
297 }
298 
299 
300 /*
301  * 'mxmlIndexNew()' - Create a new index.
302  *
303  * The index will contain all nodes that contain the named element and/or
304  * attribute. If both "element" and "attr" are NULL, then the index will
305  * contain a sorted list of the elements in the node tree.  Nodes are
306  * sorted by element name and optionally by attribute value if the "attr"
307  * argument is not NULL.
308  */
309 
310 mxml_index_t *				/* O - New index */
mxmlIndexNew(mxml_node_t * node,const char * element,const char * attr)311 mxmlIndexNew(mxml_node_t *node,		/* I - XML node tree */
312              const char  *element,	/* I - Element to index or NULL for all */
313              const char  *attr)		/* I - Attribute to index or NULL for none */
314 {
315   mxml_index_t	*ind;			/* New index */
316   mxml_node_t	*current,		/* Current node in index */
317   		**temp;			/* Temporary node pointer array */
318 
319 
320  /*
321   * Range check input...
322   */
323 
324 #ifdef DEBUG
325   printf("mxmlIndexNew(node=%p, element=\"%s\", attr=\"%s\")\n",
326          node, element ? element : "(null)", attr ? attr : "(null)");
327 #endif /* DEBUG */
328 
329   if (!node)
330     return (NULL);
331 
332  /*
333   * Create a new index...
334   */
335 
336   if ((ind = calloc(1, sizeof(mxml_index_t))) == NULL)
337   {
338     mxml_error("Unable to allocate %d bytes for index - %s",
339                sizeof(mxml_index_t), strerror(errno));
340     return (NULL);
341   }
342 
343   if (attr)
344     ind->attr = strdup(attr);
345 
346   if (!element && !attr)
347     current = node;
348   else
349     current = mxmlFindElement(node, node, element, attr, NULL, MXML_DESCEND);
350 
351   while (current)
352   {
353     if (ind->num_nodes >= ind->alloc_nodes)
354     {
355       if (!ind->alloc_nodes)
356         temp = malloc(64 * sizeof(mxml_node_t *));
357       else
358         temp = realloc(ind->nodes, (ind->alloc_nodes + 64) * sizeof(mxml_node_t *));
359 
360       if (!temp)
361       {
362        /*
363         * Unable to allocate memory for the index, so abort...
364 	*/
365 
366         mxml_error("Unable to allocate %d bytes for index: %s",
367 	           (ind->alloc_nodes + 64) * sizeof(mxml_node_t *),
368 		   strerror(errno));
369 
370         mxmlIndexDelete(ind);
371 	return (NULL);
372       }
373 
374       ind->nodes       = temp;
375       ind->alloc_nodes += 64;
376     }
377 
378     ind->nodes[ind->num_nodes ++] = current;
379 
380     current = mxmlFindElement(current, node, element, attr, NULL, MXML_DESCEND);
381   }
382 
383  /*
384   * Sort nodes based upon the search criteria...
385   */
386 
387 #ifdef DEBUG
388   {
389     int i;				/* Looping var */
390 
391 
392     printf("%d node(s) in index.\n\n", ind->num_nodes);
393 
394     if (attr)
395     {
396       printf("Node      Address   Element         %s\n", attr);
397       puts("--------  --------  --------------  ------------------------------");
398 
399       for (i = 0; i < ind->num_nodes; i ++)
400 	printf("%8d  %-8p  %-14.14s  %s\n", i, ind->nodes[i],
401 	       ind->nodes[i]->value.element.name,
402 	       mxmlElementGetAttr(ind->nodes[i], attr));
403     }
404     else
405     {
406       puts("Node      Address   Element");
407       puts("--------  --------  --------------");
408 
409       for (i = 0; i < ind->num_nodes; i ++)
410 	printf("%8d  %-8p  %s\n", i, ind->nodes[i],
411 	       ind->nodes[i]->value.element.name);
412     }
413 
414     putchar('\n');
415   }
416 #endif /* DEBUG */
417 
418   if (ind->num_nodes > 1)
419     index_sort(ind, 0, ind->num_nodes - 1);
420 
421 #ifdef DEBUG
422   {
423     int i;				/* Looping var */
424 
425 
426     puts("After sorting:\n");
427 
428     if (attr)
429     {
430       printf("Node      Address   Element         %s\n", attr);
431       puts("--------  --------  --------------  ------------------------------");
432 
433       for (i = 0; i < ind->num_nodes; i ++)
434 	printf("%8d  %-8p  %-14.14s  %s\n", i, ind->nodes[i],
435 	       ind->nodes[i]->value.element.name,
436 	       mxmlElementGetAttr(ind->nodes[i], attr));
437     }
438     else
439     {
440       puts("Node      Address   Element");
441       puts("--------  --------  --------------");
442 
443       for (i = 0; i < ind->num_nodes; i ++)
444 	printf("%8d  %-8p  %s\n", i, ind->nodes[i],
445 	       ind->nodes[i]->value.element.name);
446     }
447 
448     putchar('\n');
449   }
450 #endif /* DEBUG */
451 
452  /*
453   * Return the new index...
454   */
455 
456   return (ind);
457 }
458 
459 
460 /*
461  * 'mxmlIndexReset()' - Reset the enumeration/find pointer in the index and
462  *                      return the first node in the index.
463  *
464  * This function should be called prior to using mxmlIndexEnum() or
465  * mxmlIndexFind() for the first time.
466  */
467 
468 mxml_node_t *				/* O - First node or NULL if there is none */
mxmlIndexReset(mxml_index_t * ind)469 mxmlIndexReset(mxml_index_t *ind)	/* I - Index to reset */
470 {
471 #ifdef DEBUG
472   printf("mxmlIndexReset(ind=%p)\n", ind);
473 #endif /* DEBUG */
474 
475  /*
476   * Range check input...
477   */
478 
479   if (!ind)
480     return (NULL);
481 
482  /*
483   * Set the index to the first element...
484   */
485 
486   ind->cur_node = 0;
487 
488  /*
489   * Return the first node...
490   */
491 
492   if (ind->num_nodes)
493     return (ind->nodes[0]);
494   else
495     return (NULL);
496 }
497 
498 
499 /*
500  * 'index_compare()' - Compare two nodes.
501  */
502 
503 static int				/* O - Result of comparison */
index_compare(mxml_index_t * ind,mxml_node_t * first,mxml_node_t * second)504 index_compare(mxml_index_t *ind,	/* I - Index */
505               mxml_node_t  *first,	/* I - First node */
506               mxml_node_t  *second)	/* I - Second node */
507 {
508   int	diff;				/* Difference */
509 
510 
511  /*
512   * Check the element name...
513   */
514 
515   if ((diff = strcmp(first->value.element.name,
516                      second->value.element.name)) != 0)
517     return (diff);
518 
519  /*
520   * Check the attribute value...
521   */
522 
523   if (ind->attr)
524   {
525     if ((diff = strcmp(mxmlElementGetAttr(first, ind->attr),
526                        mxmlElementGetAttr(second, ind->attr))) != 0)
527       return (diff);
528   }
529 
530  /*
531   * No difference, return 0...
532   */
533 
534   return (0);
535 }
536 
537 
538 /*
539  * 'index_find()' - Compare a node with index values.
540  */
541 
542 static int				/* O - Result of comparison */
index_find(mxml_index_t * ind,const char * element,const char * value,mxml_node_t * node)543 index_find(mxml_index_t *ind,		/* I - Index */
544            const char   *element,	/* I - Element name or NULL */
545 	   const char   *value,		/* I - Attribute value or NULL */
546            mxml_node_t  *node)		/* I - Node */
547 {
548   int	diff;				/* Difference */
549 
550 
551  /*
552   * Check the element name...
553   */
554 
555   if (element)
556   {
557     if ((diff = strcmp(element, node->value.element.name)) != 0)
558       return (diff);
559   }
560 
561  /*
562   * Check the attribute value...
563   */
564 
565   if (value)
566   {
567     if ((diff = strcmp(value, mxmlElementGetAttr(node, ind->attr))) != 0)
568       return (diff);
569   }
570 
571  /*
572   * No difference, return 0...
573   */
574 
575   return (0);
576 }
577 
578 
579 /*
580  * 'index_sort()' - Sort the nodes in the index...
581  *
582  * This function implements the classic quicksort algorithm...
583  */
584 
585 static void
index_sort(mxml_index_t * ind,int left,int right)586 index_sort(mxml_index_t *ind,		/* I - Index to sort */
587            int          left,		/* I - Left node in partition */
588 	   int          right)		/* I - Right node in partition */
589 {
590   mxml_node_t	*pivot,			/* Pivot node */
591 		*temp;			/* Swap node */
592   int		templ,			/* Temporary left node */
593 		tempr;			/* Temporary right node */
594 
595 
596  /*
597   * Loop until we have sorted all the way to the right...
598   */
599 
600   do
601   {
602    /*
603     * Sort the pivot in the current partition...
604     */
605 
606     pivot = ind->nodes[left];
607 
608     for (templ = left, tempr = right; templ < tempr;)
609     {
610      /*
611       * Move left while left node <= pivot node...
612       */
613 
614       while ((templ < right) &&
615              index_compare(ind, ind->nodes[templ], pivot) <= 0)
616 	templ ++;
617 
618      /*
619       * Move right while right node > pivot node...
620       */
621 
622       while ((tempr > left) &&
623              index_compare(ind, ind->nodes[tempr], pivot) > 0)
624 	tempr --;
625 
626      /*
627       * Swap nodes if needed...
628       */
629 
630       if (templ < tempr)
631       {
632 	temp              = ind->nodes[templ];
633 	ind->nodes[templ] = ind->nodes[tempr];
634 	ind->nodes[tempr] = temp;
635       }
636     }
637 
638    /*
639     * When we get here, the right (tempr) node is the new position for the
640     * pivot node...
641     */
642 
643     if (index_compare(ind, pivot, ind->nodes[tempr]) > 0)
644     {
645       ind->nodes[left]  = ind->nodes[tempr];
646       ind->nodes[tempr] = pivot;
647     }
648 
649    /*
650     * Recursively sort the left partition as needed...
651     */
652 
653     if (left < (tempr - 1))
654       index_sort(ind, left, tempr - 1);
655   }
656   while (right > (left = tempr + 1));
657 }
658 
659 
660 /*
661  * End of "$Id: mxml-index.c 426 2011-01-01 23:42:17Z mike $".
662  */
663