1 /*
2  * Index support code for Mini-XML, a small XML file parsing library.
3  *
4  * https://www.msweet.org/mxml
5  *
6  * Copyright © 2003-2019 by Michael R Sweet.
7  *
8  * Licensed under Apache License v2.0.  See the file "LICENSE" for more
9  * information.
10  */
11 
12 /*
13  * Include necessary headers...
14  */
15 
16 #include "config.h"
17 #include "mxml-private.h"
18 
19 
20 /*
21  * Sort functions...
22  */
23 
24 static int	index_compare(mxml_index_t *ind, mxml_node_t *first,
25 		              mxml_node_t *second);
26 static int	index_find(mxml_index_t *ind, const char *element,
27 		           const char *value, mxml_node_t *node);
28 static void	index_sort(mxml_index_t *ind, int left, int right);
29 
30 
31 /*
32  * 'mxmlIndexDelete()' - Delete an index.
33  */
34 
35 void
36 mxmlIndexDelete(mxml_index_t *ind)	/* I - Index to delete */
37 {
38  /*
39   * Range check input..
40   */
41 
42   if (!ind)
43     return;
44 
45  /*
46   * Free memory...
47   */
48 
49   if (ind->attr)
50     free(ind->attr);
51 
52   if (ind->alloc_nodes)
53     free(ind->nodes);
54 
55   free(ind);
56 }
57 
58 
59 /*
60  * 'mxmlIndexEnum()' - Return the next node in the index.
61  *
62  * You should call @link mxmlIndexReset@ prior to using this function to get
63  * the first node in the index.  Nodes are returned in the sorted order of the
64  * index.
65  */
66 
67 mxml_node_t *				/* O - Next node or @code NULL@ if there is none */
68 mxmlIndexEnum(mxml_index_t *ind)	/* I - Index to enumerate */
69 {
70  /*
71   * Range check input...
72   */
73 
74   if (!ind)
75     return (NULL);
76 
77  /*
78   * Return the next node...
79   */
80 
81   if (ind->cur_node < ind->num_nodes)
82     return (ind->nodes[ind->cur_node ++]);
83   else
84     return (NULL);
85 }
86 
87 
88 /*
89  * 'mxmlIndexFind()' - Find the next matching node.
90  *
91  * You should call @link mxmlIndexReset@ prior to using this function for
92  * the first time with a particular set of "element" and "value"
93  * strings. Passing @code NULL@ for both "element" and "value" is equivalent
94  * to calling @link mxmlIndexEnum@.
95  */
96 
97 mxml_node_t *				/* O - Node or @code NULL@ if none found */
98 mxmlIndexFind(mxml_index_t *ind,	/* I - Index to search */
99               const char   *element,	/* I - Element name to find, if any */
100 	      const char   *value)	/* I - Attribute value, if any */
101 {
102   int		diff,			/* Difference between names */
103 		current,		/* Current entity in search */
104 		first,			/* First entity in search */
105 		last;			/* Last entity in search */
106 
107 
108 #ifdef DEBUG
109   printf("mxmlIndexFind(ind=%p, element=\"%s\", value=\"%s\")\n",
110          ind, element ? element : "(null)", value ? value : "(null)");
111 #endif /* DEBUG */
112 
113  /*
114   * Range check input...
115   */
116 
117   if (!ind || (!ind->attr && value))
118   {
119 #ifdef DEBUG
120     puts("    returning NULL...");
121     if (ind)
122       printf("    ind->attr=\"%s\"\n", ind->attr ? ind->attr : "(null)");
123 #endif /* DEBUG */
124 
125     return (NULL);
126   }
127 
128  /*
129   * If both element and value are NULL, just enumerate the nodes in the
130   * index...
131   */
132 
133   if (!element && !value)
134     return (mxmlIndexEnum(ind));
135 
136  /*
137   * If there are no nodes in the index, return NULL...
138   */
139 
140   if (!ind->num_nodes)
141   {
142 #ifdef DEBUG
143     puts("    returning NULL...");
144     puts("    no nodes!");
145 #endif /* DEBUG */
146 
147     return (NULL);
148   }
149 
150  /*
151   * If cur_node == 0, then find the first matching node...
152   */
153 
154   if (ind->cur_node == 0)
155   {
156    /*
157     * Find the first node using a modified binary search algorithm...
158     */
159 
160     first = 0;
161     last  = ind->num_nodes - 1;
162 
163 #ifdef DEBUG
164     printf("    find first time, num_nodes=%d...\n", ind->num_nodes);
165 #endif /* DEBUG */
166 
167     while ((last - first) > 1)
168     {
169       current = (first + last) / 2;
170 
171 #ifdef DEBUG
172       printf("    first=%d, last=%d, current=%d\n", first, last, current);
173 #endif /* DEBUG */
174 
175       if ((diff = index_find(ind, element, value, ind->nodes[current])) == 0)
176       {
177        /*
178         * Found a match, move back to find the first...
179 	*/
180 
181 #ifdef DEBUG
182         puts("    match!");
183 #endif /* DEBUG */
184 
185         while (current > 0 &&
186 	       !index_find(ind, element, value, ind->nodes[current - 1]))
187 	  current --;
188 
189 #ifdef DEBUG
190         printf("    returning first match=%d\n", current);
191 #endif /* DEBUG */
192 
193        /*
194         * Return the first match and save the index to the next...
195 	*/
196 
197         ind->cur_node = current + 1;
198 
199 	return (ind->nodes[current]);
200       }
201       else if (diff < 0)
202 	last = current;
203       else
204 	first = current;
205 
206 #ifdef DEBUG
207       printf("    diff=%d\n", diff);
208 #endif /* DEBUG */
209     }
210 
211    /*
212     * If we get this far, then we found exactly 0 or 1 matches...
213     */
214 
215     for (current = first; current <= last; current ++)
216       if (!index_find(ind, element, value, ind->nodes[current]))
217       {
218        /*
219 	* Found exactly one (or possibly two) match...
220 	*/
221 
222 #ifdef DEBUG
223 	printf("    returning only match %d...\n", current);
224 #endif /* DEBUG */
225 
226 	ind->cur_node = current + 1;
227 
228 	return (ind->nodes[current]);
229       }
230 
231    /*
232     * No matches...
233     */
234 
235     ind->cur_node = ind->num_nodes;
236 
237 #ifdef DEBUG
238     puts("    returning NULL...");
239 #endif /* DEBUG */
240 
241     return (NULL);
242   }
243   else if (ind->cur_node < ind->num_nodes &&
244            !index_find(ind, element, value, ind->nodes[ind->cur_node]))
245   {
246    /*
247     * Return the next matching node...
248     */
249 
250 #ifdef DEBUG
251     printf("    returning next match %d...\n", ind->cur_node);
252 #endif /* DEBUG */
253 
254     return (ind->nodes[ind->cur_node ++]);
255   }
256 
257  /*
258   * If we get this far, then we have no matches...
259   */
260 
261   ind->cur_node = ind->num_nodes;
262 
263 #ifdef DEBUG
264   puts("    returning NULL...");
265 #endif /* DEBUG */
266 
267   return (NULL);
268 }
269 
270 
271 /*
272  * 'mxmlIndexGetCount()' - Get the number of nodes in an index.
273  *
274  * @since Mini-XML 2.7@
275  */
276 
277 int					/* I - Number of nodes in index */
278 mxmlIndexGetCount(mxml_index_t *ind)	/* I - Index of nodes */
279 {
280  /*
281   * Range check input...
282   */
283 
284   if (!ind)
285     return (0);
286 
287  /*
288   * Return the number of nodes in the index...
289   */
290 
291   return (ind->num_nodes);
292 }
293 
294 
295 /*
296  * 'mxmlIndexNew()' - Create a new index.
297  *
298  * The index will contain all nodes that contain the named element and/or
299  * attribute.  If both "element" and "attr" are @code NULL@, then the index will
300  * contain a sorted list of the elements in the node tree.  Nodes are
301  * sorted by element name and optionally by attribute value if the "attr"
302  * argument is not NULL.
303  */
304 
305 mxml_index_t *				/* O - New index */
306 mxmlIndexNew(mxml_node_t *node,		/* I - XML node tree */
307              const char  *element,	/* I - Element to index or @code NULL@ for all */
308              const char  *attr)		/* I - Attribute to index or @code NULL@ for none */
309 {
310   mxml_index_t	*ind;			/* New index */
311   mxml_node_t	*current,		/* Current node in index */
312   		**temp;			/* Temporary node pointer array */
313 
314 
315  /*
316   * Range check input...
317   */
318 
319 #ifdef DEBUG
320   printf("mxmlIndexNew(node=%p, element=\"%s\", attr=\"%s\")\n",
321          node, element ? element : "(null)", attr ? attr : "(null)");
322 #endif /* DEBUG */
323 
324   if (!node)
325     return (NULL);
326 
327  /*
328   * Create a new index...
329   */
330 
331   if ((ind = calloc(1, sizeof(mxml_index_t))) == NULL)
332   {
333     mxml_error("Unable to allocate %d bytes for index - %s",
334                sizeof(mxml_index_t), strerror(errno));
335     return (NULL);
336   }
337 
338   if (attr)
339     ind->attr = strdup(attr);
340 
341   if (!element && !attr)
342     current = node;
343   else
344     current = mxmlFindElement(node, node, element, attr, NULL, MXML_DESCEND);
345 
346   while (current)
347   {
348     if (ind->num_nodes >= ind->alloc_nodes)
349     {
350       if (!ind->alloc_nodes)
351         temp = malloc(64 * sizeof(mxml_node_t *));
352       else
353         temp = realloc(ind->nodes, (ind->alloc_nodes + 64) * sizeof(mxml_node_t *));
354 
355       if (!temp)
356       {
357        /*
358         * Unable to allocate memory for the index, so abort...
359 	*/
360 
361         mxml_error("Unable to allocate %d bytes for index: %s",
362 	           (ind->alloc_nodes + 64) * sizeof(mxml_node_t *),
363 		   strerror(errno));
364 
365         mxmlIndexDelete(ind);
366 	return (NULL);
367       }
368 
369       ind->nodes       = temp;
370       ind->alloc_nodes += 64;
371     }
372 
373     ind->nodes[ind->num_nodes ++] = current;
374 
375     current = mxmlFindElement(current, node, element, attr, NULL, MXML_DESCEND);
376   }
377 
378  /*
379   * Sort nodes based upon the search criteria...
380   */
381 
382 #ifdef DEBUG
383   {
384     int i;				/* Looping var */
385 
386 
387     printf("%d node(s) in index.\n\n", ind->num_nodes);
388 
389     if (attr)
390     {
391       printf("Node      Address   Element         %s\n", attr);
392       puts("--------  --------  --------------  ------------------------------");
393 
394       for (i = 0; i < ind->num_nodes; i ++)
395 	printf("%8d  %-8p  %-14.14s  %s\n", i, ind->nodes[i],
396 	       ind->nodes[i]->value.element.name,
397 	       mxmlElementGetAttr(ind->nodes[i], attr));
398     }
399     else
400     {
401       puts("Node      Address   Element");
402       puts("--------  --------  --------------");
403 
404       for (i = 0; i < ind->num_nodes; i ++)
405 	printf("%8d  %-8p  %s\n", i, ind->nodes[i],
406 	       ind->nodes[i]->value.element.name);
407     }
408 
409     putchar('\n');
410   }
411 #endif /* DEBUG */
412 
413   if (ind->num_nodes > 1)
414     index_sort(ind, 0, ind->num_nodes - 1);
415 
416 #ifdef DEBUG
417   {
418     int i;				/* Looping var */
419 
420 
421     puts("After sorting:\n");
422 
423     if (attr)
424     {
425       printf("Node      Address   Element         %s\n", attr);
426       puts("--------  --------  --------------  ------------------------------");
427 
428       for (i = 0; i < ind->num_nodes; i ++)
429 	printf("%8d  %-8p  %-14.14s  %s\n", i, ind->nodes[i],
430 	       ind->nodes[i]->value.element.name,
431 	       mxmlElementGetAttr(ind->nodes[i], attr));
432     }
433     else
434     {
435       puts("Node      Address   Element");
436       puts("--------  --------  --------------");
437 
438       for (i = 0; i < ind->num_nodes; i ++)
439 	printf("%8d  %-8p  %s\n", i, ind->nodes[i],
440 	       ind->nodes[i]->value.element.name);
441     }
442 
443     putchar('\n');
444   }
445 #endif /* DEBUG */
446 
447  /*
448   * Return the new index...
449   */
450 
451   return (ind);
452 }
453 
454 
455 /*
456  * 'mxmlIndexReset()' - Reset the enumeration/find pointer in the index and
457  *                      return the first node in the index.
458  *
459  * This function should be called prior to using @link mxmlIndexEnum@ or
460  * @link mxmlIndexFind@ for the first time.
461  */
462 
463 mxml_node_t *				/* O - First node or @code NULL@ if there is none */
464 mxmlIndexReset(mxml_index_t *ind)	/* I - Index to reset */
465 {
466 #ifdef DEBUG
467   printf("mxmlIndexReset(ind=%p)\n", ind);
468 #endif /* DEBUG */
469 
470  /*
471   * Range check input...
472   */
473 
474   if (!ind)
475     return (NULL);
476 
477  /*
478   * Set the index to the first element...
479   */
480 
481   ind->cur_node = 0;
482 
483  /*
484   * Return the first node...
485   */
486 
487   if (ind->num_nodes)
488     return (ind->nodes[0]);
489   else
490     return (NULL);
491 }
492 
493 
494 /*
495  * 'index_compare()' - Compare two nodes.
496  */
497 
498 static int				/* O - Result of comparison */
499 index_compare(mxml_index_t *ind,	/* I - Index */
500               mxml_node_t  *first,	/* I - First node */
501               mxml_node_t  *second)	/* I - Second node */
502 {
503   int	diff;				/* Difference */
504 
505 
506  /*
507   * Check the element name...
508   */
509 
510   if ((diff = strcmp(first->value.element.name,
511                      second->value.element.name)) != 0)
512     return (diff);
513 
514  /*
515   * Check the attribute value...
516   */
517 
518   if (ind->attr)
519   {
520     if ((diff = strcmp(mxmlElementGetAttr(first, ind->attr),
521                        mxmlElementGetAttr(second, ind->attr))) != 0)
522       return (diff);
523   }
524 
525  /*
526   * No difference, return 0...
527   */
528 
529   return (0);
530 }
531 
532 
533 /*
534  * 'index_find()' - Compare a node with index values.
535  */
536 
537 static int				/* O - Result of comparison */
538 index_find(mxml_index_t *ind,		/* I - Index */
539            const char   *element,	/* I - Element name or @code NULL@ */
540 	   const char   *value,		/* I - Attribute value or @code NULL@ */
541            mxml_node_t  *node)		/* I - Node */
542 {
543   int	diff;				/* Difference */
544 
545 
546  /*
547   * Check the element name...
548   */
549 
550   if (element)
551   {
552     if ((diff = strcmp(element, node->value.element.name)) != 0)
553       return (diff);
554   }
555 
556  /*
557   * Check the attribute value...
558   */
559 
560   if (value)
561   {
562     if ((diff = strcmp(value, mxmlElementGetAttr(node, ind->attr))) != 0)
563       return (diff);
564   }
565 
566  /*
567   * No difference, return 0...
568   */
569 
570   return (0);
571 }
572 
573 
574 /*
575  * 'index_sort()' - Sort the nodes in the index...
576  *
577  * This function implements the classic quicksort algorithm...
578  */
579 
580 static void
581 index_sort(mxml_index_t *ind,		/* I - Index to sort */
582            int          left,		/* I - Left node in partition */
583 	   int          right)		/* I - Right node in partition */
584 {
585   mxml_node_t	*pivot,			/* Pivot node */
586 		*temp;			/* Swap node */
587   int		templ,			/* Temporary left node */
588 		tempr;			/* Temporary right node */
589 
590 
591  /*
592   * Loop until we have sorted all the way to the right...
593   */
594 
595   do
596   {
597    /*
598     * Sort the pivot in the current partition...
599     */
600 
601     pivot = ind->nodes[left];
602 
603     for (templ = left, tempr = right; templ < tempr;)
604     {
605      /*
606       * Move left while left node <= pivot node...
607       */
608 
609       while ((templ < right) &&
610              index_compare(ind, ind->nodes[templ], pivot) <= 0)
611 	templ ++;
612 
613      /*
614       * Move right while right node > pivot node...
615       */
616 
617       while ((tempr > left) &&
618              index_compare(ind, ind->nodes[tempr], pivot) > 0)
619 	tempr --;
620 
621      /*
622       * Swap nodes if needed...
623       */
624 
625       if (templ < tempr)
626       {
627 	temp              = ind->nodes[templ];
628 	ind->nodes[templ] = ind->nodes[tempr];
629 	ind->nodes[tempr] = temp;
630       }
631     }
632 
633    /*
634     * When we get here, the right (tempr) node is the new position for the
635     * pivot node...
636     */
637 
638     if (index_compare(ind, pivot, ind->nodes[tempr]) > 0)
639     {
640       ind->nodes[left]  = ind->nodes[tempr];
641       ind->nodes[tempr] = pivot;
642     }
643 
644    /*
645     * Recursively sort the left partition as needed...
646     */
647 
648     if (left < (tempr - 1))
649       index_sort(ind, left, tempr - 1);
650   }
651   while (right > (left = tempr + 1));
652 }
653