1 /* 2 * Index support code for Mini-XML, a small XML file parsing library. 3 * 4 * https://www.msweet.org/mxml 5 * 6 * Copyright © 2003-2019 by Michael R Sweet. 7 * 8 * Licensed under Apache License v2.0. See the file "LICENSE" for more 9 * information. 10 */ 11 12 /* 13 * Include necessary headers... 14 */ 15 16 #include "config.h" 17 #include "mxml-private.h" 18 19 20 /* 21 * Sort functions... 22 */ 23 24 static int index_compare(mxml_index_t *ind, mxml_node_t *first, 25 mxml_node_t *second); 26 static int index_find(mxml_index_t *ind, const char *element, 27 const char *value, mxml_node_t *node); 28 static void index_sort(mxml_index_t *ind, int left, int right); 29 30 31 /* 32 * 'mxmlIndexDelete()' - Delete an index. 33 */ 34 35 void 36 mxmlIndexDelete(mxml_index_t *ind) /* I - Index to delete */ 37 { 38 /* 39 * Range check input.. 40 */ 41 42 if (!ind) 43 return; 44 45 /* 46 * Free memory... 47 */ 48 49 if (ind->attr) 50 free(ind->attr); 51 52 if (ind->alloc_nodes) 53 free(ind->nodes); 54 55 free(ind); 56 } 57 58 59 /* 60 * 'mxmlIndexEnum()' - Return the next node in the index. 61 * 62 * You should call @link mxmlIndexReset@ prior to using this function to get 63 * the first node in the index. Nodes are returned in the sorted order of the 64 * index. 65 */ 66 67 mxml_node_t * /* O - Next node or @code NULL@ if there is none */ 68 mxmlIndexEnum(mxml_index_t *ind) /* I - Index to enumerate */ 69 { 70 /* 71 * Range check input... 72 */ 73 74 if (!ind) 75 return (NULL); 76 77 /* 78 * Return the next node... 79 */ 80 81 if (ind->cur_node < ind->num_nodes) 82 return (ind->nodes[ind->cur_node ++]); 83 else 84 return (NULL); 85 } 86 87 88 /* 89 * 'mxmlIndexFind()' - Find the next matching node. 90 * 91 * You should call @link mxmlIndexReset@ prior to using this function for 92 * the first time with a particular set of "element" and "value" 93 * strings. Passing @code NULL@ for both "element" and "value" is equivalent 94 * to calling @link mxmlIndexEnum@. 95 */ 96 97 mxml_node_t * /* O - Node or @code NULL@ if none found */ 98 mxmlIndexFind(mxml_index_t *ind, /* I - Index to search */ 99 const char *element, /* I - Element name to find, if any */ 100 const char *value) /* I - Attribute value, if any */ 101 { 102 int diff, /* Difference between names */ 103 current, /* Current entity in search */ 104 first, /* First entity in search */ 105 last; /* Last entity in search */ 106 107 108 #ifdef DEBUG 109 printf("mxmlIndexFind(ind=%p, element=\"%s\", value=\"%s\")\n", 110 ind, element ? element : "(null)", value ? value : "(null)"); 111 #endif /* DEBUG */ 112 113 /* 114 * Range check input... 115 */ 116 117 if (!ind || (!ind->attr && value)) 118 { 119 #ifdef DEBUG 120 puts(" returning NULL..."); 121 if (ind) 122 printf(" ind->attr=\"%s\"\n", ind->attr ? ind->attr : "(null)"); 123 #endif /* DEBUG */ 124 125 return (NULL); 126 } 127 128 /* 129 * If both element and value are NULL, just enumerate the nodes in the 130 * index... 131 */ 132 133 if (!element && !value) 134 return (mxmlIndexEnum(ind)); 135 136 /* 137 * If there are no nodes in the index, return NULL... 138 */ 139 140 if (!ind->num_nodes) 141 { 142 #ifdef DEBUG 143 puts(" returning NULL..."); 144 puts(" no nodes!"); 145 #endif /* DEBUG */ 146 147 return (NULL); 148 } 149 150 /* 151 * If cur_node == 0, then find the first matching node... 152 */ 153 154 if (ind->cur_node == 0) 155 { 156 /* 157 * Find the first node using a modified binary search algorithm... 158 */ 159 160 first = 0; 161 last = ind->num_nodes - 1; 162 163 #ifdef DEBUG 164 printf(" find first time, num_nodes=%d...\n", ind->num_nodes); 165 #endif /* DEBUG */ 166 167 while ((last - first) > 1) 168 { 169 current = (first + last) / 2; 170 171 #ifdef DEBUG 172 printf(" first=%d, last=%d, current=%d\n", first, last, current); 173 #endif /* DEBUG */ 174 175 if ((diff = index_find(ind, element, value, ind->nodes[current])) == 0) 176 { 177 /* 178 * Found a match, move back to find the first... 179 */ 180 181 #ifdef DEBUG 182 puts(" match!"); 183 #endif /* DEBUG */ 184 185 while (current > 0 && 186 !index_find(ind, element, value, ind->nodes[current - 1])) 187 current --; 188 189 #ifdef DEBUG 190 printf(" returning first match=%d\n", current); 191 #endif /* DEBUG */ 192 193 /* 194 * Return the first match and save the index to the next... 195 */ 196 197 ind->cur_node = current + 1; 198 199 return (ind->nodes[current]); 200 } 201 else if (diff < 0) 202 last = current; 203 else 204 first = current; 205 206 #ifdef DEBUG 207 printf(" diff=%d\n", diff); 208 #endif /* DEBUG */ 209 } 210 211 /* 212 * If we get this far, then we found exactly 0 or 1 matches... 213 */ 214 215 for (current = first; current <= last; current ++) 216 if (!index_find(ind, element, value, ind->nodes[current])) 217 { 218 /* 219 * Found exactly one (or possibly two) match... 220 */ 221 222 #ifdef DEBUG 223 printf(" returning only match %d...\n", current); 224 #endif /* DEBUG */ 225 226 ind->cur_node = current + 1; 227 228 return (ind->nodes[current]); 229 } 230 231 /* 232 * No matches... 233 */ 234 235 ind->cur_node = ind->num_nodes; 236 237 #ifdef DEBUG 238 puts(" returning NULL..."); 239 #endif /* DEBUG */ 240 241 return (NULL); 242 } 243 else if (ind->cur_node < ind->num_nodes && 244 !index_find(ind, element, value, ind->nodes[ind->cur_node])) 245 { 246 /* 247 * Return the next matching node... 248 */ 249 250 #ifdef DEBUG 251 printf(" returning next match %d...\n", ind->cur_node); 252 #endif /* DEBUG */ 253 254 return (ind->nodes[ind->cur_node ++]); 255 } 256 257 /* 258 * If we get this far, then we have no matches... 259 */ 260 261 ind->cur_node = ind->num_nodes; 262 263 #ifdef DEBUG 264 puts(" returning NULL..."); 265 #endif /* DEBUG */ 266 267 return (NULL); 268 } 269 270 271 /* 272 * 'mxmlIndexGetCount()' - Get the number of nodes in an index. 273 * 274 * @since Mini-XML 2.7@ 275 */ 276 277 int /* I - Number of nodes in index */ 278 mxmlIndexGetCount(mxml_index_t *ind) /* I - Index of nodes */ 279 { 280 /* 281 * Range check input... 282 */ 283 284 if (!ind) 285 return (0); 286 287 /* 288 * Return the number of nodes in the index... 289 */ 290 291 return (ind->num_nodes); 292 } 293 294 295 /* 296 * 'mxmlIndexNew()' - Create a new index. 297 * 298 * The index will contain all nodes that contain the named element and/or 299 * attribute. If both "element" and "attr" are @code NULL@, then the index will 300 * contain a sorted list of the elements in the node tree. Nodes are 301 * sorted by element name and optionally by attribute value if the "attr" 302 * argument is not NULL. 303 */ 304 305 mxml_index_t * /* O - New index */ 306 mxmlIndexNew(mxml_node_t *node, /* I - XML node tree */ 307 const char *element, /* I - Element to index or @code NULL@ for all */ 308 const char *attr) /* I - Attribute to index or @code NULL@ for none */ 309 { 310 mxml_index_t *ind; /* New index */ 311 mxml_node_t *current, /* Current node in index */ 312 **temp; /* Temporary node pointer array */ 313 314 315 /* 316 * Range check input... 317 */ 318 319 #ifdef DEBUG 320 printf("mxmlIndexNew(node=%p, element=\"%s\", attr=\"%s\")\n", 321 node, element ? element : "(null)", attr ? attr : "(null)"); 322 #endif /* DEBUG */ 323 324 if (!node) 325 return (NULL); 326 327 /* 328 * Create a new index... 329 */ 330 331 if ((ind = calloc(1, sizeof(mxml_index_t))) == NULL) 332 { 333 mxml_error("Unable to allocate %d bytes for index - %s", 334 sizeof(mxml_index_t), strerror(errno)); 335 return (NULL); 336 } 337 338 if (attr) 339 ind->attr = strdup(attr); 340 341 if (!element && !attr) 342 current = node; 343 else 344 current = mxmlFindElement(node, node, element, attr, NULL, MXML_DESCEND); 345 346 while (current) 347 { 348 if (ind->num_nodes >= ind->alloc_nodes) 349 { 350 if (!ind->alloc_nodes) 351 temp = malloc(64 * sizeof(mxml_node_t *)); 352 else 353 temp = realloc(ind->nodes, (ind->alloc_nodes + 64) * sizeof(mxml_node_t *)); 354 355 if (!temp) 356 { 357 /* 358 * Unable to allocate memory for the index, so abort... 359 */ 360 361 mxml_error("Unable to allocate %d bytes for index: %s", 362 (ind->alloc_nodes + 64) * sizeof(mxml_node_t *), 363 strerror(errno)); 364 365 mxmlIndexDelete(ind); 366 return (NULL); 367 } 368 369 ind->nodes = temp; 370 ind->alloc_nodes += 64; 371 } 372 373 ind->nodes[ind->num_nodes ++] = current; 374 375 current = mxmlFindElement(current, node, element, attr, NULL, MXML_DESCEND); 376 } 377 378 /* 379 * Sort nodes based upon the search criteria... 380 */ 381 382 #ifdef DEBUG 383 { 384 int i; /* Looping var */ 385 386 387 printf("%d node(s) in index.\n\n", ind->num_nodes); 388 389 if (attr) 390 { 391 printf("Node Address Element %s\n", attr); 392 puts("-------- -------- -------------- ------------------------------"); 393 394 for (i = 0; i < ind->num_nodes; i ++) 395 printf("%8d %-8p %-14.14s %s\n", i, ind->nodes[i], 396 ind->nodes[i]->value.element.name, 397 mxmlElementGetAttr(ind->nodes[i], attr)); 398 } 399 else 400 { 401 puts("Node Address Element"); 402 puts("-------- -------- --------------"); 403 404 for (i = 0; i < ind->num_nodes; i ++) 405 printf("%8d %-8p %s\n", i, ind->nodes[i], 406 ind->nodes[i]->value.element.name); 407 } 408 409 putchar('\n'); 410 } 411 #endif /* DEBUG */ 412 413 if (ind->num_nodes > 1) 414 index_sort(ind, 0, ind->num_nodes - 1); 415 416 #ifdef DEBUG 417 { 418 int i; /* Looping var */ 419 420 421 puts("After sorting:\n"); 422 423 if (attr) 424 { 425 printf("Node Address Element %s\n", attr); 426 puts("-------- -------- -------------- ------------------------------"); 427 428 for (i = 0; i < ind->num_nodes; i ++) 429 printf("%8d %-8p %-14.14s %s\n", i, ind->nodes[i], 430 ind->nodes[i]->value.element.name, 431 mxmlElementGetAttr(ind->nodes[i], attr)); 432 } 433 else 434 { 435 puts("Node Address Element"); 436 puts("-------- -------- --------------"); 437 438 for (i = 0; i < ind->num_nodes; i ++) 439 printf("%8d %-8p %s\n", i, ind->nodes[i], 440 ind->nodes[i]->value.element.name); 441 } 442 443 putchar('\n'); 444 } 445 #endif /* DEBUG */ 446 447 /* 448 * Return the new index... 449 */ 450 451 return (ind); 452 } 453 454 455 /* 456 * 'mxmlIndexReset()' - Reset the enumeration/find pointer in the index and 457 * return the first node in the index. 458 * 459 * This function should be called prior to using @link mxmlIndexEnum@ or 460 * @link mxmlIndexFind@ for the first time. 461 */ 462 463 mxml_node_t * /* O - First node or @code NULL@ if there is none */ 464 mxmlIndexReset(mxml_index_t *ind) /* I - Index to reset */ 465 { 466 #ifdef DEBUG 467 printf("mxmlIndexReset(ind=%p)\n", ind); 468 #endif /* DEBUG */ 469 470 /* 471 * Range check input... 472 */ 473 474 if (!ind) 475 return (NULL); 476 477 /* 478 * Set the index to the first element... 479 */ 480 481 ind->cur_node = 0; 482 483 /* 484 * Return the first node... 485 */ 486 487 if (ind->num_nodes) 488 return (ind->nodes[0]); 489 else 490 return (NULL); 491 } 492 493 494 /* 495 * 'index_compare()' - Compare two nodes. 496 */ 497 498 static int /* O - Result of comparison */ 499 index_compare(mxml_index_t *ind, /* I - Index */ 500 mxml_node_t *first, /* I - First node */ 501 mxml_node_t *second) /* I - Second node */ 502 { 503 int diff; /* Difference */ 504 505 506 /* 507 * Check the element name... 508 */ 509 510 if ((diff = strcmp(first->value.element.name, 511 second->value.element.name)) != 0) 512 return (diff); 513 514 /* 515 * Check the attribute value... 516 */ 517 518 if (ind->attr) 519 { 520 if ((diff = strcmp(mxmlElementGetAttr(first, ind->attr), 521 mxmlElementGetAttr(second, ind->attr))) != 0) 522 return (diff); 523 } 524 525 /* 526 * No difference, return 0... 527 */ 528 529 return (0); 530 } 531 532 533 /* 534 * 'index_find()' - Compare a node with index values. 535 */ 536 537 static int /* O - Result of comparison */ 538 index_find(mxml_index_t *ind, /* I - Index */ 539 const char *element, /* I - Element name or @code NULL@ */ 540 const char *value, /* I - Attribute value or @code NULL@ */ 541 mxml_node_t *node) /* I - Node */ 542 { 543 int diff; /* Difference */ 544 545 546 /* 547 * Check the element name... 548 */ 549 550 if (element) 551 { 552 if ((diff = strcmp(element, node->value.element.name)) != 0) 553 return (diff); 554 } 555 556 /* 557 * Check the attribute value... 558 */ 559 560 if (value) 561 { 562 if ((diff = strcmp(value, mxmlElementGetAttr(node, ind->attr))) != 0) 563 return (diff); 564 } 565 566 /* 567 * No difference, return 0... 568 */ 569 570 return (0); 571 } 572 573 574 /* 575 * 'index_sort()' - Sort the nodes in the index... 576 * 577 * This function implements the classic quicksort algorithm... 578 */ 579 580 static void 581 index_sort(mxml_index_t *ind, /* I - Index to sort */ 582 int left, /* I - Left node in partition */ 583 int right) /* I - Right node in partition */ 584 { 585 mxml_node_t *pivot, /* Pivot node */ 586 *temp; /* Swap node */ 587 int templ, /* Temporary left node */ 588 tempr; /* Temporary right node */ 589 590 591 /* 592 * Loop until we have sorted all the way to the right... 593 */ 594 595 do 596 { 597 /* 598 * Sort the pivot in the current partition... 599 */ 600 601 pivot = ind->nodes[left]; 602 603 for (templ = left, tempr = right; templ < tempr;) 604 { 605 /* 606 * Move left while left node <= pivot node... 607 */ 608 609 while ((templ < right) && 610 index_compare(ind, ind->nodes[templ], pivot) <= 0) 611 templ ++; 612 613 /* 614 * Move right while right node > pivot node... 615 */ 616 617 while ((tempr > left) && 618 index_compare(ind, ind->nodes[tempr], pivot) > 0) 619 tempr --; 620 621 /* 622 * Swap nodes if needed... 623 */ 624 625 if (templ < tempr) 626 { 627 temp = ind->nodes[templ]; 628 ind->nodes[templ] = ind->nodes[tempr]; 629 ind->nodes[tempr] = temp; 630 } 631 } 632 633 /* 634 * When we get here, the right (tempr) node is the new position for the 635 * pivot node... 636 */ 637 638 if (index_compare(ind, pivot, ind->nodes[tempr]) > 0) 639 { 640 ind->nodes[left] = ind->nodes[tempr]; 641 ind->nodes[tempr] = pivot; 642 } 643 644 /* 645 * Recursively sort the left partition as needed... 646 */ 647 648 if (left < (tempr - 1)) 649 index_sort(ind, left, tempr - 1); 650 } 651 while (right > (left = tempr + 1)); 652 } 653