1 /* 2 * entities.c : implementation for the XML entities handling 3 * 4 * See Copyright for the status of this software. 5 * 6 * daniel@veillard.com 7 */ 8 9 /* To avoid EBCDIC trouble when parsing on zOS */ 10 #if defined(__MVS__) 11 #pragma convert("ISO8859-1") 12 #endif 13 14 #define IN_LIBXML 15 #include "libxml.h" 16 17 #include <string.h> 18 #include <stdlib.h> 19 20 #include <libxml/xmlmemory.h> 21 #include <libxml/hash.h> 22 #include <libxml/entities.h> 23 #include <libxml/parser.h> 24 #include <libxml/parserInternals.h> 25 #include <libxml/xmlerror.h> 26 #include <libxml/globals.h> 27 #include <libxml/dict.h> 28 29 #include "save.h" 30 31 /* 32 * The XML predefined entities. 33 */ 34 35 static xmlEntity xmlEntityLt = { 36 NULL, XML_ENTITY_DECL, BAD_CAST "lt", 37 NULL, NULL, NULL, NULL, NULL, NULL, 38 BAD_CAST "<", BAD_CAST "<", 1, 39 XML_INTERNAL_PREDEFINED_ENTITY, 40 NULL, NULL, NULL, NULL, 0, 1 41 }; 42 static xmlEntity xmlEntityGt = { 43 NULL, XML_ENTITY_DECL, BAD_CAST "gt", 44 NULL, NULL, NULL, NULL, NULL, NULL, 45 BAD_CAST ">", BAD_CAST ">", 1, 46 XML_INTERNAL_PREDEFINED_ENTITY, 47 NULL, NULL, NULL, NULL, 0, 1 48 }; 49 static xmlEntity xmlEntityAmp = { 50 NULL, XML_ENTITY_DECL, BAD_CAST "amp", 51 NULL, NULL, NULL, NULL, NULL, NULL, 52 BAD_CAST "&", BAD_CAST "&", 1, 53 XML_INTERNAL_PREDEFINED_ENTITY, 54 NULL, NULL, NULL, NULL, 0, 1 55 }; 56 static xmlEntity xmlEntityQuot = { 57 NULL, XML_ENTITY_DECL, BAD_CAST "quot", 58 NULL, NULL, NULL, NULL, NULL, NULL, 59 BAD_CAST "\"", BAD_CAST "\"", 1, 60 XML_INTERNAL_PREDEFINED_ENTITY, 61 NULL, NULL, NULL, NULL, 0, 1 62 }; 63 static xmlEntity xmlEntityApos = { 64 NULL, XML_ENTITY_DECL, BAD_CAST "apos", 65 NULL, NULL, NULL, NULL, NULL, NULL, 66 BAD_CAST "'", BAD_CAST "'", 1, 67 XML_INTERNAL_PREDEFINED_ENTITY, 68 NULL, NULL, NULL, NULL, 0, 1 69 }; 70 71 /** 72 * xmlEntitiesErrMemory: 73 * @extra: extra information 74 * 75 * Handle an out of memory condition 76 */ 77 static void 78 xmlEntitiesErrMemory(const char 
*extra) 79 { 80 __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra); 81 } 82 83 /** 84 * xmlEntitiesErr: 85 * @code: the error code 86 * @msg: the message 87 * 88 * Raise an error. 89 */ 90 static void LIBXML_ATTR_FORMAT(2,0) 91 xmlEntitiesErr(xmlParserErrors code, const char *msg) 92 { 93 __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL); 94 } 95 96 /** 97 * xmlEntitiesWarn: 98 * @code: the error code 99 * @msg: the message 100 * 101 * Raise a warning. 102 */ 103 static void LIBXML_ATTR_FORMAT(2,0) 104 xmlEntitiesWarn(xmlParserErrors code, const char *msg, const xmlChar *str1) 105 { 106 __xmlRaiseError(NULL, NULL, NULL, 107 NULL, NULL, XML_FROM_TREE, code, 108 XML_ERR_WARNING, NULL, 0, 109 (const char *)str1, NULL, NULL, 0, 0, 110 msg, (const char *)str1, NULL); 111 } 112 113 /* 114 * xmlFreeEntity : clean-up an entity record. 115 */ 116 static void 117 xmlFreeEntity(xmlEntityPtr entity) 118 { 119 xmlDictPtr dict = NULL; 120 121 if (entity == NULL) 122 return; 123 124 if (entity->doc != NULL) 125 dict = entity->doc->dict; 126 127 128 if ((entity->children) && (entity->owner == 1) && 129 (entity == (xmlEntityPtr) entity->children->parent)) 130 xmlFreeNodeList(entity->children); 131 if ((entity->name != NULL) && 132 ((dict == NULL) || (!xmlDictOwns(dict, entity->name)))) 133 xmlFree((char *) entity->name); 134 if (entity->ExternalID != NULL) 135 xmlFree((char *) entity->ExternalID); 136 if (entity->SystemID != NULL) 137 xmlFree((char *) entity->SystemID); 138 if (entity->URI != NULL) 139 xmlFree((char *) entity->URI); 140 if (entity->content != NULL) 141 xmlFree((char *) entity->content); 142 if (entity->orig != NULL) 143 xmlFree((char *) entity->orig); 144 xmlFree(entity); 145 } 146 147 /* 148 * xmlCreateEntity: 149 * 150 * internal routine doing the entity node structures allocations 151 */ 152 static xmlEntityPtr 153 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type, 154 const xmlChar *ExternalID, const xmlChar *SystemID, 155 
const xmlChar *content) { 156 xmlEntityPtr ret; 157 158 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); 159 if (ret == NULL) { 160 xmlEntitiesErrMemory("xmlCreateEntity: malloc failed"); 161 return(NULL); 162 } 163 memset(ret, 0, sizeof(xmlEntity)); 164 ret->type = XML_ENTITY_DECL; 165 ret->checked = 0; 166 167 /* 168 * fill the structure. 169 */ 170 ret->etype = (xmlEntityType) type; 171 if (dict == NULL) { 172 ret->name = xmlStrdup(name); 173 if (ExternalID != NULL) 174 ret->ExternalID = xmlStrdup(ExternalID); 175 if (SystemID != NULL) 176 ret->SystemID = xmlStrdup(SystemID); 177 } else { 178 ret->name = xmlDictLookup(dict, name, -1); 179 ret->ExternalID = xmlStrdup(ExternalID); 180 ret->SystemID = xmlStrdup(SystemID); 181 } 182 if (content != NULL) { 183 ret->length = xmlStrlen(content); 184 ret->content = xmlStrndup(content, ret->length); 185 } else { 186 ret->length = 0; 187 ret->content = NULL; 188 } 189 ret->URI = NULL; /* to be computed by the layer knowing 190 the defining entity */ 191 ret->orig = NULL; 192 ret->owner = 0; 193 194 return(ret); 195 } 196 197 /* 198 * xmlAddEntity : register a new entity for an entities table. 
199 */ 200 static xmlEntityPtr 201 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type, 202 const xmlChar *ExternalID, const xmlChar *SystemID, 203 const xmlChar *content) { 204 xmlDictPtr dict = NULL; 205 xmlEntitiesTablePtr table = NULL; 206 xmlEntityPtr ret, predef; 207 208 if (name == NULL) 209 return(NULL); 210 if (dtd == NULL) 211 return(NULL); 212 if (dtd->doc != NULL) 213 dict = dtd->doc->dict; 214 215 switch (type) { 216 case XML_INTERNAL_GENERAL_ENTITY: 217 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 218 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 219 predef = xmlGetPredefinedEntity(name); 220 if (predef != NULL) { 221 int valid = 0; 222 223 /* 4.6 Predefined Entities */ 224 if ((type == XML_INTERNAL_GENERAL_ENTITY) && 225 (content != NULL)) { 226 int c = predef->content[0]; 227 228 if (((content[0] == c) && (content[1] == 0)) && 229 ((c == '>') || (c == '\'') || (c == '"'))) { 230 valid = 1; 231 } else if ((content[0] == '&') && (content[1] == '#')) { 232 if (content[2] == 'x') { 233 xmlChar *hex = BAD_CAST "0123456789ABCDEF"; 234 xmlChar ref[] = "00;"; 235 236 ref[0] = hex[c / 16 % 16]; 237 ref[1] = hex[c % 16]; 238 if (xmlStrcasecmp(&content[3], ref) == 0) 239 valid = 1; 240 } else { 241 xmlChar ref[] = "00;"; 242 243 ref[0] = '0' + c / 10 % 10; 244 ref[1] = '0' + c % 10; 245 if (xmlStrEqual(&content[2], ref)) 246 valid = 1; 247 } 248 } 249 } 250 if (!valid) { 251 xmlEntitiesWarn(XML_ERR_ENTITY_PROCESSING, 252 "xmlAddEntity: invalid redeclaration of predefined" 253 " entity '%s'", name); 254 return(NULL); 255 } 256 } 257 if (dtd->entities == NULL) 258 dtd->entities = xmlHashCreateDict(0, dict); 259 table = dtd->entities; 260 break; 261 case XML_INTERNAL_PARAMETER_ENTITY: 262 case XML_EXTERNAL_PARAMETER_ENTITY: 263 if (dtd->pentities == NULL) 264 dtd->pentities = xmlHashCreateDict(0, dict); 265 table = dtd->pentities; 266 break; 267 case XML_INTERNAL_PREDEFINED_ENTITY: 268 return(NULL); 269 } 270 if (table == NULL) 271 return(NULL); 272 ret = 
xmlCreateEntity(dict, name, type, ExternalID, SystemID, content); 273 if (ret == NULL) 274 return(NULL); 275 ret->doc = dtd->doc; 276 277 if (xmlHashAddEntry(table, name, ret)) { 278 /* 279 * entity was already defined at another level. 280 */ 281 xmlFreeEntity(ret); 282 return(NULL); 283 } 284 return(ret); 285 } 286 287 /** 288 * xmlGetPredefinedEntity: 289 * @name: the entity name 290 * 291 * Check whether this name is an predefined entity. 292 * 293 * Returns NULL if not, otherwise the entity 294 */ 295 xmlEntityPtr 296 xmlGetPredefinedEntity(const xmlChar *name) { 297 if (name == NULL) return(NULL); 298 switch (name[0]) { 299 case 'l': 300 if (xmlStrEqual(name, BAD_CAST "lt")) 301 return(&xmlEntityLt); 302 break; 303 case 'g': 304 if (xmlStrEqual(name, BAD_CAST "gt")) 305 return(&xmlEntityGt); 306 break; 307 case 'a': 308 if (xmlStrEqual(name, BAD_CAST "amp")) 309 return(&xmlEntityAmp); 310 if (xmlStrEqual(name, BAD_CAST "apos")) 311 return(&xmlEntityApos); 312 break; 313 case 'q': 314 if (xmlStrEqual(name, BAD_CAST "quot")) 315 return(&xmlEntityQuot); 316 break; 317 default: 318 break; 319 } 320 return(NULL); 321 } 322 323 /** 324 * xmlAddDtdEntity: 325 * @doc: the document 326 * @name: the entity name 327 * @type: the entity type XML_xxx_yyy_ENTITY 328 * @ExternalID: the entity external ID if available 329 * @SystemID: the entity system ID if available 330 * @content: the entity content 331 * 332 * Register a new entity for this document DTD external subset. 
333 * 334 * Returns a pointer to the entity or NULL in case of error 335 */ 336 xmlEntityPtr 337 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type, 338 const xmlChar *ExternalID, const xmlChar *SystemID, 339 const xmlChar *content) { 340 xmlEntityPtr ret; 341 xmlDtdPtr dtd; 342 343 if (doc == NULL) { 344 xmlEntitiesErr(XML_DTD_NO_DOC, 345 "xmlAddDtdEntity: document is NULL"); 346 return(NULL); 347 } 348 if (doc->extSubset == NULL) { 349 xmlEntitiesErr(XML_DTD_NO_DTD, 350 "xmlAddDtdEntity: document without external subset"); 351 return(NULL); 352 } 353 dtd = doc->extSubset; 354 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content); 355 if (ret == NULL) return(NULL); 356 357 /* 358 * Link it to the DTD 359 */ 360 ret->parent = dtd; 361 ret->doc = dtd->doc; 362 if (dtd->last == NULL) { 363 dtd->children = dtd->last = (xmlNodePtr) ret; 364 } else { 365 dtd->last->next = (xmlNodePtr) ret; 366 ret->prev = dtd->last; 367 dtd->last = (xmlNodePtr) ret; 368 } 369 return(ret); 370 } 371 372 /** 373 * xmlAddDocEntity: 374 * @doc: the document 375 * @name: the entity name 376 * @type: the entity type XML_xxx_yyy_ENTITY 377 * @ExternalID: the entity external ID if available 378 * @SystemID: the entity system ID if available 379 * @content: the entity content 380 * 381 * Register a new entity for this document. 
382 * 383 * Returns a pointer to the entity or NULL in case of error 384 */ 385 xmlEntityPtr 386 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type, 387 const xmlChar *ExternalID, const xmlChar *SystemID, 388 const xmlChar *content) { 389 xmlEntityPtr ret; 390 xmlDtdPtr dtd; 391 392 if (doc == NULL) { 393 xmlEntitiesErr(XML_DTD_NO_DOC, 394 "xmlAddDocEntity: document is NULL"); 395 return(NULL); 396 } 397 if (doc->intSubset == NULL) { 398 xmlEntitiesErr(XML_DTD_NO_DTD, 399 "xmlAddDocEntity: document without internal subset"); 400 return(NULL); 401 } 402 dtd = doc->intSubset; 403 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content); 404 if (ret == NULL) return(NULL); 405 406 /* 407 * Link it to the DTD 408 */ 409 ret->parent = dtd; 410 ret->doc = dtd->doc; 411 if (dtd->last == NULL) { 412 dtd->children = dtd->last = (xmlNodePtr) ret; 413 } else { 414 dtd->last->next = (xmlNodePtr) ret; 415 ret->prev = dtd->last; 416 dtd->last = (xmlNodePtr) ret; 417 } 418 return(ret); 419 } 420 421 /** 422 * xmlNewEntity: 423 * @doc: the document 424 * @name: the entity name 425 * @type: the entity type XML_xxx_yyy_ENTITY 426 * @ExternalID: the entity external ID if available 427 * @SystemID: the entity system ID if available 428 * @content: the entity content 429 * 430 * Create a new entity, this differs from xmlAddDocEntity() that if 431 * the document is NULL or has no internal subset defined, then an 432 * unlinked entity structure will be returned, it is then the responsibility 433 * of the caller to link it to the document later or free it when not needed 434 * anymore. 
435 * 436 * Returns a pointer to the entity or NULL in case of error 437 */ 438 xmlEntityPtr 439 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type, 440 const xmlChar *ExternalID, const xmlChar *SystemID, 441 const xmlChar *content) { 442 xmlEntityPtr ret; 443 xmlDictPtr dict; 444 445 if ((doc != NULL) && (doc->intSubset != NULL)) { 446 return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content)); 447 } 448 if (doc != NULL) 449 dict = doc->dict; 450 else 451 dict = NULL; 452 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content); 453 if (ret == NULL) 454 return(NULL); 455 ret->doc = doc; 456 return(ret); 457 } 458 459 /** 460 * xmlGetEntityFromTable: 461 * @table: an entity table 462 * @name: the entity name 463 * @parameter: look for parameter entities 464 * 465 * Do an entity lookup in the table. 466 * returns the corresponding parameter entity, if found. 467 * 468 * Returns A pointer to the entity structure or NULL if not found. 469 */ 470 static xmlEntityPtr 471 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) { 472 return((xmlEntityPtr) xmlHashLookup(table, name)); 473 } 474 475 /** 476 * xmlGetParameterEntity: 477 * @doc: the document referencing the entity 478 * @name: the entity name 479 * 480 * Do an entity lookup in the internal and external subsets and 481 * returns the corresponding parameter entity, if found. 482 * 483 * Returns A pointer to the entity structure or NULL if not found. 
484 */ 485 xmlEntityPtr 486 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) { 487 xmlEntitiesTablePtr table; 488 xmlEntityPtr ret; 489 490 if (doc == NULL) 491 return(NULL); 492 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) { 493 table = (xmlEntitiesTablePtr) doc->intSubset->pentities; 494 ret = xmlGetEntityFromTable(table, name); 495 if (ret != NULL) 496 return(ret); 497 } 498 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) { 499 table = (xmlEntitiesTablePtr) doc->extSubset->pentities; 500 return(xmlGetEntityFromTable(table, name)); 501 } 502 return(NULL); 503 } 504 505 /** 506 * xmlGetDtdEntity: 507 * @doc: the document referencing the entity 508 * @name: the entity name 509 * 510 * Do an entity lookup in the DTD entity hash table and 511 * returns the corresponding entity, if found. 512 * Note: the first argument is the document node, not the DTD node. 513 * 514 * Returns A pointer to the entity structure or NULL if not found. 515 */ 516 xmlEntityPtr 517 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) { 518 xmlEntitiesTablePtr table; 519 520 if (doc == NULL) 521 return(NULL); 522 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) { 523 table = (xmlEntitiesTablePtr) doc->extSubset->entities; 524 return(xmlGetEntityFromTable(table, name)); 525 } 526 return(NULL); 527 } 528 529 /** 530 * xmlGetDocEntity: 531 * @doc: the document referencing the entity 532 * @name: the entity name 533 * 534 * Do an entity lookup in the document entity hash table and 535 * returns the corresponding entity, otherwise a lookup is done 536 * in the predefined entities too. 537 * 538 * Returns A pointer to the entity structure or NULL if not found. 
*/
xmlEntityPtr
xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
    xmlEntityPtr cur;
    xmlEntitiesTablePtr table;

    if (doc != NULL) {
        /* The internal subset is searched first. */
        if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
            table = (xmlEntitiesTablePtr) doc->intSubset->entities;
            cur = xmlGetEntityFromTable(table, name);
            if (cur != NULL)
                return(cur);
        }
        /* The external subset is skipped when doc->standalone is 1. */
        if (doc->standalone != 1) {
            if ((doc->extSubset != NULL) &&
                (doc->extSubset->entities != NULL)) {
                table = (xmlEntitiesTablePtr) doc->extSubset->entities;
                cur = xmlGetEntityFromTable(table, name);
                if (cur != NULL)
                    return(cur);
            }
        }
    }
    /* Last resort, also used when doc is NULL: the predefined entities. */
    return(xmlGetPredefinedEntity(name));
}

/*
 * Macro used to grow the current buffer.
 *
 * It doubles buffer_size and reallocates buffer. The expansion site must
 * provide the locals `buffer` and `buffer_size` and a `mem_error` label,
 * which is jumped to if the doubled size wraps around or the reallocation
 * fails. Pointers into the old buffer must be recomputed by the caller
 * afterwards.
 */
#define growBufferReentrant() {						\
    xmlChar *tmp;                                                       \
    size_t new_size = buffer_size * 2;                                  \
    if (new_size < buffer_size) goto mem_error;                         \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);	                \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;							\
    buffer_size = new_size;						\
}

/**
 * xmlEncodeEntitiesInternal:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 * @attr: are we handling an attribute value
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Only '<', '>' and '&' are replaced by entity references here; '"' is in
 * the 0x20..0x7F range and is copied unchanged. Bytes >= 0x80 are copied
 * as-is when the document is HTML or declares an encoding, otherwise they
 * are decoded as UTF-8 and written as hexadecimal character references.
 * When @doc is not NULL and that decoding fails, doc->encoding is set to
 * "ISO-8859-1" as a side effect.
 *
 * Returns A newly allocated string with the substitution done, or NULL if
 * @input is NULL or an allocation failed.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep at least 100 bytes free before each step. `out` is
         * rebuilt from its index because growing may move the buffer.
         */
        size_t indx = out - buffer;
        if (indx + 100 > buffer_size) {

            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * '<', '>' and '&' always have to be encoded.
         */
        if (*cur == '<') {
            const xmlChar *end;

            /*
             * Special handling of server side include in HTML attributes:
             * a "<!--" ... "-->" run is copied through unescaped.
             */
            if (html && attr &&
                (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
                ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
                while (cur != end) {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* copy the three characters of the closing "-->" */
                *out++ = *cur++;
                *out++ = *cur++;
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            /*
             * Special handling of &{...} construct from HTML 4, see
             * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
             * It is copied through unescaped, up to and including the
             * closing brace, but only if a '}' exists in the rest of
             * the input.
             */
            if (html && attr && (cur[1] == '{') &&
                (strchr((const char *) cur, '}'))) {
                while (*cur != '}') {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
            (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The byte is passed through unchanged.
                 *
                 * Historical note, Bjørn Reese <br@sseusa.com> provided
                 * the patch below, which is kept disabled:
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 * It must match either:
                 *   110xxxxx 10xxxxxx
                 *   1110xxxx 10xxxxxx 10xxxxxx
                 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                 * That is:
                 *   cur[0] is 11xxxxxx
                 *   cur[1] is 10xxxxxx
                 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
                 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
                 *   cur[0] is not 11111xxx
                 *
                 * The tests below run in that order and short-circuit,
                 * so cur[2] and cur[3] are only read once the preceding
                 * byte was found to be a continuation byte, never past
                 * the terminating 0.
                 */
                char buf[11], *ptr;
                int val = 0, l = 1;

                if (((cur[0] & 0xC0) != 0xC0) ||
                    ((cur[1] & 0xC0) != 0x80) ||
                    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
                    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
                    (((cur[0] & 0xF8) == 0xF8))) {
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntities: input not UTF-8");
                    /*
                     * doc->encoding is NULL on this path, so later high
                     * bytes take the pass-through branch above.
                     */
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    /* emit the offending byte as a decimal char ref */
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /* two byte sequence */
                    val = (cur[0]) & 0x1F;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    l = 2;
                } else if (*cur < 0xF0) {
                    /* three byte sequence */
                    val = (cur[0]) & 0x0F;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    val <<= 6;
                    val |= (cur[2]) & 0x3F;
                    l = 3;
                } else if (*cur < 0xF8) {
                    /* four byte sequence */
                    val = (cur[0]) & 0x07;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    val <<= 6;
                    val |= (cur[2]) & 0x3F;
                    val <<= 6;
                    val |= (cur[3]) & 0x3F;
                    l = 4;
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntities: char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    /* only the lead byte is consumed and emitted */
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            /*
             * Remaining bytes below 0x20 accepted by IS_BYTE_CHAR are
             * written as decimal char refs; any other byte reaching
             * this point is dropped from the output.
             */
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    *out = 0;
    return(buffer);

mem_error:
    /* reached from growBufferReentrant(); buffer is still valid here */
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}

/**
 * xmlEncodeAttributeEntities:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts for
 * attribute values.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
    return xmlEncodeEntitiesInternal(doc, input, 1);
}

/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Returns A newly allocated string with the substitution done.
819 */ 820 xmlChar * 821 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { 822 return xmlEncodeEntitiesInternal(doc, input, 0); 823 } 824 825 /** 826 * xmlEncodeSpecialChars: 827 * @doc: the document containing the string 828 * @input: A string to convert to XML. 829 * 830 * Do a global encoding of a string, replacing the predefined entities 831 * this routine is reentrant, and result must be deallocated. 832 * 833 * Returns A newly allocated string with the substitution done. 834 */ 835 xmlChar * 836 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) { 837 const xmlChar *cur = input; 838 xmlChar *buffer = NULL; 839 xmlChar *out = NULL; 840 size_t buffer_size = 0; 841 if (input == NULL) return(NULL); 842 843 /* 844 * allocate an translation buffer. 845 */ 846 buffer_size = 1000; 847 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); 848 if (buffer == NULL) { 849 xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed"); 850 return(NULL); 851 } 852 out = buffer; 853 854 while (*cur != '\0') { 855 size_t indx = out - buffer; 856 if (indx + 10 > buffer_size) { 857 858 growBufferReentrant(); 859 out = &buffer[indx]; 860 } 861 862 /* 863 * By default one have to encode at least '<', '>', '"' and '&' ! 864 */ 865 if (*cur == '<') { 866 *out++ = '&'; 867 *out++ = 'l'; 868 *out++ = 't'; 869 *out++ = ';'; 870 } else if (*cur == '>') { 871 *out++ = '&'; 872 *out++ = 'g'; 873 *out++ = 't'; 874 *out++ = ';'; 875 } else if (*cur == '&') { 876 *out++ = '&'; 877 *out++ = 'a'; 878 *out++ = 'm'; 879 *out++ = 'p'; 880 *out++ = ';'; 881 } else if (*cur == '"') { 882 *out++ = '&'; 883 *out++ = 'q'; 884 *out++ = 'u'; 885 *out++ = 'o'; 886 *out++ = 't'; 887 *out++ = ';'; 888 } else if (*cur == '\r') { 889 *out++ = '&'; 890 *out++ = '#'; 891 *out++ = '1'; 892 *out++ = '3'; 893 *out++ = ';'; 894 } else { 895 /* 896 * Works because on UTF-8, all extended sequences cannot 897 * result in bytes in the ASCII range. 
898 */ 899 *out++ = *cur; 900 } 901 cur++; 902 } 903 *out = 0; 904 return(buffer); 905 906 mem_error: 907 xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed"); 908 xmlFree(buffer); 909 return(NULL); 910 } 911 912 /** 913 * xmlCreateEntitiesTable: 914 * 915 * create and initialize an empty entities hash table. 916 * This really doesn't make sense and should be deprecated 917 * 918 * Returns the xmlEntitiesTablePtr just created or NULL in case of error. 919 */ 920 xmlEntitiesTablePtr 921 xmlCreateEntitiesTable(void) { 922 return((xmlEntitiesTablePtr) xmlHashCreate(0)); 923 } 924 925 /** 926 * xmlFreeEntityWrapper: 927 * @entity: An entity 928 * @name: its name 929 * 930 * Deallocate the memory used by an entities in the hash table. 931 */ 932 static void 933 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) { 934 if (entity != NULL) 935 xmlFreeEntity((xmlEntityPtr) entity); 936 } 937 938 /** 939 * xmlFreeEntitiesTable: 940 * @table: An entity table 941 * 942 * Deallocate the memory used by an entities hash table. 943 */ 944 void 945 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) { 946 xmlHashFree(table, xmlFreeEntityWrapper); 947 } 948 949 #ifdef LIBXML_TREE_ENABLED 950 /** 951 * xmlCopyEntity: 952 * @ent: An entity 953 * 954 * Build a copy of an entity 955 * 956 * Returns the new xmlEntitiesPtr or NULL in case of error. 
957 */ 958 static void * 959 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) { 960 xmlEntityPtr ent = (xmlEntityPtr) payload; 961 xmlEntityPtr cur; 962 963 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); 964 if (cur == NULL) { 965 xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed"); 966 return(NULL); 967 } 968 memset(cur, 0, sizeof(xmlEntity)); 969 cur->type = XML_ENTITY_DECL; 970 971 cur->etype = ent->etype; 972 if (ent->name != NULL) 973 cur->name = xmlStrdup(ent->name); 974 if (ent->ExternalID != NULL) 975 cur->ExternalID = xmlStrdup(ent->ExternalID); 976 if (ent->SystemID != NULL) 977 cur->SystemID = xmlStrdup(ent->SystemID); 978 if (ent->content != NULL) 979 cur->content = xmlStrdup(ent->content); 980 if (ent->orig != NULL) 981 cur->orig = xmlStrdup(ent->orig); 982 if (ent->URI != NULL) 983 cur->URI = xmlStrdup(ent->URI); 984 return(cur); 985 } 986 987 /** 988 * xmlCopyEntitiesTable: 989 * @table: An entity table 990 * 991 * Build a copy of an entity table. 992 * 993 * Returns the new xmlEntitiesTablePtr or NULL in case of error. 994 */ 995 xmlEntitiesTablePtr 996 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) { 997 return(xmlHashCopy(table, xmlCopyEntity)); 998 } 999 #endif /* LIBXML_TREE_ENABLED */ 1000 1001 #ifdef LIBXML_OUTPUT_ENABLED 1002 1003 /** 1004 * xmlDumpEntityContent: 1005 * @buf: An XML buffer. 1006 * @content: The entity content. 
1007 * 1008 * This will dump the quoted string value, taking care of the special 1009 * treatment required by % 1010 */ 1011 static void 1012 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) { 1013 if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return; 1014 if (xmlStrchr(content, '%')) { 1015 const xmlChar * base, *cur; 1016 1017 xmlBufferCCat(buf, "\""); 1018 base = cur = content; 1019 while (*cur != 0) { 1020 if (*cur == '"') { 1021 if (base != cur) 1022 xmlBufferAdd(buf, base, cur - base); 1023 xmlBufferAdd(buf, BAD_CAST """, 6); 1024 cur++; 1025 base = cur; 1026 } else if (*cur == '%') { 1027 if (base != cur) 1028 xmlBufferAdd(buf, base, cur - base); 1029 xmlBufferAdd(buf, BAD_CAST "%", 6); 1030 cur++; 1031 base = cur; 1032 } else { 1033 cur++; 1034 } 1035 } 1036 if (base != cur) 1037 xmlBufferAdd(buf, base, cur - base); 1038 xmlBufferCCat(buf, "\""); 1039 } else { 1040 xmlBufferWriteQuotedString(buf, content); 1041 } 1042 } 1043 1044 /** 1045 * xmlDumpEntityDecl: 1046 * @buf: An XML buffer. 
1047 * @ent: An entity table 1048 * 1049 * This will dump the content of the entity table as an XML DTD definition 1050 */ 1051 void 1052 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) { 1053 if ((buf == NULL) || (ent == NULL)) return; 1054 switch (ent->etype) { 1055 case XML_INTERNAL_GENERAL_ENTITY: 1056 xmlBufferWriteChar(buf, "<!ENTITY "); 1057 xmlBufferWriteCHAR(buf, ent->name); 1058 xmlBufferWriteChar(buf, " "); 1059 if (ent->orig != NULL) 1060 xmlBufferWriteQuotedString(buf, ent->orig); 1061 else 1062 xmlDumpEntityContent(buf, ent->content); 1063 xmlBufferWriteChar(buf, ">\n"); 1064 break; 1065 case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 1066 xmlBufferWriteChar(buf, "<!ENTITY "); 1067 xmlBufferWriteCHAR(buf, ent->name); 1068 if (ent->ExternalID != NULL) { 1069 xmlBufferWriteChar(buf, " PUBLIC "); 1070 xmlBufferWriteQuotedString(buf, ent->ExternalID); 1071 xmlBufferWriteChar(buf, " "); 1072 xmlBufferWriteQuotedString(buf, ent->SystemID); 1073 } else { 1074 xmlBufferWriteChar(buf, " SYSTEM "); 1075 xmlBufferWriteQuotedString(buf, ent->SystemID); 1076 } 1077 xmlBufferWriteChar(buf, ">\n"); 1078 break; 1079 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 1080 xmlBufferWriteChar(buf, "<!ENTITY "); 1081 xmlBufferWriteCHAR(buf, ent->name); 1082 if (ent->ExternalID != NULL) { 1083 xmlBufferWriteChar(buf, " PUBLIC "); 1084 xmlBufferWriteQuotedString(buf, ent->ExternalID); 1085 xmlBufferWriteChar(buf, " "); 1086 xmlBufferWriteQuotedString(buf, ent->SystemID); 1087 } else { 1088 xmlBufferWriteChar(buf, " SYSTEM "); 1089 xmlBufferWriteQuotedString(buf, ent->SystemID); 1090 } 1091 if (ent->content != NULL) { /* Should be true ! 
*/ 1092 xmlBufferWriteChar(buf, " NDATA "); 1093 if (ent->orig != NULL) 1094 xmlBufferWriteCHAR(buf, ent->orig); 1095 else 1096 xmlBufferWriteCHAR(buf, ent->content); 1097 } 1098 xmlBufferWriteChar(buf, ">\n"); 1099 break; 1100 case XML_INTERNAL_PARAMETER_ENTITY: 1101 xmlBufferWriteChar(buf, "<!ENTITY % "); 1102 xmlBufferWriteCHAR(buf, ent->name); 1103 xmlBufferWriteChar(buf, " "); 1104 if (ent->orig == NULL) 1105 xmlDumpEntityContent(buf, ent->content); 1106 else 1107 xmlBufferWriteQuotedString(buf, ent->orig); 1108 xmlBufferWriteChar(buf, ">\n"); 1109 break; 1110 case XML_EXTERNAL_PARAMETER_ENTITY: 1111 xmlBufferWriteChar(buf, "<!ENTITY % "); 1112 xmlBufferWriteCHAR(buf, ent->name); 1113 if (ent->ExternalID != NULL) { 1114 xmlBufferWriteChar(buf, " PUBLIC "); 1115 xmlBufferWriteQuotedString(buf, ent->ExternalID); 1116 xmlBufferWriteChar(buf, " "); 1117 xmlBufferWriteQuotedString(buf, ent->SystemID); 1118 } else { 1119 xmlBufferWriteChar(buf, " SYSTEM "); 1120 xmlBufferWriteQuotedString(buf, ent->SystemID); 1121 } 1122 xmlBufferWriteChar(buf, ">\n"); 1123 break; 1124 default: 1125 xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY, 1126 "xmlDumpEntitiesDecl: internal: unknown type entity type"); 1127 } 1128 } 1129 1130 /** 1131 * xmlDumpEntityDeclScan: 1132 * @ent: An entity table 1133 * @buf: An XML buffer. 1134 * 1135 * When using the hash table scan function, arguments need to be reversed 1136 */ 1137 static void 1138 xmlDumpEntityDeclScan(void *ent, void *buf, 1139 const xmlChar *name ATTRIBUTE_UNUSED) { 1140 xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent); 1141 } 1142 1143 /** 1144 * xmlDumpEntitiesTable: 1145 * @buf: An XML buffer. 1146 * @table: An entity table 1147 * 1148 * This will dump the content of the entity table as an XML DTD definition 1149 */ 1150 void 1151 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) { 1152 xmlHashScan(table, xmlDumpEntityDeclScan, buf); 1153 } 1154 #endif /* LIBXML_OUTPUT_ENABLED */ 1155