1 /*
2 * entities.c : implementation for the XML entities handling
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13
14 #define IN_LIBXML
15 #include "libxml.h"
16
17 #include <string.h>
18 #include <stdlib.h>
19
20 #include <libxml/xmlmemory.h>
21 #include <libxml/hash.h>
22 #include <libxml/entities.h>
23 #include <libxml/parser.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/xmlerror.h>
26 #include <libxml/globals.h>
27 #include <libxml/dict.h>
28
29 #include "save.h"
30
31 /*
32 * The XML predefined entities.
33 */
34
35 static xmlEntity xmlEntityLt = {
36 NULL, XML_ENTITY_DECL, BAD_CAST "lt",
37 NULL, NULL, NULL, NULL, NULL, NULL,
38 BAD_CAST "<", BAD_CAST "<", 1,
39 XML_INTERNAL_PREDEFINED_ENTITY,
40 NULL, NULL, NULL, NULL, 0, 1
41 };
42 static xmlEntity xmlEntityGt = {
43 NULL, XML_ENTITY_DECL, BAD_CAST "gt",
44 NULL, NULL, NULL, NULL, NULL, NULL,
45 BAD_CAST ">", BAD_CAST ">", 1,
46 XML_INTERNAL_PREDEFINED_ENTITY,
47 NULL, NULL, NULL, NULL, 0, 1
48 };
49 static xmlEntity xmlEntityAmp = {
50 NULL, XML_ENTITY_DECL, BAD_CAST "amp",
51 NULL, NULL, NULL, NULL, NULL, NULL,
52 BAD_CAST "&", BAD_CAST "&", 1,
53 XML_INTERNAL_PREDEFINED_ENTITY,
54 NULL, NULL, NULL, NULL, 0, 1
55 };
56 static xmlEntity xmlEntityQuot = {
57 NULL, XML_ENTITY_DECL, BAD_CAST "quot",
58 NULL, NULL, NULL, NULL, NULL, NULL,
59 BAD_CAST "\"", BAD_CAST "\"", 1,
60 XML_INTERNAL_PREDEFINED_ENTITY,
61 NULL, NULL, NULL, NULL, 0, 1
62 };
63 static xmlEntity xmlEntityApos = {
64 NULL, XML_ENTITY_DECL, BAD_CAST "apos",
65 NULL, NULL, NULL, NULL, NULL, NULL,
66 BAD_CAST "'", BAD_CAST "'", 1,
67 XML_INTERNAL_PREDEFINED_ENTITY,
68 NULL, NULL, NULL, NULL, 0, 1
69 };
70
71 /**
72 * xmlEntitiesErrMemory:
73 * @extra: extra information
74 *
75 * Handle an out of memory condition
76 */
77 static void
xmlEntitiesErrMemory(const char * extra)78 xmlEntitiesErrMemory(const char *extra)
79 {
80 __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
81 }
82
83 /**
84 * xmlEntitiesErr:
85 * @code: the error code
86 * @msg: the message
87 *
88 * Raise an error.
89 */
90 static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesErr(xmlParserErrors code,const char * msg)91 xmlEntitiesErr(xmlParserErrors code, const char *msg)
92 {
93 __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
94 }
95
96 /**
97 * xmlEntitiesWarn:
98 * @code: the error code
99 * @msg: the message
100 *
101 * Raise a warning.
102 */
103 static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesWarn(xmlParserErrors code,const char * msg,const xmlChar * str1)104 xmlEntitiesWarn(xmlParserErrors code, const char *msg, const xmlChar *str1)
105 {
106 __xmlRaiseError(NULL, NULL, NULL,
107 NULL, NULL, XML_FROM_TREE, code,
108 XML_ERR_WARNING, NULL, 0,
109 (const char *)str1, NULL, NULL, 0, 0,
110 msg, (const char *)str1, NULL);
111 }
112
113 /*
114 * xmlFreeEntity : clean-up an entity record.
115 */
116 static void
xmlFreeEntity(xmlEntityPtr entity)117 xmlFreeEntity(xmlEntityPtr entity)
118 {
119 xmlDictPtr dict = NULL;
120
121 if (entity == NULL)
122 return;
123
124 if (entity->doc != NULL)
125 dict = entity->doc->dict;
126
127
128 if ((entity->children) && (entity->owner == 1) &&
129 (entity == (xmlEntityPtr) entity->children->parent))
130 xmlFreeNodeList(entity->children);
131 if ((entity->name != NULL) &&
132 ((dict == NULL) || (!xmlDictOwns(dict, entity->name))))
133 xmlFree((char *) entity->name);
134 if (entity->ExternalID != NULL)
135 xmlFree((char *) entity->ExternalID);
136 if (entity->SystemID != NULL)
137 xmlFree((char *) entity->SystemID);
138 if (entity->URI != NULL)
139 xmlFree((char *) entity->URI);
140 if (entity->content != NULL)
141 xmlFree((char *) entity->content);
142 if (entity->orig != NULL)
143 xmlFree((char *) entity->orig);
144 xmlFree(entity);
145 }
146
147 /*
148 * xmlCreateEntity:
149 *
150 * internal routine doing the entity node structures allocations
151 */
152 static xmlEntityPtr
xmlCreateEntity(xmlDictPtr dict,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)153 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
154 const xmlChar *ExternalID, const xmlChar *SystemID,
155 const xmlChar *content) {
156 xmlEntityPtr ret;
157
158 ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
159 if (ret == NULL) {
160 xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
161 return(NULL);
162 }
163 memset(ret, 0, sizeof(xmlEntity));
164 ret->type = XML_ENTITY_DECL;
165 ret->checked = 0;
166
167 /*
168 * fill the structure.
169 */
170 ret->etype = (xmlEntityType) type;
171 if (dict == NULL) {
172 ret->name = xmlStrdup(name);
173 if (ExternalID != NULL)
174 ret->ExternalID = xmlStrdup(ExternalID);
175 if (SystemID != NULL)
176 ret->SystemID = xmlStrdup(SystemID);
177 } else {
178 ret->name = xmlDictLookup(dict, name, -1);
179 ret->ExternalID = xmlStrdup(ExternalID);
180 ret->SystemID = xmlStrdup(SystemID);
181 }
182 if (content != NULL) {
183 ret->length = xmlStrlen(content);
184 ret->content = xmlStrndup(content, ret->length);
185 } else {
186 ret->length = 0;
187 ret->content = NULL;
188 }
189 ret->URI = NULL; /* to be computed by the layer knowing
190 the defining entity */
191 ret->orig = NULL;
192 ret->owner = 0;
193
194 return(ret);
195 }
196
197 /*
198 * xmlAddEntity : register a new entity for an entities table.
199 */
200 static xmlEntityPtr
xmlAddEntity(xmlDtdPtr dtd,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)201 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
202 const xmlChar *ExternalID, const xmlChar *SystemID,
203 const xmlChar *content) {
204 xmlDictPtr dict = NULL;
205 xmlEntitiesTablePtr table = NULL;
206 xmlEntityPtr ret, predef;
207
208 if (name == NULL)
209 return(NULL);
210 if (dtd == NULL)
211 return(NULL);
212 if (dtd->doc != NULL)
213 dict = dtd->doc->dict;
214
215 switch (type) {
216 case XML_INTERNAL_GENERAL_ENTITY:
217 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
218 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
219 predef = xmlGetPredefinedEntity(name);
220 if (predef != NULL) {
221 int valid = 0;
222
223 /* 4.6 Predefined Entities */
224 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
225 (content != NULL)) {
226 int c = predef->content[0];
227
228 if (((content[0] == c) && (content[1] == 0)) &&
229 ((c == '>') || (c == '\'') || (c == '"'))) {
230 valid = 1;
231 } else if ((content[0] == '&') && (content[1] == '#')) {
232 if (content[2] == 'x') {
233 xmlChar *hex = BAD_CAST "0123456789ABCDEF";
234 xmlChar ref[] = "00;";
235
236 ref[0] = hex[c / 16 % 16];
237 ref[1] = hex[c % 16];
238 if (xmlStrcasecmp(&content[3], ref) == 0)
239 valid = 1;
240 } else {
241 xmlChar ref[] = "00;";
242
243 ref[0] = '0' + c / 10 % 10;
244 ref[1] = '0' + c % 10;
245 if (xmlStrEqual(&content[2], ref))
246 valid = 1;
247 }
248 }
249 }
250 if (!valid) {
251 xmlEntitiesWarn(XML_ERR_ENTITY_PROCESSING,
252 "xmlAddEntity: invalid redeclaration of predefined"
253 " entity '%s'", name);
254 return(NULL);
255 }
256 }
257 if (dtd->entities == NULL)
258 dtd->entities = xmlHashCreateDict(0, dict);
259 table = dtd->entities;
260 break;
261 case XML_INTERNAL_PARAMETER_ENTITY:
262 case XML_EXTERNAL_PARAMETER_ENTITY:
263 if (dtd->pentities == NULL)
264 dtd->pentities = xmlHashCreateDict(0, dict);
265 table = dtd->pentities;
266 break;
267 case XML_INTERNAL_PREDEFINED_ENTITY:
268 return(NULL);
269 }
270 if (table == NULL)
271 return(NULL);
272 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
273 if (ret == NULL)
274 return(NULL);
275 ret->doc = dtd->doc;
276
277 if (xmlHashAddEntry(table, name, ret)) {
278 /*
279 * entity was already defined at another level.
280 */
281 xmlFreeEntity(ret);
282 return(NULL);
283 }
284 return(ret);
285 }
286
287 /**
288 * xmlGetPredefinedEntity:
289 * @name: the entity name
290 *
291 * Check whether this name is an predefined entity.
292 *
293 * Returns NULL if not, otherwise the entity
294 */
295 xmlEntityPtr
xmlGetPredefinedEntity(const xmlChar * name)296 xmlGetPredefinedEntity(const xmlChar *name) {
297 if (name == NULL) return(NULL);
298 switch (name[0]) {
299 case 'l':
300 if (xmlStrEqual(name, BAD_CAST "lt"))
301 return(&xmlEntityLt);
302 break;
303 case 'g':
304 if (xmlStrEqual(name, BAD_CAST "gt"))
305 return(&xmlEntityGt);
306 break;
307 case 'a':
308 if (xmlStrEqual(name, BAD_CAST "amp"))
309 return(&xmlEntityAmp);
310 if (xmlStrEqual(name, BAD_CAST "apos"))
311 return(&xmlEntityApos);
312 break;
313 case 'q':
314 if (xmlStrEqual(name, BAD_CAST "quot"))
315 return(&xmlEntityQuot);
316 break;
317 default:
318 break;
319 }
320 return(NULL);
321 }
322
323 /**
324 * xmlAddDtdEntity:
325 * @doc: the document
326 * @name: the entity name
327 * @type: the entity type XML_xxx_yyy_ENTITY
328 * @ExternalID: the entity external ID if available
329 * @SystemID: the entity system ID if available
330 * @content: the entity content
331 *
332 * Register a new entity for this document DTD external subset.
333 *
334 * Returns a pointer to the entity or NULL in case of error
335 */
336 xmlEntityPtr
xmlAddDtdEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)337 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
338 const xmlChar *ExternalID, const xmlChar *SystemID,
339 const xmlChar *content) {
340 xmlEntityPtr ret;
341 xmlDtdPtr dtd;
342
343 if (doc == NULL) {
344 xmlEntitiesErr(XML_DTD_NO_DOC,
345 "xmlAddDtdEntity: document is NULL");
346 return(NULL);
347 }
348 if (doc->extSubset == NULL) {
349 xmlEntitiesErr(XML_DTD_NO_DTD,
350 "xmlAddDtdEntity: document without external subset");
351 return(NULL);
352 }
353 dtd = doc->extSubset;
354 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
355 if (ret == NULL) return(NULL);
356
357 /*
358 * Link it to the DTD
359 */
360 ret->parent = dtd;
361 ret->doc = dtd->doc;
362 if (dtd->last == NULL) {
363 dtd->children = dtd->last = (xmlNodePtr) ret;
364 } else {
365 dtd->last->next = (xmlNodePtr) ret;
366 ret->prev = dtd->last;
367 dtd->last = (xmlNodePtr) ret;
368 }
369 return(ret);
370 }
371
372 /**
373 * xmlAddDocEntity:
374 * @doc: the document
375 * @name: the entity name
376 * @type: the entity type XML_xxx_yyy_ENTITY
377 * @ExternalID: the entity external ID if available
378 * @SystemID: the entity system ID if available
379 * @content: the entity content
380 *
381 * Register a new entity for this document.
382 *
383 * Returns a pointer to the entity or NULL in case of error
384 */
385 xmlEntityPtr
xmlAddDocEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)386 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
387 const xmlChar *ExternalID, const xmlChar *SystemID,
388 const xmlChar *content) {
389 xmlEntityPtr ret;
390 xmlDtdPtr dtd;
391
392 if (doc == NULL) {
393 xmlEntitiesErr(XML_DTD_NO_DOC,
394 "xmlAddDocEntity: document is NULL");
395 return(NULL);
396 }
397 if (doc->intSubset == NULL) {
398 xmlEntitiesErr(XML_DTD_NO_DTD,
399 "xmlAddDocEntity: document without internal subset");
400 return(NULL);
401 }
402 dtd = doc->intSubset;
403 ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
404 if (ret == NULL) return(NULL);
405
406 /*
407 * Link it to the DTD
408 */
409 ret->parent = dtd;
410 ret->doc = dtd->doc;
411 if (dtd->last == NULL) {
412 dtd->children = dtd->last = (xmlNodePtr) ret;
413 } else {
414 dtd->last->next = (xmlNodePtr) ret;
415 ret->prev = dtd->last;
416 dtd->last = (xmlNodePtr) ret;
417 }
418 return(ret);
419 }
420
421 /**
422 * xmlNewEntity:
423 * @doc: the document
424 * @name: the entity name
425 * @type: the entity type XML_xxx_yyy_ENTITY
426 * @ExternalID: the entity external ID if available
427 * @SystemID: the entity system ID if available
428 * @content: the entity content
429 *
430 * Create a new entity, this differs from xmlAddDocEntity() that if
431 * the document is NULL or has no internal subset defined, then an
432 * unlinked entity structure will be returned, it is then the responsibility
433 * of the caller to link it to the document later or free it when not needed
434 * anymore.
435 *
436 * Returns a pointer to the entity or NULL in case of error
437 */
438 xmlEntityPtr
xmlNewEntity(xmlDocPtr doc,const xmlChar * name,int type,const xmlChar * ExternalID,const xmlChar * SystemID,const xmlChar * content)439 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
440 const xmlChar *ExternalID, const xmlChar *SystemID,
441 const xmlChar *content) {
442 xmlEntityPtr ret;
443 xmlDictPtr dict;
444
445 if ((doc != NULL) && (doc->intSubset != NULL)) {
446 return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
447 }
448 if (doc != NULL)
449 dict = doc->dict;
450 else
451 dict = NULL;
452 ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
453 if (ret == NULL)
454 return(NULL);
455 ret->doc = doc;
456 return(ret);
457 }
458
459 /**
460 * xmlGetEntityFromTable:
461 * @table: an entity table
462 * @name: the entity name
463 * @parameter: look for parameter entities
464 *
465 * Do an entity lookup in the table.
466 * returns the corresponding parameter entity, if found.
467 *
468 * Returns A pointer to the entity structure or NULL if not found.
469 */
470 static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table,const xmlChar * name)471 xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
472 return((xmlEntityPtr) xmlHashLookup(table, name));
473 }
474
475 /**
476 * xmlGetParameterEntity:
477 * @doc: the document referencing the entity
478 * @name: the entity name
479 *
480 * Do an entity lookup in the internal and external subsets and
481 * returns the corresponding parameter entity, if found.
482 *
483 * Returns A pointer to the entity structure or NULL if not found.
484 */
485 xmlEntityPtr
xmlGetParameterEntity(xmlDocPtr doc,const xmlChar * name)486 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
487 xmlEntitiesTablePtr table;
488 xmlEntityPtr ret;
489
490 if (doc == NULL)
491 return(NULL);
492 if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
493 table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
494 ret = xmlGetEntityFromTable(table, name);
495 if (ret != NULL)
496 return(ret);
497 }
498 if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
499 table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
500 return(xmlGetEntityFromTable(table, name));
501 }
502 return(NULL);
503 }
504
505 /**
506 * xmlGetDtdEntity:
507 * @doc: the document referencing the entity
508 * @name: the entity name
509 *
510 * Do an entity lookup in the DTD entity hash table and
511 * returns the corresponding entity, if found.
512 * Note: the first argument is the document node, not the DTD node.
513 *
514 * Returns A pointer to the entity structure or NULL if not found.
515 */
516 xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc,const xmlChar * name)517 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
518 xmlEntitiesTablePtr table;
519
520 if (doc == NULL)
521 return(NULL);
522 if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
523 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
524 return(xmlGetEntityFromTable(table, name));
525 }
526 return(NULL);
527 }
528
529 /**
530 * xmlGetDocEntity:
531 * @doc: the document referencing the entity
532 * @name: the entity name
533 *
534 * Do an entity lookup in the document entity hash table and
535 * returns the corresponding entity, otherwise a lookup is done
536 * in the predefined entities too.
537 *
538 * Returns A pointer to the entity structure or NULL if not found.
539 */
540 xmlEntityPtr
xmlGetDocEntity(const xmlDoc * doc,const xmlChar * name)541 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
542 xmlEntityPtr cur;
543 xmlEntitiesTablePtr table;
544
545 if (doc != NULL) {
546 if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
547 table = (xmlEntitiesTablePtr) doc->intSubset->entities;
548 cur = xmlGetEntityFromTable(table, name);
549 if (cur != NULL)
550 return(cur);
551 }
552 if (doc->standalone != 1) {
553 if ((doc->extSubset != NULL) &&
554 (doc->extSubset->entities != NULL)) {
555 table = (xmlEntitiesTablePtr) doc->extSubset->entities;
556 cur = xmlGetEntityFromTable(table, name);
557 if (cur != NULL)
558 return(cur);
559 }
560 }
561 }
562 return(xmlGetPredefinedEntity(name));
563 }
564
/*
 * growBufferReentrant:
 *
 * Macro used to grow the current buffer: doubles buffer_size and
 * reallocates buffer accordingly. It expects the enclosing function to
 * provide the variables "buffer" (xmlChar *) and "buffer_size" (size_t)
 * and a "mem_error" label, which is jumped to when the size computation
 * wraps around or the reallocation fails; in that case buffer and
 * buffer_size are left untouched.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. in an unbraced if/else.
 */
#define growBufferReentrant() do {                                  \
    xmlChar *tmp;                                                   \
    size_t new_size = buffer_size * 2;                              \
    if (new_size < buffer_size) goto mem_error;                     \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                 \
    if (tmp == NULL) goto mem_error;                                \
    buffer = tmp;                                                   \
    buffer_size = new_size;                                         \
} while (0)
577
578 /**
579 * xmlEncodeEntitiesInternal:
580 * @doc: the document containing the string
581 * @input: A string to convert to XML.
582 * @attr: are we handling an attribute value
583 *
584 * Do a global encoding of a string, replacing the predefined entities
585 * and non ASCII values with their entities and CharRef counterparts.
586 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
587 * must be deallocated.
588 *
589 * Returns A newly allocated string with the substitution done.
590 */
591 static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc,const xmlChar * input,int attr)592 xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
593 const xmlChar *cur = input;
594 xmlChar *buffer = NULL;
595 xmlChar *out = NULL;
596 size_t buffer_size = 0;
597 int html = 0;
598
599 if (input == NULL) return(NULL);
600 if (doc != NULL)
601 html = (doc->type == XML_HTML_DOCUMENT_NODE);
602
603 /*
604 * allocate an translation buffer.
605 */
606 buffer_size = 1000;
607 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
608 if (buffer == NULL) {
609 xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
610 return(NULL);
611 }
612 out = buffer;
613
614 while (*cur != '\0') {
615 size_t indx = out - buffer;
616 if (indx + 100 > buffer_size) {
617
618 growBufferReentrant();
619 out = &buffer[indx];
620 }
621
622 /*
623 * By default one have to encode at least '<', '>', '"' and '&' !
624 */
625 if (*cur == '<') {
626 const xmlChar *end;
627
628 /*
629 * Special handling of server side include in HTML attributes
630 */
631 if (html && attr &&
632 (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
633 ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
634 while (cur != end) {
635 *out++ = *cur++;
636 indx = out - buffer;
637 if (indx + 100 > buffer_size) {
638 growBufferReentrant();
639 out = &buffer[indx];
640 }
641 }
642 *out++ = *cur++;
643 *out++ = *cur++;
644 *out++ = *cur++;
645 continue;
646 }
647 *out++ = '&';
648 *out++ = 'l';
649 *out++ = 't';
650 *out++ = ';';
651 } else if (*cur == '>') {
652 *out++ = '&';
653 *out++ = 'g';
654 *out++ = 't';
655 *out++ = ';';
656 } else if (*cur == '&') {
657 /*
658 * Special handling of &{...} construct from HTML 4, see
659 * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
660 */
661 if (html && attr && (cur[1] == '{') &&
662 (strchr((const char *) cur, '}'))) {
663 while (*cur != '}') {
664 *out++ = *cur++;
665 indx = out - buffer;
666 if (indx + 100 > buffer_size) {
667 growBufferReentrant();
668 out = &buffer[indx];
669 }
670 }
671 *out++ = *cur++;
672 continue;
673 }
674 *out++ = '&';
675 *out++ = 'a';
676 *out++ = 'm';
677 *out++ = 'p';
678 *out++ = ';';
679 } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
680 (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
681 /*
682 * default case, just copy !
683 */
684 *out++ = *cur;
685 } else if (*cur >= 0x80) {
686 if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
687 /*
688 * Bjørn Reese <br@sseusa.com> provided the patch
689 xmlChar xc;
690 xc = (*cur & 0x3F) << 6;
691 if (cur[1] != 0) {
692 xc += *(++cur) & 0x3F;
693 *out++ = xc;
694 } else
695 */
696 *out++ = *cur;
697 } else {
698 /*
699 * We assume we have UTF-8 input.
700 * It must match either:
701 * 110xxxxx 10xxxxxx
702 * 1110xxxx 10xxxxxx 10xxxxxx
703 * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
704 * That is:
705 * cur[0] is 11xxxxxx
706 * cur[1] is 10xxxxxx
707 * cur[2] is 10xxxxxx if cur[0] is 111xxxxx
708 * cur[3] is 10xxxxxx if cur[0] is 1111xxxx
709 * cur[0] is not 11111xxx
710 */
711 char buf[11], *ptr;
712 int val = 0, l = 1;
713
714 if (((cur[0] & 0xC0) != 0xC0) ||
715 ((cur[1] & 0xC0) != 0x80) ||
716 (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
717 (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
718 (((cur[0] & 0xF8) == 0xF8))) {
719 xmlEntitiesErr(XML_CHECK_NOT_UTF8,
720 "xmlEncodeEntities: input not UTF-8");
721 if (doc != NULL)
722 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
723 snprintf(buf, sizeof(buf), "&#%d;", *cur);
724 buf[sizeof(buf) - 1] = 0;
725 ptr = buf;
726 while (*ptr != 0) *out++ = *ptr++;
727 cur++;
728 continue;
729 } else if (*cur < 0xE0) {
730 val = (cur[0]) & 0x1F;
731 val <<= 6;
732 val |= (cur[1]) & 0x3F;
733 l = 2;
734 } else if (*cur < 0xF0) {
735 val = (cur[0]) & 0x0F;
736 val <<= 6;
737 val |= (cur[1]) & 0x3F;
738 val <<= 6;
739 val |= (cur[2]) & 0x3F;
740 l = 3;
741 } else if (*cur < 0xF8) {
742 val = (cur[0]) & 0x07;
743 val <<= 6;
744 val |= (cur[1]) & 0x3F;
745 val <<= 6;
746 val |= (cur[2]) & 0x3F;
747 val <<= 6;
748 val |= (cur[3]) & 0x3F;
749 l = 4;
750 }
751 if ((l == 1) || (!IS_CHAR(val))) {
752 xmlEntitiesErr(XML_ERR_INVALID_CHAR,
753 "xmlEncodeEntities: char out of range\n");
754 if (doc != NULL)
755 doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
756 snprintf(buf, sizeof(buf), "&#%d;", *cur);
757 buf[sizeof(buf) - 1] = 0;
758 ptr = buf;
759 while (*ptr != 0) *out++ = *ptr++;
760 cur++;
761 continue;
762 }
763 /*
764 * We could do multiple things here. Just save as a char ref
765 */
766 snprintf(buf, sizeof(buf), "&#x%X;", val);
767 buf[sizeof(buf) - 1] = 0;
768 ptr = buf;
769 while (*ptr != 0) *out++ = *ptr++;
770 cur += l;
771 continue;
772 }
773 } else if (IS_BYTE_CHAR(*cur)) {
774 char buf[11], *ptr;
775
776 snprintf(buf, sizeof(buf), "&#%d;", *cur);
777 buf[sizeof(buf) - 1] = 0;
778 ptr = buf;
779 while (*ptr != 0) *out++ = *ptr++;
780 }
781 cur++;
782 }
783 *out = 0;
784 return(buffer);
785
786 mem_error:
787 xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
788 xmlFree(buffer);
789 return(NULL);
790 }
791
792 /**
793 * xmlEncodeAttributeEntities:
794 * @doc: the document containing the string
795 * @input: A string to convert to XML.
796 *
797 * Do a global encoding of a string, replacing the predefined entities
798 * and non ASCII values with their entities and CharRef counterparts for
799 * attribute values.
800 *
801 * Returns A newly allocated string with the substitution done.
802 */
803 xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc,const xmlChar * input)804 xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
805 return xmlEncodeEntitiesInternal(doc, input, 1);
806 }
807
808 /**
809 * xmlEncodeEntitiesReentrant:
810 * @doc: the document containing the string
811 * @input: A string to convert to XML.
812 *
813 * Do a global encoding of a string, replacing the predefined entities
814 * and non ASCII values with their entities and CharRef counterparts.
815 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
816 * must be deallocated.
817 *
818 * Returns A newly allocated string with the substitution done.
819 */
820 xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc,const xmlChar * input)821 xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
822 return xmlEncodeEntitiesInternal(doc, input, 0);
823 }
824
825 /**
826 * xmlEncodeSpecialChars:
827 * @doc: the document containing the string
828 * @input: A string to convert to XML.
829 *
830 * Do a global encoding of a string, replacing the predefined entities
831 * this routine is reentrant, and result must be deallocated.
832 *
833 * Returns A newly allocated string with the substitution done.
834 */
835 xmlChar *
xmlEncodeSpecialChars(const xmlDoc * doc ATTRIBUTE_UNUSED,const xmlChar * input)836 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
837 const xmlChar *cur = input;
838 xmlChar *buffer = NULL;
839 xmlChar *out = NULL;
840 size_t buffer_size = 0;
841 if (input == NULL) return(NULL);
842
843 /*
844 * allocate an translation buffer.
845 */
846 buffer_size = 1000;
847 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
848 if (buffer == NULL) {
849 xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
850 return(NULL);
851 }
852 out = buffer;
853
854 while (*cur != '\0') {
855 size_t indx = out - buffer;
856 if (indx + 10 > buffer_size) {
857
858 growBufferReentrant();
859 out = &buffer[indx];
860 }
861
862 /*
863 * By default one have to encode at least '<', '>', '"' and '&' !
864 */
865 if (*cur == '<') {
866 *out++ = '&';
867 *out++ = 'l';
868 *out++ = 't';
869 *out++ = ';';
870 } else if (*cur == '>') {
871 *out++ = '&';
872 *out++ = 'g';
873 *out++ = 't';
874 *out++ = ';';
875 } else if (*cur == '&') {
876 *out++ = '&';
877 *out++ = 'a';
878 *out++ = 'm';
879 *out++ = 'p';
880 *out++ = ';';
881 } else if (*cur == '"') {
882 *out++ = '&';
883 *out++ = 'q';
884 *out++ = 'u';
885 *out++ = 'o';
886 *out++ = 't';
887 *out++ = ';';
888 } else if (*cur == '\r') {
889 *out++ = '&';
890 *out++ = '#';
891 *out++ = '1';
892 *out++ = '3';
893 *out++ = ';';
894 } else {
895 /*
896 * Works because on UTF-8, all extended sequences cannot
897 * result in bytes in the ASCII range.
898 */
899 *out++ = *cur;
900 }
901 cur++;
902 }
903 *out = 0;
904 return(buffer);
905
906 mem_error:
907 xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
908 xmlFree(buffer);
909 return(NULL);
910 }
911
912 /**
913 * xmlCreateEntitiesTable:
914 *
915 * create and initialize an empty entities hash table.
916 * This really doesn't make sense and should be deprecated
917 *
918 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
919 */
920 xmlEntitiesTablePtr
xmlCreateEntitiesTable(void)921 xmlCreateEntitiesTable(void) {
922 return((xmlEntitiesTablePtr) xmlHashCreate(0));
923 }
924
925 /**
926 * xmlFreeEntityWrapper:
927 * @entity: An entity
928 * @name: its name
929 *
930 * Deallocate the memory used by an entities in the hash table.
931 */
932 static void
xmlFreeEntityWrapper(void * entity,const xmlChar * name ATTRIBUTE_UNUSED)933 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
934 if (entity != NULL)
935 xmlFreeEntity((xmlEntityPtr) entity);
936 }
937
938 /**
939 * xmlFreeEntitiesTable:
940 * @table: An entity table
941 *
942 * Deallocate the memory used by an entities hash table.
943 */
944 void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table)945 xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
946 xmlHashFree(table, xmlFreeEntityWrapper);
947 }
948
949 #ifdef LIBXML_TREE_ENABLED
950 /**
951 * xmlCopyEntity:
952 * @ent: An entity
953 *
954 * Build a copy of an entity
955 *
956 * Returns the new xmlEntitiesPtr or NULL in case of error.
957 */
958 static void *
xmlCopyEntity(void * payload,const xmlChar * name ATTRIBUTE_UNUSED)959 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
960 xmlEntityPtr ent = (xmlEntityPtr) payload;
961 xmlEntityPtr cur;
962
963 cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
964 if (cur == NULL) {
965 xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
966 return(NULL);
967 }
968 memset(cur, 0, sizeof(xmlEntity));
969 cur->type = XML_ENTITY_DECL;
970
971 cur->etype = ent->etype;
972 if (ent->name != NULL)
973 cur->name = xmlStrdup(ent->name);
974 if (ent->ExternalID != NULL)
975 cur->ExternalID = xmlStrdup(ent->ExternalID);
976 if (ent->SystemID != NULL)
977 cur->SystemID = xmlStrdup(ent->SystemID);
978 if (ent->content != NULL)
979 cur->content = xmlStrdup(ent->content);
980 if (ent->orig != NULL)
981 cur->orig = xmlStrdup(ent->orig);
982 if (ent->URI != NULL)
983 cur->URI = xmlStrdup(ent->URI);
984 return(cur);
985 }
986
987 /**
988 * xmlCopyEntitiesTable:
989 * @table: An entity table
990 *
991 * Build a copy of an entity table.
992 *
993 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
994 */
995 xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table)996 xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
997 return(xmlHashCopy(table, xmlCopyEntity));
998 }
999 #endif /* LIBXML_TREE_ENABLED */
1000
1001 #ifdef LIBXML_OUTPUT_ENABLED
1002
1003 /**
1004 * xmlDumpEntityContent:
1005 * @buf: An XML buffer.
1006 * @content: The entity content.
1007 *
1008 * This will dump the quoted string value, taking care of the special
1009 * treatment required by %
1010 */
1011 static void
xmlDumpEntityContent(xmlBufferPtr buf,const xmlChar * content)1012 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
1013 if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
1014 if (xmlStrchr(content, '%')) {
1015 const xmlChar * base, *cur;
1016
1017 xmlBufferCCat(buf, "\"");
1018 base = cur = content;
1019 while (*cur != 0) {
1020 if (*cur == '"') {
1021 if (base != cur)
1022 xmlBufferAdd(buf, base, cur - base);
1023 xmlBufferAdd(buf, BAD_CAST """, 6);
1024 cur++;
1025 base = cur;
1026 } else if (*cur == '%') {
1027 if (base != cur)
1028 xmlBufferAdd(buf, base, cur - base);
1029 xmlBufferAdd(buf, BAD_CAST "%", 6);
1030 cur++;
1031 base = cur;
1032 } else {
1033 cur++;
1034 }
1035 }
1036 if (base != cur)
1037 xmlBufferAdd(buf, base, cur - base);
1038 xmlBufferCCat(buf, "\"");
1039 } else {
1040 xmlBufferWriteQuotedString(buf, content);
1041 }
1042 }
1043
1044 /**
1045 * xmlDumpEntityDecl:
1046 * @buf: An XML buffer.
1047 * @ent: An entity table
1048 *
1049 * This will dump the content of the entity table as an XML DTD definition
1050 */
1051 void
xmlDumpEntityDecl(xmlBufferPtr buf,xmlEntityPtr ent)1052 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
1053 if ((buf == NULL) || (ent == NULL)) return;
1054 switch (ent->etype) {
1055 case XML_INTERNAL_GENERAL_ENTITY:
1056 xmlBufferWriteChar(buf, "<!ENTITY ");
1057 xmlBufferWriteCHAR(buf, ent->name);
1058 xmlBufferWriteChar(buf, " ");
1059 if (ent->orig != NULL)
1060 xmlBufferWriteQuotedString(buf, ent->orig);
1061 else
1062 xmlDumpEntityContent(buf, ent->content);
1063 xmlBufferWriteChar(buf, ">\n");
1064 break;
1065 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1066 xmlBufferWriteChar(buf, "<!ENTITY ");
1067 xmlBufferWriteCHAR(buf, ent->name);
1068 if (ent->ExternalID != NULL) {
1069 xmlBufferWriteChar(buf, " PUBLIC ");
1070 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1071 xmlBufferWriteChar(buf, " ");
1072 xmlBufferWriteQuotedString(buf, ent->SystemID);
1073 } else {
1074 xmlBufferWriteChar(buf, " SYSTEM ");
1075 xmlBufferWriteQuotedString(buf, ent->SystemID);
1076 }
1077 xmlBufferWriteChar(buf, ">\n");
1078 break;
1079 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1080 xmlBufferWriteChar(buf, "<!ENTITY ");
1081 xmlBufferWriteCHAR(buf, ent->name);
1082 if (ent->ExternalID != NULL) {
1083 xmlBufferWriteChar(buf, " PUBLIC ");
1084 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1085 xmlBufferWriteChar(buf, " ");
1086 xmlBufferWriteQuotedString(buf, ent->SystemID);
1087 } else {
1088 xmlBufferWriteChar(buf, " SYSTEM ");
1089 xmlBufferWriteQuotedString(buf, ent->SystemID);
1090 }
1091 if (ent->content != NULL) { /* Should be true ! */
1092 xmlBufferWriteChar(buf, " NDATA ");
1093 if (ent->orig != NULL)
1094 xmlBufferWriteCHAR(buf, ent->orig);
1095 else
1096 xmlBufferWriteCHAR(buf, ent->content);
1097 }
1098 xmlBufferWriteChar(buf, ">\n");
1099 break;
1100 case XML_INTERNAL_PARAMETER_ENTITY:
1101 xmlBufferWriteChar(buf, "<!ENTITY % ");
1102 xmlBufferWriteCHAR(buf, ent->name);
1103 xmlBufferWriteChar(buf, " ");
1104 if (ent->orig == NULL)
1105 xmlDumpEntityContent(buf, ent->content);
1106 else
1107 xmlBufferWriteQuotedString(buf, ent->orig);
1108 xmlBufferWriteChar(buf, ">\n");
1109 break;
1110 case XML_EXTERNAL_PARAMETER_ENTITY:
1111 xmlBufferWriteChar(buf, "<!ENTITY % ");
1112 xmlBufferWriteCHAR(buf, ent->name);
1113 if (ent->ExternalID != NULL) {
1114 xmlBufferWriteChar(buf, " PUBLIC ");
1115 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1116 xmlBufferWriteChar(buf, " ");
1117 xmlBufferWriteQuotedString(buf, ent->SystemID);
1118 } else {
1119 xmlBufferWriteChar(buf, " SYSTEM ");
1120 xmlBufferWriteQuotedString(buf, ent->SystemID);
1121 }
1122 xmlBufferWriteChar(buf, ">\n");
1123 break;
1124 default:
1125 xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
1126 "xmlDumpEntitiesDecl: internal: unknown type entity type");
1127 }
1128 }
1129
1130 /**
1131 * xmlDumpEntityDeclScan:
1132 * @ent: An entity table
1133 * @buf: An XML buffer.
1134 *
1135 * When using the hash table scan function, arguments need to be reversed
1136 */
1137 static void
xmlDumpEntityDeclScan(void * ent,void * buf,const xmlChar * name ATTRIBUTE_UNUSED)1138 xmlDumpEntityDeclScan(void *ent, void *buf,
1139 const xmlChar *name ATTRIBUTE_UNUSED) {
1140 xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
1141 }
1142
1143 /**
1144 * xmlDumpEntitiesTable:
1145 * @buf: An XML buffer.
1146 * @table: An entity table
1147 *
1148 * This will dump the content of the entity table as an XML DTD definition
1149 */
1150 void
xmlDumpEntitiesTable(xmlBufferPtr buf,xmlEntitiesTablePtr table)1151 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1152 xmlHashScan(table, xmlDumpEntityDeclScan, buf);
1153 }
1154 #endif /* LIBXML_OUTPUT_ENABLED */
1155