xref: /reactos/sdk/lib/3rdparty/libxml2/entities.c (revision 09dde2cf)
1 /*
2  * entities.c : implementation for the XML entities handling
3  *
4  * See Copyright for the status of this software.
5  *
6  * daniel@veillard.com
7  */
8 
9 /* To avoid EBCDIC trouble when parsing on zOS */
10 #if defined(__MVS__)
11 #pragma convert("ISO8859-1")
12 #endif
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #include <string.h>
18 #include <stdlib.h>
19 
20 #include <libxml/xmlmemory.h>
21 #include <libxml/hash.h>
22 #include <libxml/entities.h>
23 #include <libxml/parser.h>
24 #include <libxml/parserInternals.h>
25 #include <libxml/xmlerror.h>
26 #include <libxml/globals.h>
27 #include <libxml/dict.h>
28 
29 #include "save.h"
30 
/*
 * The XML predefined entities.
 *
 * One statically allocated record per predefined entity (lt, gt, amp,
 * quot, apos). Every record is tagged XML_ENTITY_DECL and
 * XML_INTERNAL_PREDEFINED_ENTITY, carries its single replacement
 * character twice and a length of 1. xmlGetPredefinedEntity() returns
 * the address of these objects, so they live in static storage and
 * are never allocated with xmlMalloc().
 *
 * NOTE(review): the initializers are positional; they have to stay in
 * sync with the field order of xmlEntity as declared in
 * <libxml/entities.h> - confirm against the header before reordering.
 */

static xmlEntity xmlEntityLt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "lt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "<", BAD_CAST "<", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityGt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "gt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST ">", BAD_CAST ">", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityAmp = {
    NULL, XML_ENTITY_DECL, BAD_CAST "amp",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "&", BAD_CAST "&", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityQuot = {
    NULL, XML_ENTITY_DECL, BAD_CAST "quot",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "\"", BAD_CAST "\"", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityApos = {
    NULL, XML_ENTITY_DECL, BAD_CAST "apos",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "'", BAD_CAST "'", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
70 
/**
 * xmlEntitiesErrMemory:
 * @extra:  extra information
 *
 * Handle an out of memory condition.
 * The condition is reported through __xmlSimpleError() with the
 * XML_FROM_TREE domain and the XML_ERR_NO_MEMORY code; @extra is
 * forwarded unchanged as the last argument.
 */
static void
xmlEntitiesErrMemory(const char *extra)
{
    __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
}
82 
/**
 * xmlEntitiesErr:
 * @code:  the error code
 * @msg:  the message
 *
 * Raise an error.
 * The error is reported through __xmlSimpleError() with the
 * XML_FROM_TREE domain; no node and no extra string are attached.
 */
static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesErr(xmlParserErrors code, const char *msg)
{
    __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
}
95 
/**
 * xmlEntitiesWarn:
 * @code:  the error code
 * @msg:  the message
 * @str1:  a string forwarded along with the message
 *
 * Raise a warning.
 * The warning is raised through __xmlRaiseError() with the
 * XML_FROM_TREE domain and the XML_ERR_WARNING level. @str1 is passed
 * twice: once among the extra string arguments and once right after
 * @msg, presumably as the value consumed by a '%s' in @msg (the caller
 * in xmlAddEntity() uses such a format).
 */
static void LIBXML_ATTR_FORMAT(2,0)
xmlEntitiesWarn(xmlParserErrors code, const char *msg, const xmlChar *str1)
{
    __xmlRaiseError(NULL, NULL, NULL,
                NULL, NULL, XML_FROM_TREE, code,
                XML_ERR_WARNING, NULL, 0,
                (const char *)str1, NULL, NULL, 0, 0,
                msg, (const char *)str1, NULL);
}
112 
113 /*
114  * xmlFreeEntity : clean-up an entity record.
115  */
116 static void
117 xmlFreeEntity(xmlEntityPtr entity)
118 {
119     xmlDictPtr dict = NULL;
120 
121     if (entity == NULL)
122         return;
123 
124     if (entity->doc != NULL)
125         dict = entity->doc->dict;
126 
127 
128     if ((entity->children) && (entity->owner == 1) &&
129         (entity == (xmlEntityPtr) entity->children->parent))
130         xmlFreeNodeList(entity->children);
131     if ((entity->name != NULL) &&
132         ((dict == NULL) || (!xmlDictOwns(dict, entity->name))))
133         xmlFree((char *) entity->name);
134     if (entity->ExternalID != NULL)
135         xmlFree((char *) entity->ExternalID);
136     if (entity->SystemID != NULL)
137         xmlFree((char *) entity->SystemID);
138     if (entity->URI != NULL)
139         xmlFree((char *) entity->URI);
140     if (entity->content != NULL)
141         xmlFree((char *) entity->content);
142     if (entity->orig != NULL)
143         xmlFree((char *) entity->orig);
144     xmlFree(entity);
145 }
146 
147 /*
148  * xmlCreateEntity:
149  *
150  * internal routine doing the entity node structures allocations
151  */
152 static xmlEntityPtr
153 xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
154 	        const xmlChar *ExternalID, const xmlChar *SystemID,
155 	        const xmlChar *content) {
156     xmlEntityPtr ret;
157 
158     ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
159     if (ret == NULL) {
160         xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
161 	return(NULL);
162     }
163     memset(ret, 0, sizeof(xmlEntity));
164     ret->type = XML_ENTITY_DECL;
165     ret->checked = 0;
166 
167     /*
168      * fill the structure.
169      */
170     ret->etype = (xmlEntityType) type;
171     if (dict == NULL) {
172 	ret->name = xmlStrdup(name);
173 	if (ExternalID != NULL)
174 	    ret->ExternalID = xmlStrdup(ExternalID);
175 	if (SystemID != NULL)
176 	    ret->SystemID = xmlStrdup(SystemID);
177     } else {
178         ret->name = xmlDictLookup(dict, name, -1);
179 	ret->ExternalID = xmlStrdup(ExternalID);
180 	ret->SystemID = xmlStrdup(SystemID);
181     }
182     if (content != NULL) {
183         ret->length = xmlStrlen(content);
184 	ret->content = xmlStrndup(content, ret->length);
185      } else {
186         ret->length = 0;
187         ret->content = NULL;
188     }
189     ret->URI = NULL; /* to be computed by the layer knowing
190 			the defining entity */
191     ret->orig = NULL;
192     ret->owner = 0;
193 
194     return(ret);
195 }
196 
197 /*
198  * xmlAddEntity : register a new entity for an entities table.
199  */
200 static xmlEntityPtr
201 xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
202 	  const xmlChar *ExternalID, const xmlChar *SystemID,
203 	  const xmlChar *content) {
204     xmlDictPtr dict = NULL;
205     xmlEntitiesTablePtr table = NULL;
206     xmlEntityPtr ret, predef;
207 
208     if (name == NULL)
209 	return(NULL);
210     if (dtd == NULL)
211 	return(NULL);
212     if (dtd->doc != NULL)
213         dict = dtd->doc->dict;
214 
215     switch (type) {
216         case XML_INTERNAL_GENERAL_ENTITY:
217         case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
218         case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
219             predef = xmlGetPredefinedEntity(name);
220             if (predef != NULL) {
221                 int valid = 0;
222 
223                 /* 4.6 Predefined Entities */
224                 if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
225                     (content != NULL)) {
226                     int c = predef->content[0];
227 
228                     if (((content[0] == c) && (content[1] == 0)) &&
229                         ((c == '>') || (c == '\'') || (c == '"'))) {
230                         valid = 1;
231                     } else if ((content[0] == '&') && (content[1] == '#')) {
232                         if (content[2] == 'x') {
233                             xmlChar *hex = BAD_CAST "0123456789ABCDEF";
234                             xmlChar ref[] = "00;";
235 
236                             ref[0] = hex[c / 16 % 16];
237                             ref[1] = hex[c % 16];
238                             if (xmlStrcasecmp(&content[3], ref) == 0)
239                                 valid = 1;
240                         } else {
241                             xmlChar ref[] = "00;";
242 
243                             ref[0] = '0' + c / 10 % 10;
244                             ref[1] = '0' + c % 10;
245                             if (xmlStrEqual(&content[2], ref))
246                                 valid = 1;
247                         }
248                     }
249                 }
250                 if (!valid) {
251                     xmlEntitiesWarn(XML_ERR_ENTITY_PROCESSING,
252                             "xmlAddEntity: invalid redeclaration of predefined"
253                             " entity '%s'", name);
254                     return(NULL);
255                 }
256             }
257 	    if (dtd->entities == NULL)
258 		dtd->entities = xmlHashCreateDict(0, dict);
259 	    table = dtd->entities;
260 	    break;
261         case XML_INTERNAL_PARAMETER_ENTITY:
262         case XML_EXTERNAL_PARAMETER_ENTITY:
263 	    if (dtd->pentities == NULL)
264 		dtd->pentities = xmlHashCreateDict(0, dict);
265 	    table = dtd->pentities;
266 	    break;
267         case XML_INTERNAL_PREDEFINED_ENTITY:
268 	    return(NULL);
269     }
270     if (table == NULL)
271 	return(NULL);
272     ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
273     if (ret == NULL)
274         return(NULL);
275     ret->doc = dtd->doc;
276 
277     if (xmlHashAddEntry(table, name, ret)) {
278 	/*
279 	 * entity was already defined at another level.
280 	 */
281         xmlFreeEntity(ret);
282 	return(NULL);
283     }
284     return(ret);
285 }
286 
287 /**
288  * xmlGetPredefinedEntity:
289  * @name:  the entity name
290  *
291  * Check whether this name is an predefined entity.
292  *
293  * Returns NULL if not, otherwise the entity
294  */
295 xmlEntityPtr
296 xmlGetPredefinedEntity(const xmlChar *name) {
297     if (name == NULL) return(NULL);
298     switch (name[0]) {
299         case 'l':
300 	    if (xmlStrEqual(name, BAD_CAST "lt"))
301 	        return(&xmlEntityLt);
302 	    break;
303         case 'g':
304 	    if (xmlStrEqual(name, BAD_CAST "gt"))
305 	        return(&xmlEntityGt);
306 	    break;
307         case 'a':
308 	    if (xmlStrEqual(name, BAD_CAST "amp"))
309 	        return(&xmlEntityAmp);
310 	    if (xmlStrEqual(name, BAD_CAST "apos"))
311 	        return(&xmlEntityApos);
312 	    break;
313         case 'q':
314 	    if (xmlStrEqual(name, BAD_CAST "quot"))
315 	        return(&xmlEntityQuot);
316 	    break;
317 	default:
318 	    break;
319     }
320     return(NULL);
321 }
322 
323 /**
324  * xmlAddDtdEntity:
325  * @doc:  the document
326  * @name:  the entity name
327  * @type:  the entity type XML_xxx_yyy_ENTITY
328  * @ExternalID:  the entity external ID if available
329  * @SystemID:  the entity system ID if available
330  * @content:  the entity content
331  *
332  * Register a new entity for this document DTD external subset.
333  *
334  * Returns a pointer to the entity or NULL in case of error
335  */
336 xmlEntityPtr
337 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
338 	        const xmlChar *ExternalID, const xmlChar *SystemID,
339 		const xmlChar *content) {
340     xmlEntityPtr ret;
341     xmlDtdPtr dtd;
342 
343     if (doc == NULL) {
344 	xmlEntitiesErr(XML_DTD_NO_DOC,
345 	        "xmlAddDtdEntity: document is NULL");
346 	return(NULL);
347     }
348     if (doc->extSubset == NULL) {
349 	xmlEntitiesErr(XML_DTD_NO_DTD,
350 	        "xmlAddDtdEntity: document without external subset");
351 	return(NULL);
352     }
353     dtd = doc->extSubset;
354     ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
355     if (ret == NULL) return(NULL);
356 
357     /*
358      * Link it to the DTD
359      */
360     ret->parent = dtd;
361     ret->doc = dtd->doc;
362     if (dtd->last == NULL) {
363 	dtd->children = dtd->last = (xmlNodePtr) ret;
364     } else {
365         dtd->last->next = (xmlNodePtr) ret;
366 	ret->prev = dtd->last;
367 	dtd->last = (xmlNodePtr) ret;
368     }
369     return(ret);
370 }
371 
372 /**
373  * xmlAddDocEntity:
374  * @doc:  the document
375  * @name:  the entity name
376  * @type:  the entity type XML_xxx_yyy_ENTITY
377  * @ExternalID:  the entity external ID if available
378  * @SystemID:  the entity system ID if available
379  * @content:  the entity content
380  *
381  * Register a new entity for this document.
382  *
383  * Returns a pointer to the entity or NULL in case of error
384  */
385 xmlEntityPtr
386 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
387 	        const xmlChar *ExternalID, const xmlChar *SystemID,
388 	        const xmlChar *content) {
389     xmlEntityPtr ret;
390     xmlDtdPtr dtd;
391 
392     if (doc == NULL) {
393 	xmlEntitiesErr(XML_DTD_NO_DOC,
394 	        "xmlAddDocEntity: document is NULL");
395 	return(NULL);
396     }
397     if (doc->intSubset == NULL) {
398 	xmlEntitiesErr(XML_DTD_NO_DTD,
399 	        "xmlAddDocEntity: document without internal subset");
400 	return(NULL);
401     }
402     dtd = doc->intSubset;
403     ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
404     if (ret == NULL) return(NULL);
405 
406     /*
407      * Link it to the DTD
408      */
409     ret->parent = dtd;
410     ret->doc = dtd->doc;
411     if (dtd->last == NULL) {
412 	dtd->children = dtd->last = (xmlNodePtr) ret;
413     } else {
414 	dtd->last->next = (xmlNodePtr) ret;
415 	ret->prev = dtd->last;
416 	dtd->last = (xmlNodePtr) ret;
417     }
418     return(ret);
419 }
420 
421 /**
422  * xmlNewEntity:
423  * @doc:  the document
424  * @name:  the entity name
425  * @type:  the entity type XML_xxx_yyy_ENTITY
426  * @ExternalID:  the entity external ID if available
427  * @SystemID:  the entity system ID if available
428  * @content:  the entity content
429  *
430  * Create a new entity, this differs from xmlAddDocEntity() that if
431  * the document is NULL or has no internal subset defined, then an
432  * unlinked entity structure will be returned, it is then the responsibility
433  * of the caller to link it to the document later or free it when not needed
434  * anymore.
435  *
436  * Returns a pointer to the entity or NULL in case of error
437  */
438 xmlEntityPtr
439 xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
440 	     const xmlChar *ExternalID, const xmlChar *SystemID,
441 	     const xmlChar *content) {
442     xmlEntityPtr ret;
443     xmlDictPtr dict;
444 
445     if ((doc != NULL) && (doc->intSubset != NULL)) {
446 	return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
447     }
448     if (doc != NULL)
449         dict = doc->dict;
450     else
451         dict = NULL;
452     ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
453     if (ret == NULL)
454         return(NULL);
455     ret->doc = doc;
456     return(ret);
457 }
458 
/**
 * xmlGetEntityFromTable:
 * @table:  an entity table
 * @name:  the entity name
 *
 * Do an entity lookup in the table.
 * This is a plain xmlHashLookup() of @name whose result is cast to an
 * entity pointer; the same helper serves the general and the parameter
 * entity tables.
 *
 * Returns A pointer to the entity structure or NULL if not found.
 */
static xmlEntityPtr
xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
    return((xmlEntityPtr) xmlHashLookup(table, name));
}
474 
475 /**
476  * xmlGetParameterEntity:
477  * @doc:  the document referencing the entity
478  * @name:  the entity name
479  *
480  * Do an entity lookup in the internal and external subsets and
481  * returns the corresponding parameter entity, if found.
482  *
483  * Returns A pointer to the entity structure or NULL if not found.
484  */
485 xmlEntityPtr
486 xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
487     xmlEntitiesTablePtr table;
488     xmlEntityPtr ret;
489 
490     if (doc == NULL)
491 	return(NULL);
492     if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
493 	table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
494 	ret = xmlGetEntityFromTable(table, name);
495 	if (ret != NULL)
496 	    return(ret);
497     }
498     if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
499 	table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
500 	return(xmlGetEntityFromTable(table, name));
501     }
502     return(NULL);
503 }
504 
505 /**
506  * xmlGetDtdEntity:
507  * @doc:  the document referencing the entity
508  * @name:  the entity name
509  *
510  * Do an entity lookup in the DTD entity hash table and
511  * returns the corresponding entity, if found.
512  * Note: the first argument is the document node, not the DTD node.
513  *
514  * Returns A pointer to the entity structure or NULL if not found.
515  */
516 xmlEntityPtr
517 xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
518     xmlEntitiesTablePtr table;
519 
520     if (doc == NULL)
521 	return(NULL);
522     if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
523 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
524 	return(xmlGetEntityFromTable(table, name));
525     }
526     return(NULL);
527 }
528 
529 /**
530  * xmlGetDocEntity:
531  * @doc:  the document referencing the entity
532  * @name:  the entity name
533  *
534  * Do an entity lookup in the document entity hash table and
535  * returns the corresponding entity, otherwise a lookup is done
536  * in the predefined entities too.
537  *
538  * Returns A pointer to the entity structure or NULL if not found.
539  */
540 xmlEntityPtr
541 xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
542     xmlEntityPtr cur;
543     xmlEntitiesTablePtr table;
544 
545     if (doc != NULL) {
546 	if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
547 	    table = (xmlEntitiesTablePtr) doc->intSubset->entities;
548 	    cur = xmlGetEntityFromTable(table, name);
549 	    if (cur != NULL)
550 		return(cur);
551 	}
552 	if (doc->standalone != 1) {
553 	    if ((doc->extSubset != NULL) &&
554 		(doc->extSubset->entities != NULL)) {
555 		table = (xmlEntitiesTablePtr) doc->extSubset->entities;
556 		cur = xmlGetEntityFromTable(table, name);
557 		if (cur != NULL)
558 		    return(cur);
559 	    }
560 	}
561     }
562     return(xmlGetPredefinedEntity(name));
563 }
564 
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the allocation of the caller's `buffer` (whose size is kept
 * in `buffer_size`) with xmlRealloc(). Both variables and a `mem_error`
 * label must exist in the calling function: control jumps to that label
 * when the doubled size wraps around or when the reallocation fails, in
 * which case `buffer` is left untouched so the caller can still free it.
 * Pointers into the old buffer have to be recomputed by the caller.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, even in an unbraced if/else.
 */
#define growBufferReentrant() do {                                      \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer_size * 2;                                  \
    if (new_size < buffer_size) goto mem_error;                         \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer_size = new_size;                                             \
} while (0)
577 
/**
 * xmlEncodeEntitiesInternal:
 * @doc:  the document containing the string, may be NULL
 * @input:  A string to convert to XML.
 * @attr: are we handling an attribute value
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Summary of the per byte handling:
 *   - '<', '>' and '&' become "&lt;", "&gt;" and "&amp;";
 *   - for HTML documents with @attr set, "<!-- ... -->" runs and
 *     "&{...}" constructs are copied through unescaped;
 *   - other bytes in 0x20..0x7F, '\n', '\t' (and '\r' for HTML) are
 *     copied as is;
 *   - bytes >= 0x80 are copied as is when the document has an encoding
 *     or is HTML; otherwise they are decoded as UTF-8 and written as a
 *     hexadecimal character reference;
 *   - remaining bytes accepted by IS_BYTE_CHAR are written as a decimal
 *     character reference, anything else is dropped.
 *
 * Side effect: when invalid UTF-8 or an out of range character is met
 * and @doc is not NULL, doc->encoding is set to a copy of "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done, or NULL
 * if @input is NULL or an allocation failed.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
	return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep 100 bytes of headroom: the longest expansion written in
         * one iteration below is a 10 character reference. The buffer
         * may move when grown, so out is recomputed from its offset.
         */
        size_t indx = out - buffer;
        if (indx + 100 > buffer_size) {

	    growBufferReentrant();
	    out = &buffer[indx];
	}

	/*
	 * By default one have to encode at least '<', '>', '"' and '&' !
	 */
	if (*cur == '<') {
	    const xmlChar *end;

	    /*
	     * Special handling of server side include in HTML attributes:
	     * everything up to and including the closing "-->" is copied
	     * verbatim.
	     */
	    if (html && attr &&
	        (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
	        ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
	        while (cur != end) {
		    *out++ = *cur++;
		    indx = out - buffer;
		    if (indx + 100 > buffer_size) {
			growBufferReentrant();
			out = &buffer[indx];
		    }
		}
		/* copy the three characters of the "-->" terminator */
		*out++ = *cur++;
		*out++ = *cur++;
		*out++ = *cur++;
		continue;
	    }
	    *out++ = '&';
	    *out++ = 'l';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '>') {
	    *out++ = '&';
	    *out++ = 'g';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '&') {
	    /*
	     * Special handling of &{...} construct from HTML 4, see
	     * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
	     * The construct is copied verbatim up to and including the
	     * closing '}'.
	     */
	    if (html && attr && (cur[1] == '{') &&
	        (strchr((const char *) cur, '}'))) {
	        while (*cur != '}') {
		    *out++ = *cur++;
		    indx = out - buffer;
		    if (indx + 100 > buffer_size) {
			growBufferReentrant();
			out = &buffer[indx];
		    }
		}
		*out++ = *cur++;
		continue;
	    }
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'm';
	    *out++ = 'p';
	    *out++ = ';';
	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
	    (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
	    /*
	     * default case, just copy !
	     */
	    *out++ = *cur;
	} else if (*cur >= 0x80) {
	    if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
		/*
		 * The document has an encoding or is HTML: the byte is
		 * copied unchanged. A patch provided by
		 * Bjørn Reese <br@sseusa.com> is kept here, disabled:
	        xmlChar xc;
	        xc = (*cur & 0x3F) << 6;
	        if (cur[1] != 0) {
		    xc += *(++cur) & 0x3F;
		    *out++ = xc;
	        } else
		 */
		*out++ = *cur;
	    } else {
		/*
		 * We assume we have UTF-8 input.
		 * It must match either:
		 *   110xxxxx 10xxxxxx
		 *   1110xxxx 10xxxxxx 10xxxxxx
		 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		 * That is:
		 *   cur[0] is 11xxxxxx
		 *   cur[1] is 10xxxxxx
		 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
		 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
		 *   cur[0] is not 11111xxx
		 */
		char buf[11], *ptr;
		int val = 0, l = 1;

		if (((cur[0] & 0xC0) != 0xC0) ||
		    ((cur[1] & 0xC0) != 0x80) ||
		    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
		    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
		    (((cur[0] & 0xF8) == 0xF8))) {
		    /*
		     * Not valid UTF-8: report it, mark the document as
		     * ISO-8859-1 and emit this single byte as a decimal
		     * character reference.
		     */
		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
			    "xmlEncodeEntities: input not UTF-8");
		    if (doc != NULL)
			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
		    buf[sizeof(buf) - 1] = 0;
		    ptr = buf;
		    while (*ptr != 0) *out++ = *ptr++;
		    cur++;
		    continue;
		} else if (*cur < 0xE0) {
		    /* two byte sequence */
                    val = (cur[0]) & 0x1F;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    l = 2;
		} else if (*cur < 0xF0) {
		    /* three byte sequence */
                    val = (cur[0]) & 0x0F;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    val <<= 6;
		    val |= (cur[2]) & 0x3F;
		    l = 3;
		} else if (*cur < 0xF8) {
		    /* four byte sequence */
                    val = (cur[0]) & 0x07;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    val <<= 6;
		    val |= (cur[2]) & 0x3F;
		    val <<= 6;
		    val |= (cur[3]) & 0x3F;
		    l = 4;
		}
		if ((l == 1) || (!IS_CHAR(val))) {
		    /*
		     * Nothing was decoded or the code point is rejected by
		     * IS_CHAR: same fallback as for invalid UTF-8.
		     */
		    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
			"xmlEncodeEntities: char out of range\n");
		    if (doc != NULL)
			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
		    buf[sizeof(buf) - 1] = 0;
		    ptr = buf;
		    while (*ptr != 0) *out++ = *ptr++;
		    cur++;
		    continue;
		}
		/*
		 * We could do multiple things here. Just save as a char ref
		 * and skip the l bytes of the decoded sequence.
		 */
		snprintf(buf, sizeof(buf), "&#x%X;", val);
		buf[sizeof(buf) - 1] = 0;
		ptr = buf;
		while (*ptr != 0) *out++ = *ptr++;
		cur += l;
		continue;
	    }
	} else if (IS_BYTE_CHAR(*cur)) {
	    /*
	     * Remaining bytes accepted by IS_BYTE_CHAR (e.g. '\r' outside
	     * of HTML) are written as a decimal character reference.
	     */
	    char buf[11], *ptr;

	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
	    buf[sizeof(buf) - 1] = 0;
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
	}
	/* bytes matching none of the branches above produce no output */
	cur++;
    }
    *out = 0;
    return(buffer);

mem_error:
    /* reached from growBufferReentrant(); buffer is still the old block */
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}
791 
/**
 * xmlEncodeAttributeEntities:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts for
 * attribute values.
 * This is xmlEncodeEntitiesInternal() called with its attr flag set
 * to 1.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
    return xmlEncodeEntitiesInternal(doc, input, 1);
}
807 
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 * This is xmlEncodeEntitiesInternal() called with its attr flag set
 * to 0.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    return xmlEncodeEntitiesInternal(doc, input, 0);
}
824 
825 /**
826  * xmlEncodeSpecialChars:
827  * @doc:  the document containing the string
828  * @input:  A string to convert to XML.
829  *
830  * Do a global encoding of a string, replacing the predefined entities
831  * this routine is reentrant, and result must be deallocated.
832  *
833  * Returns A newly allocated string with the substitution done.
834  */
835 xmlChar *
836 xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
837     const xmlChar *cur = input;
838     xmlChar *buffer = NULL;
839     xmlChar *out = NULL;
840     size_t buffer_size = 0;
841     if (input == NULL) return(NULL);
842 
843     /*
844      * allocate an translation buffer.
845      */
846     buffer_size = 1000;
847     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
848     if (buffer == NULL) {
849         xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
850 	return(NULL);
851     }
852     out = buffer;
853 
854     while (*cur != '\0') {
855         size_t indx = out - buffer;
856         if (indx + 10 > buffer_size) {
857 
858 	    growBufferReentrant();
859 	    out = &buffer[indx];
860 	}
861 
862 	/*
863 	 * By default one have to encode at least '<', '>', '"' and '&' !
864 	 */
865 	if (*cur == '<') {
866 	    *out++ = '&';
867 	    *out++ = 'l';
868 	    *out++ = 't';
869 	    *out++ = ';';
870 	} else if (*cur == '>') {
871 	    *out++ = '&';
872 	    *out++ = 'g';
873 	    *out++ = 't';
874 	    *out++ = ';';
875 	} else if (*cur == '&') {
876 	    *out++ = '&';
877 	    *out++ = 'a';
878 	    *out++ = 'm';
879 	    *out++ = 'p';
880 	    *out++ = ';';
881 	} else if (*cur == '"') {
882 	    *out++ = '&';
883 	    *out++ = 'q';
884 	    *out++ = 'u';
885 	    *out++ = 'o';
886 	    *out++ = 't';
887 	    *out++ = ';';
888 	} else if (*cur == '\r') {
889 	    *out++ = '&';
890 	    *out++ = '#';
891 	    *out++ = '1';
892 	    *out++ = '3';
893 	    *out++ = ';';
894 	} else {
895 	    /*
896 	     * Works because on UTF-8, all extended sequences cannot
897 	     * result in bytes in the ASCII range.
898 	     */
899 	    *out++ = *cur;
900 	}
901 	cur++;
902     }
903     *out = 0;
904     return(buffer);
905 
906 mem_error:
907     xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
908     xmlFree(buffer);
909     return(NULL);
910 }
911 
/**
 * xmlCreateEntitiesTable:
 *
 * create and initialize an empty entities hash table.
 * This really doesn't make sense and should be deprecated.
 * The table is simply the result of xmlHashCreate(0) cast to the
 * entities table type.
 *
 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
 */
xmlEntitiesTablePtr
xmlCreateEntitiesTable(void) {
    return((xmlEntitiesTablePtr) xmlHashCreate(0));
}
924 
925 /**
926  * xmlFreeEntityWrapper:
927  * @entity:  An entity
928  * @name:  its name
929  *
930  * Deallocate the memory used by an entities in the hash table.
931  */
932 static void
933 xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
934     if (entity != NULL)
935 	xmlFreeEntity((xmlEntityPtr) entity);
936 }
937 
/**
 * xmlFreeEntitiesTable:
 * @table:  An entity table
 *
 * Deallocate the memory used by an entities hash table.
 * The table is handed to xmlHashFree() with xmlFreeEntityWrapper() as
 * the deallocator for the stored entities.
 */
void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
    xmlHashFree(table, xmlFreeEntityWrapper);
}
948 
949 #ifdef LIBXML_TREE_ENABLED
950 /**
951  * xmlCopyEntity:
952  * @ent:  An entity
953  *
954  * Build a copy of an entity
955  *
956  * Returns the new xmlEntitiesPtr or NULL in case of error.
957  */
958 static void *
959 xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
960     xmlEntityPtr ent = (xmlEntityPtr) payload;
961     xmlEntityPtr cur;
962 
963     cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
964     if (cur == NULL) {
965         xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
966 	return(NULL);
967     }
968     memset(cur, 0, sizeof(xmlEntity));
969     cur->type = XML_ENTITY_DECL;
970 
971     cur->etype = ent->etype;
972     if (ent->name != NULL)
973 	cur->name = xmlStrdup(ent->name);
974     if (ent->ExternalID != NULL)
975 	cur->ExternalID = xmlStrdup(ent->ExternalID);
976     if (ent->SystemID != NULL)
977 	cur->SystemID = xmlStrdup(ent->SystemID);
978     if (ent->content != NULL)
979 	cur->content = xmlStrdup(ent->content);
980     if (ent->orig != NULL)
981 	cur->orig = xmlStrdup(ent->orig);
982     if (ent->URI != NULL)
983 	cur->URI = xmlStrdup(ent->URI);
984     return(cur);
985 }
986 
/**
 * xmlCopyEntitiesTable:
 * @table:  An entity table
 *
 * Build a copy of an entity table.
 * The table is handed to xmlHashCopy() with xmlCopyEntity() as the
 * function duplicating the stored entities.
 *
 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
 */
xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
    return(xmlHashCopy(table, xmlCopyEntity));
}
999 #endif /* LIBXML_TREE_ENABLED */
1000 
1001 #ifdef LIBXML_OUTPUT_ENABLED
1002 
1003 /**
1004  * xmlDumpEntityContent:
1005  * @buf:  An XML buffer.
1006  * @content:  The entity content.
1007  *
1008  * This will dump the quoted string value, taking care of the special
1009  * treatment required by %
1010  */
1011 static void
1012 xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
1013     if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
1014     if (xmlStrchr(content, '%')) {
1015         const xmlChar * base, *cur;
1016 
1017 	xmlBufferCCat(buf, "\"");
1018 	base = cur = content;
1019 	while (*cur != 0) {
1020 	    if (*cur == '"') {
1021 		if (base != cur)
1022 		    xmlBufferAdd(buf, base, cur - base);
1023 		xmlBufferAdd(buf, BAD_CAST "&quot;", 6);
1024 		cur++;
1025 		base = cur;
1026 	    } else if (*cur == '%') {
1027 		if (base != cur)
1028 		    xmlBufferAdd(buf, base, cur - base);
1029 		xmlBufferAdd(buf, BAD_CAST "&#x25;", 6);
1030 		cur++;
1031 		base = cur;
1032 	    } else {
1033 		cur++;
1034 	    }
1035 	}
1036 	if (base != cur)
1037 	    xmlBufferAdd(buf, base, cur - base);
1038 	xmlBufferCCat(buf, "\"");
1039     } else {
1040         xmlBufferWriteQuotedString(buf, content);
1041     }
1042 }
1043 
1044 /**
1045  * xmlDumpEntityDecl:
1046  * @buf:  An XML buffer.
1047  * @ent:  An entity table
1048  *
1049  * This will dump the content of the entity table as an XML DTD definition
1050  */
1051 void
1052 xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
1053     if ((buf == NULL) || (ent == NULL)) return;
1054     switch (ent->etype) {
1055 	case XML_INTERNAL_GENERAL_ENTITY:
1056 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1057 	    xmlBufferWriteCHAR(buf, ent->name);
1058 	    xmlBufferWriteChar(buf, " ");
1059 	    if (ent->orig != NULL)
1060 		xmlBufferWriteQuotedString(buf, ent->orig);
1061 	    else
1062 		xmlDumpEntityContent(buf, ent->content);
1063 	    xmlBufferWriteChar(buf, ">\n");
1064 	    break;
1065 	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1066 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1067 	    xmlBufferWriteCHAR(buf, ent->name);
1068 	    if (ent->ExternalID != NULL) {
1069 		 xmlBufferWriteChar(buf, " PUBLIC ");
1070 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1071 		 xmlBufferWriteChar(buf, " ");
1072 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1073 	    } else {
1074 		 xmlBufferWriteChar(buf, " SYSTEM ");
1075 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1076 	    }
1077 	    xmlBufferWriteChar(buf, ">\n");
1078 	    break;
1079 	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1080 	    xmlBufferWriteChar(buf, "<!ENTITY ");
1081 	    xmlBufferWriteCHAR(buf, ent->name);
1082 	    if (ent->ExternalID != NULL) {
1083 		 xmlBufferWriteChar(buf, " PUBLIC ");
1084 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1085 		 xmlBufferWriteChar(buf, " ");
1086 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1087 	    } else {
1088 		 xmlBufferWriteChar(buf, " SYSTEM ");
1089 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1090 	    }
1091 	    if (ent->content != NULL) { /* Should be true ! */
1092 		xmlBufferWriteChar(buf, " NDATA ");
1093 		if (ent->orig != NULL)
1094 		    xmlBufferWriteCHAR(buf, ent->orig);
1095 		else
1096 		    xmlBufferWriteCHAR(buf, ent->content);
1097 	    }
1098 	    xmlBufferWriteChar(buf, ">\n");
1099 	    break;
1100 	case XML_INTERNAL_PARAMETER_ENTITY:
1101 	    xmlBufferWriteChar(buf, "<!ENTITY % ");
1102 	    xmlBufferWriteCHAR(buf, ent->name);
1103 	    xmlBufferWriteChar(buf, " ");
1104 	    if (ent->orig == NULL)
1105 		xmlDumpEntityContent(buf, ent->content);
1106 	    else
1107 		xmlBufferWriteQuotedString(buf, ent->orig);
1108 	    xmlBufferWriteChar(buf, ">\n");
1109 	    break;
1110 	case XML_EXTERNAL_PARAMETER_ENTITY:
1111 	    xmlBufferWriteChar(buf, "<!ENTITY % ");
1112 	    xmlBufferWriteCHAR(buf, ent->name);
1113 	    if (ent->ExternalID != NULL) {
1114 		 xmlBufferWriteChar(buf, " PUBLIC ");
1115 		 xmlBufferWriteQuotedString(buf, ent->ExternalID);
1116 		 xmlBufferWriteChar(buf, " ");
1117 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1118 	    } else {
1119 		 xmlBufferWriteChar(buf, " SYSTEM ");
1120 		 xmlBufferWriteQuotedString(buf, ent->SystemID);
1121 	    }
1122 	    xmlBufferWriteChar(buf, ">\n");
1123 	    break;
1124 	default:
1125 	    xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
1126 		"xmlDumpEntitiesDecl: internal: unknown type entity type");
1127     }
1128 }
1129 
1130 /**
1131  * xmlDumpEntityDeclScan:
1132  * @ent:  An entity table
1133  * @buf:  An XML buffer.
1134  *
1135  * When using the hash table scan function, arguments need to be reversed
1136  */
1137 static void
1138 xmlDumpEntityDeclScan(void *ent, void *buf,
1139                       const xmlChar *name ATTRIBUTE_UNUSED) {
1140     xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
1141 }
1142 
1143 /**
1144  * xmlDumpEntitiesTable:
1145  * @buf:  An XML buffer.
1146  * @table:  An entity table
1147  *
1148  * This will dump the content of the entity table as an XML DTD definition
1149  */
1150 void
1151 xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
1152     xmlHashScan(table, xmlDumpEntityDeclScan, buf);
1153 }
1154 #endif /* LIBXML_OUTPUT_ENABLED */
1155