1c2c66affSColin Finck /*
2c2c66affSColin Finck * HTMLtree.c : implementation of access function for an HTML tree.
3c2c66affSColin Finck *
4c2c66affSColin Finck * See Copyright for the status of this software.
5c2c66affSColin Finck *
6c2c66affSColin Finck * daniel@veillard.com
7c2c66affSColin Finck */
8c2c66affSColin Finck
9c2c66affSColin Finck
10c2c66affSColin Finck #define IN_LIBXML
11c2c66affSColin Finck #include "libxml.h"
12c2c66affSColin Finck #ifdef LIBXML_HTML_ENABLED
13c2c66affSColin Finck
14c2c66affSColin Finck #include <string.h> /* for memset() only ! */
15c2c66affSColin Finck #include <ctype.h>
16c2c66affSColin Finck #include <stdlib.h>
17c2c66affSColin Finck
18c2c66affSColin Finck #include <libxml/xmlmemory.h>
19c2c66affSColin Finck #include <libxml/HTMLparser.h>
20c2c66affSColin Finck #include <libxml/HTMLtree.h>
21c2c66affSColin Finck #include <libxml/entities.h>
22c2c66affSColin Finck #include <libxml/valid.h>
23c2c66affSColin Finck #include <libxml/xmlerror.h>
24c2c66affSColin Finck #include <libxml/parserInternals.h>
25c2c66affSColin Finck #include <libxml/globals.h>
26c2c66affSColin Finck #include <libxml/uri.h>
27c2c66affSColin Finck
28c2c66affSColin Finck #include "buf.h"
29c2c66affSColin Finck
30c2c66affSColin Finck /************************************************************************
31c2c66affSColin Finck * *
32c2c66affSColin Finck * Getting/Setting encoding meta tags *
33c2c66affSColin Finck * *
34c2c66affSColin Finck ************************************************************************/
35c2c66affSColin Finck
36c2c66affSColin Finck /**
37c2c66affSColin Finck * htmlGetMetaEncoding:
38c2c66affSColin Finck * @doc: the document
39c2c66affSColin Finck *
40c2c66affSColin Finck * Encoding definition lookup in the Meta tags
41c2c66affSColin Finck *
42c2c66affSColin Finck * Returns the current encoding as flagged in the HTML source
43c2c66affSColin Finck */
44c2c66affSColin Finck const xmlChar *
htmlGetMetaEncoding(htmlDocPtr doc)45c2c66affSColin Finck htmlGetMetaEncoding(htmlDocPtr doc) {
46c2c66affSColin Finck htmlNodePtr cur;
47c2c66affSColin Finck const xmlChar *content;
48c2c66affSColin Finck const xmlChar *encoding;
49c2c66affSColin Finck
50c2c66affSColin Finck if (doc == NULL)
51c2c66affSColin Finck return(NULL);
52c2c66affSColin Finck cur = doc->children;
53c2c66affSColin Finck
54c2c66affSColin Finck /*
55c2c66affSColin Finck * Search the html
56c2c66affSColin Finck */
57c2c66affSColin Finck while (cur != NULL) {
58c2c66affSColin Finck if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
59c2c66affSColin Finck if (xmlStrEqual(cur->name, BAD_CAST"html"))
60c2c66affSColin Finck break;
61c2c66affSColin Finck if (xmlStrEqual(cur->name, BAD_CAST"head"))
62c2c66affSColin Finck goto found_head;
63c2c66affSColin Finck if (xmlStrEqual(cur->name, BAD_CAST"meta"))
64c2c66affSColin Finck goto found_meta;
65c2c66affSColin Finck }
66c2c66affSColin Finck cur = cur->next;
67c2c66affSColin Finck }
68c2c66affSColin Finck if (cur == NULL)
69c2c66affSColin Finck return(NULL);
70c2c66affSColin Finck cur = cur->children;
71c2c66affSColin Finck
72c2c66affSColin Finck /*
73c2c66affSColin Finck * Search the head
74c2c66affSColin Finck */
75c2c66affSColin Finck while (cur != NULL) {
76c2c66affSColin Finck if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
77c2c66affSColin Finck if (xmlStrEqual(cur->name, BAD_CAST"head"))
78c2c66affSColin Finck break;
79c2c66affSColin Finck if (xmlStrEqual(cur->name, BAD_CAST"meta"))
80c2c66affSColin Finck goto found_meta;
81c2c66affSColin Finck }
82c2c66affSColin Finck cur = cur->next;
83c2c66affSColin Finck }
84c2c66affSColin Finck if (cur == NULL)
85c2c66affSColin Finck return(NULL);
86c2c66affSColin Finck found_head:
87c2c66affSColin Finck cur = cur->children;
88c2c66affSColin Finck
89c2c66affSColin Finck /*
90c2c66affSColin Finck * Search the meta elements
91c2c66affSColin Finck */
92c2c66affSColin Finck found_meta:
93c2c66affSColin Finck while (cur != NULL) {
94c2c66affSColin Finck if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
95c2c66affSColin Finck if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
96c2c66affSColin Finck xmlAttrPtr attr = cur->properties;
97c2c66affSColin Finck int http;
98c2c66affSColin Finck const xmlChar *value;
99c2c66affSColin Finck
100c2c66affSColin Finck content = NULL;
101c2c66affSColin Finck http = 0;
102c2c66affSColin Finck while (attr != NULL) {
103c2c66affSColin Finck if ((attr->children != NULL) &&
104c2c66affSColin Finck (attr->children->type == XML_TEXT_NODE) &&
105c2c66affSColin Finck (attr->children->next == NULL)) {
106c2c66affSColin Finck value = attr->children->content;
107c2c66affSColin Finck if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
108c2c66affSColin Finck && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
109c2c66affSColin Finck http = 1;
110c2c66affSColin Finck else if ((value != NULL)
111c2c66affSColin Finck && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
112c2c66affSColin Finck content = value;
113c2c66affSColin Finck if ((http != 0) && (content != NULL))
114c2c66affSColin Finck goto found_content;
115c2c66affSColin Finck }
116c2c66affSColin Finck attr = attr->next;
117c2c66affSColin Finck }
118c2c66affSColin Finck }
119c2c66affSColin Finck }
120c2c66affSColin Finck cur = cur->next;
121c2c66affSColin Finck }
122c2c66affSColin Finck return(NULL);
123c2c66affSColin Finck
124c2c66affSColin Finck found_content:
125c2c66affSColin Finck encoding = xmlStrstr(content, BAD_CAST"charset=");
126c2c66affSColin Finck if (encoding == NULL)
127c2c66affSColin Finck encoding = xmlStrstr(content, BAD_CAST"Charset=");
128c2c66affSColin Finck if (encoding == NULL)
129c2c66affSColin Finck encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
130c2c66affSColin Finck if (encoding != NULL) {
131c2c66affSColin Finck encoding += 8;
132c2c66affSColin Finck } else {
133c2c66affSColin Finck encoding = xmlStrstr(content, BAD_CAST"charset =");
134c2c66affSColin Finck if (encoding == NULL)
135c2c66affSColin Finck encoding = xmlStrstr(content, BAD_CAST"Charset =");
136c2c66affSColin Finck if (encoding == NULL)
137c2c66affSColin Finck encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
138c2c66affSColin Finck if (encoding != NULL)
139c2c66affSColin Finck encoding += 9;
140c2c66affSColin Finck }
141c2c66affSColin Finck if (encoding != NULL) {
142c2c66affSColin Finck while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
143c2c66affSColin Finck }
144c2c66affSColin Finck return(encoding);
145c2c66affSColin Finck }
146c2c66affSColin Finck
147c2c66affSColin Finck /**
148c2c66affSColin Finck * htmlSetMetaEncoding:
149c2c66affSColin Finck * @doc: the document
150c2c66affSColin Finck * @encoding: the encoding string
151c2c66affSColin Finck *
152c2c66affSColin Finck * Sets the current encoding in the Meta tags
153c2c66affSColin Finck * NOTE: this will not change the document content encoding, just
154c2c66affSColin Finck * the META flag associated.
155c2c66affSColin Finck *
156c2c66affSColin Finck * Returns 0 in case of success and -1 in case of error
157c2c66affSColin Finck */
158c2c66affSColin Finck int
htmlSetMetaEncoding(htmlDocPtr doc,const xmlChar * encoding)159c2c66affSColin Finck htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
160c2c66affSColin Finck htmlNodePtr cur, meta = NULL, head = NULL;
161c2c66affSColin Finck const xmlChar *content = NULL;
162c2c66affSColin Finck char newcontent[100];
163c2c66affSColin Finck
164c2c66affSColin Finck newcontent[0] = 0;
165c2c66affSColin Finck
166c2c66affSColin Finck if (doc == NULL)
167c2c66affSColin Finck return(-1);
168c2c66affSColin Finck
169c2c66affSColin Finck /* html isn't a real encoding it's just libxml2 way to get entities */
170c2c66affSColin Finck if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
171c2c66affSColin Finck return(-1);
172c2c66affSColin Finck
173c2c66affSColin Finck if (encoding != NULL) {
174c2c66affSColin Finck snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
175c2c66affSColin Finck (char *)encoding);
176c2c66affSColin Finck newcontent[sizeof(newcontent) - 1] = 0;
177c2c66affSColin Finck }
178c2c66affSColin Finck
179c2c66affSColin Finck cur = doc->children;
180c2c66affSColin Finck
181c2c66affSColin Finck /*
182c2c66affSColin Finck * Search the html
183c2c66affSColin Finck */
184c2c66affSColin Finck while (cur != NULL) {
185c2c66affSColin Finck if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
186c2c66affSColin Finck if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
187c2c66affSColin Finck break;
188c2c66affSColin Finck if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
189c2c66affSColin Finck goto found_head;
190c2c66affSColin Finck if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
191c2c66affSColin Finck goto found_meta;
192c2c66affSColin Finck }
193c2c66affSColin Finck cur = cur->next;
194c2c66affSColin Finck }
195c2c66affSColin Finck if (cur == NULL)
196c2c66affSColin Finck return(-1);
197c2c66affSColin Finck cur = cur->children;
198c2c66affSColin Finck
199c2c66affSColin Finck /*
200c2c66affSColin Finck * Search the head
201c2c66affSColin Finck */
202c2c66affSColin Finck while (cur != NULL) {
203c2c66affSColin Finck if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
204c2c66affSColin Finck if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
205c2c66affSColin Finck break;
206c2c66affSColin Finck if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
207c2c66affSColin Finck head = cur->parent;
208c2c66affSColin Finck goto found_meta;
209c2c66affSColin Finck }
210c2c66affSColin Finck }
211c2c66affSColin Finck cur = cur->next;
212c2c66affSColin Finck }
213c2c66affSColin Finck if (cur == NULL)
214c2c66affSColin Finck return(-1);
215c2c66affSColin Finck found_head:
216c2c66affSColin Finck head = cur;
217c2c66affSColin Finck if (cur->children == NULL)
218c2c66affSColin Finck goto create;
219c2c66affSColin Finck cur = cur->children;
220c2c66affSColin Finck
221c2c66affSColin Finck found_meta:
222c2c66affSColin Finck /*
223c2c66affSColin Finck * Search and update all the remaining the meta elements carrying
22440ee59d6SThomas Faber * encoding information
225c2c66affSColin Finck */
226c2c66affSColin Finck while (cur != NULL) {
227c2c66affSColin Finck if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
228c2c66affSColin Finck if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
229c2c66affSColin Finck xmlAttrPtr attr = cur->properties;
230c2c66affSColin Finck int http;
231c2c66affSColin Finck const xmlChar *value;
232c2c66affSColin Finck
233c2c66affSColin Finck content = NULL;
234c2c66affSColin Finck http = 0;
235c2c66affSColin Finck while (attr != NULL) {
236c2c66affSColin Finck if ((attr->children != NULL) &&
237c2c66affSColin Finck (attr->children->type == XML_TEXT_NODE) &&
238c2c66affSColin Finck (attr->children->next == NULL)) {
239c2c66affSColin Finck value = attr->children->content;
240c2c66affSColin Finck if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
241c2c66affSColin Finck && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
242c2c66affSColin Finck http = 1;
243c2c66affSColin Finck else
244c2c66affSColin Finck {
245c2c66affSColin Finck if ((value != NULL) &&
246c2c66affSColin Finck (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
247c2c66affSColin Finck content = value;
248c2c66affSColin Finck }
249c2c66affSColin Finck if ((http != 0) && (content != NULL))
250c2c66affSColin Finck break;
251c2c66affSColin Finck }
252c2c66affSColin Finck attr = attr->next;
253c2c66affSColin Finck }
254c2c66affSColin Finck if ((http != 0) && (content != NULL)) {
255c2c66affSColin Finck meta = cur;
256c2c66affSColin Finck break;
257c2c66affSColin Finck }
258c2c66affSColin Finck
259c2c66affSColin Finck }
260c2c66affSColin Finck }
261c2c66affSColin Finck cur = cur->next;
262c2c66affSColin Finck }
263c2c66affSColin Finck create:
264c2c66affSColin Finck if (meta == NULL) {
265c2c66affSColin Finck if ((encoding != NULL) && (head != NULL)) {
266c2c66affSColin Finck /*
267c2c66affSColin Finck * Create a new Meta element with the right attributes
268c2c66affSColin Finck */
269c2c66affSColin Finck
270c2c66affSColin Finck meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
271c2c66affSColin Finck if (head->children == NULL)
272c2c66affSColin Finck xmlAddChild(head, meta);
273c2c66affSColin Finck else
274c2c66affSColin Finck xmlAddPrevSibling(head->children, meta);
275c2c66affSColin Finck xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
276c2c66affSColin Finck xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
277c2c66affSColin Finck }
278c2c66affSColin Finck } else {
279c2c66affSColin Finck /* remove the meta tag if NULL is passed */
280c2c66affSColin Finck if (encoding == NULL) {
281c2c66affSColin Finck xmlUnlinkNode(meta);
282c2c66affSColin Finck xmlFreeNode(meta);
283c2c66affSColin Finck }
284c2c66affSColin Finck /* change the document only if there is a real encoding change */
285c2c66affSColin Finck else if (xmlStrcasestr(content, encoding) == NULL) {
286c2c66affSColin Finck xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
287c2c66affSColin Finck }
288c2c66affSColin Finck }
289c2c66affSColin Finck
290c2c66affSColin Finck
291c2c66affSColin Finck return(0);
292c2c66affSColin Finck }
293c2c66affSColin Finck
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated table of the HTML attribute names which are output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 */
static const char* const htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL    /* end of table marker */
};
307c2c66affSColin Finck
308c2c66affSColin Finck
309c2c66affSColin Finck /**
310c2c66affSColin Finck * htmlIsBooleanAttr:
311c2c66affSColin Finck * @name: the name of the attribute to check
312c2c66affSColin Finck *
313c2c66affSColin Finck * Determine if a given attribute is a boolean attribute.
314c2c66affSColin Finck *
315c2c66affSColin Finck * returns: false if the attribute is not boolean, true otherwise.
316c2c66affSColin Finck */
317c2c66affSColin Finck int
htmlIsBooleanAttr(const xmlChar * name)318c2c66affSColin Finck htmlIsBooleanAttr(const xmlChar *name)
319c2c66affSColin Finck {
320c2c66affSColin Finck int i = 0;
321c2c66affSColin Finck
322c2c66affSColin Finck while (htmlBooleanAttrs[i] != NULL) {
323c2c66affSColin Finck if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
324c2c66affSColin Finck return 1;
325c2c66affSColin Finck i++;
326c2c66affSColin Finck }
327c2c66affSColin Finck return 0;
328c2c66affSColin Finck }
329c2c66affSColin Finck
330c2c66affSColin Finck #ifdef LIBXML_OUTPUT_ENABLED
331c2c66affSColin Finck /*
332c2c66affSColin Finck * private routine exported from xmlIO.c
333c2c66affSColin Finck */
334c2c66affSColin Finck xmlOutputBufferPtr
335c2c66affSColin Finck xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
336c2c66affSColin Finck /************************************************************************
337c2c66affSColin Finck * *
338c2c66affSColin Finck * Output error handlers *
339c2c66affSColin Finck * *
340c2c66affSColin Finck ************************************************************************/
341c2c66affSColin Finck /**
342c2c66affSColin Finck * htmlSaveErrMemory:
34340ee59d6SThomas Faber * @extra: extra information
344c2c66affSColin Finck *
345c2c66affSColin Finck * Handle an out of memory condition
346c2c66affSColin Finck */
347c2c66affSColin Finck static void
htmlSaveErrMemory(const char * extra)348c2c66affSColin Finck htmlSaveErrMemory(const char *extra)
349c2c66affSColin Finck {
350c2c66affSColin Finck __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
351c2c66affSColin Finck }
352c2c66affSColin Finck
353c2c66affSColin Finck /**
354c2c66affSColin Finck * htmlSaveErr:
355c2c66affSColin Finck * @code: the error number
356c2c66affSColin Finck * @node: the location of the error.
35740ee59d6SThomas Faber * @extra: extra information
358c2c66affSColin Finck *
359c2c66affSColin Finck * Handle an out of memory condition
360c2c66affSColin Finck */
361c2c66affSColin Finck static void
htmlSaveErr(int code,xmlNodePtr node,const char * extra)362c2c66affSColin Finck htmlSaveErr(int code, xmlNodePtr node, const char *extra)
363c2c66affSColin Finck {
364c2c66affSColin Finck const char *msg = NULL;
365c2c66affSColin Finck
366c2c66affSColin Finck switch(code) {
367c2c66affSColin Finck case XML_SAVE_NOT_UTF8:
368c2c66affSColin Finck msg = "string is not in UTF-8\n";
369c2c66affSColin Finck break;
370c2c66affSColin Finck case XML_SAVE_CHAR_INVALID:
371c2c66affSColin Finck msg = "invalid character value\n";
372c2c66affSColin Finck break;
373c2c66affSColin Finck case XML_SAVE_UNKNOWN_ENCODING:
374c2c66affSColin Finck msg = "unknown encoding %s\n";
375c2c66affSColin Finck break;
376c2c66affSColin Finck case XML_SAVE_NO_DOCTYPE:
377c2c66affSColin Finck msg = "HTML has no DOCTYPE\n";
378c2c66affSColin Finck break;
379c2c66affSColin Finck default:
380c2c66affSColin Finck msg = "unexpected error number\n";
381c2c66affSColin Finck }
382c2c66affSColin Finck __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
383c2c66affSColin Finck }
384c2c66affSColin Finck
385c2c66affSColin Finck /************************************************************************
386c2c66affSColin Finck * *
387c2c66affSColin Finck * Dumping HTML tree content to a simple buffer *
388c2c66affSColin Finck * *
389c2c66affSColin Finck ************************************************************************/
390c2c66affSColin Finck
391c2c66affSColin Finck /**
392c2c66affSColin Finck * htmlBufNodeDumpFormat:
393c2c66affSColin Finck * @buf: the xmlBufPtr output
394c2c66affSColin Finck * @doc: the document
395c2c66affSColin Finck * @cur: the current node
396c2c66affSColin Finck * @format: should formatting spaces been added
397c2c66affSColin Finck *
398c2c66affSColin Finck * Dump an HTML node, recursive behaviour,children are printed too.
399c2c66affSColin Finck *
400c2c66affSColin Finck * Returns the number of byte written or -1 in case of error
401c2c66affSColin Finck */
402c2c66affSColin Finck static size_t
htmlBufNodeDumpFormat(xmlBufPtr buf,xmlDocPtr doc,xmlNodePtr cur,int format)403c2c66affSColin Finck htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
404c2c66affSColin Finck int format) {
405c2c66affSColin Finck size_t use;
406c2c66affSColin Finck int ret;
407c2c66affSColin Finck xmlOutputBufferPtr outbuf;
408c2c66affSColin Finck
409c2c66affSColin Finck if (cur == NULL) {
410c2c66affSColin Finck return (-1);
411c2c66affSColin Finck }
412c2c66affSColin Finck if (buf == NULL) {
413c2c66affSColin Finck return (-1);
414c2c66affSColin Finck }
415c2c66affSColin Finck outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
416c2c66affSColin Finck if (outbuf == NULL) {
417c2c66affSColin Finck htmlSaveErrMemory("allocating HTML output buffer");
418c2c66affSColin Finck return (-1);
419c2c66affSColin Finck }
420c2c66affSColin Finck memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
421c2c66affSColin Finck outbuf->buffer = buf;
422c2c66affSColin Finck outbuf->encoder = NULL;
423c2c66affSColin Finck outbuf->writecallback = NULL;
424c2c66affSColin Finck outbuf->closecallback = NULL;
425c2c66affSColin Finck outbuf->context = NULL;
426c2c66affSColin Finck outbuf->written = 0;
427c2c66affSColin Finck
428c2c66affSColin Finck use = xmlBufUse(buf);
429c2c66affSColin Finck htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
430c2c66affSColin Finck xmlFree(outbuf);
431c2c66affSColin Finck ret = xmlBufUse(buf) - use;
432c2c66affSColin Finck return (ret);
433c2c66affSColin Finck }
434c2c66affSColin Finck
435c2c66affSColin Finck /**
436c2c66affSColin Finck * htmlNodeDump:
437c2c66affSColin Finck * @buf: the HTML buffer output
438c2c66affSColin Finck * @doc: the document
439c2c66affSColin Finck * @cur: the current node
440c2c66affSColin Finck *
441c2c66affSColin Finck * Dump an HTML node, recursive behaviour,children are printed too,
442c2c66affSColin Finck * and formatting returns are added.
443c2c66affSColin Finck *
444c2c66affSColin Finck * Returns the number of byte written or -1 in case of error
445c2c66affSColin Finck */
446c2c66affSColin Finck int
htmlNodeDump(xmlBufferPtr buf,xmlDocPtr doc,xmlNodePtr cur)447c2c66affSColin Finck htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
448c2c66affSColin Finck xmlBufPtr buffer;
449c2c66affSColin Finck size_t ret;
450c2c66affSColin Finck
451c2c66affSColin Finck if ((buf == NULL) || (cur == NULL))
452c2c66affSColin Finck return(-1);
453c2c66affSColin Finck
454c2c66affSColin Finck xmlInitParser();
455c2c66affSColin Finck buffer = xmlBufFromBuffer(buf);
456c2c66affSColin Finck if (buffer == NULL)
457c2c66affSColin Finck return(-1);
458c2c66affSColin Finck
459c2c66affSColin Finck ret = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
460c2c66affSColin Finck
461c2c66affSColin Finck xmlBufBackToBuffer(buffer);
462c2c66affSColin Finck
463c2c66affSColin Finck if (ret > INT_MAX)
464c2c66affSColin Finck return(-1);
465c2c66affSColin Finck return((int) ret);
466c2c66affSColin Finck }
467c2c66affSColin Finck
468c2c66affSColin Finck /**
469c2c66affSColin Finck * htmlNodeDumpFileFormat:
470c2c66affSColin Finck * @out: the FILE pointer
471c2c66affSColin Finck * @doc: the document
472c2c66affSColin Finck * @cur: the current node
473c2c66affSColin Finck * @encoding: the document encoding
474c2c66affSColin Finck * @format: should formatting spaces been added
475c2c66affSColin Finck *
476c2c66affSColin Finck * Dump an HTML node, recursive behaviour,children are printed too.
477c2c66affSColin Finck *
478c2c66affSColin Finck * TODO: if encoding == NULL try to save in the doc encoding
479c2c66affSColin Finck *
480c2c66affSColin Finck * returns: the number of byte written or -1 in case of failure.
481c2c66affSColin Finck */
482c2c66affSColin Finck int
htmlNodeDumpFileFormat(FILE * out,xmlDocPtr doc,xmlNodePtr cur,const char * encoding,int format)483c2c66affSColin Finck htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
484c2c66affSColin Finck xmlNodePtr cur, const char *encoding, int format) {
485c2c66affSColin Finck xmlOutputBufferPtr buf;
486c2c66affSColin Finck xmlCharEncodingHandlerPtr handler = NULL;
487c2c66affSColin Finck int ret;
488c2c66affSColin Finck
489c2c66affSColin Finck xmlInitParser();
490c2c66affSColin Finck
491c2c66affSColin Finck if (encoding != NULL) {
492c2c66affSColin Finck xmlCharEncoding enc;
493c2c66affSColin Finck
494c2c66affSColin Finck enc = xmlParseCharEncoding(encoding);
495c2c66affSColin Finck if (enc != XML_CHAR_ENCODING_UTF8) {
496c2c66affSColin Finck handler = xmlFindCharEncodingHandler(encoding);
497c2c66affSColin Finck if (handler == NULL)
498c2c66affSColin Finck htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
499c2c66affSColin Finck }
5001fe58c40SThomas Faber } else {
501c2c66affSColin Finck /*
502c2c66affSColin Finck * Fallback to HTML or ASCII when the encoding is unspecified
503c2c66affSColin Finck */
504c2c66affSColin Finck if (handler == NULL)
505c2c66affSColin Finck handler = xmlFindCharEncodingHandler("HTML");
506c2c66affSColin Finck if (handler == NULL)
507c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ascii");
5081fe58c40SThomas Faber }
509c2c66affSColin Finck
510c2c66affSColin Finck /*
511c2c66affSColin Finck * save the content to a temp buffer.
512c2c66affSColin Finck */
513c2c66affSColin Finck buf = xmlOutputBufferCreateFile(out, handler);
514c2c66affSColin Finck if (buf == NULL) return(0);
515c2c66affSColin Finck
51640ee59d6SThomas Faber htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
517c2c66affSColin Finck
518c2c66affSColin Finck ret = xmlOutputBufferClose(buf);
519c2c66affSColin Finck return(ret);
520c2c66affSColin Finck }
521c2c66affSColin Finck
522c2c66affSColin Finck /**
523c2c66affSColin Finck * htmlNodeDumpFile:
524c2c66affSColin Finck * @out: the FILE pointer
525c2c66affSColin Finck * @doc: the document
526c2c66affSColin Finck * @cur: the current node
527c2c66affSColin Finck *
528c2c66affSColin Finck * Dump an HTML node, recursive behaviour,children are printed too,
529c2c66affSColin Finck * and formatting returns are added.
530c2c66affSColin Finck */
531c2c66affSColin Finck void
htmlNodeDumpFile(FILE * out,xmlDocPtr doc,xmlNodePtr cur)532c2c66affSColin Finck htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
533c2c66affSColin Finck htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
534c2c66affSColin Finck }
535c2c66affSColin Finck
536c2c66affSColin Finck /**
537c2c66affSColin Finck * htmlDocDumpMemoryFormat:
538c2c66affSColin Finck * @cur: the document
539c2c66affSColin Finck * @mem: OUT: the memory pointer
540c2c66affSColin Finck * @size: OUT: the memory length
541c2c66affSColin Finck * @format: should formatting spaces been added
542c2c66affSColin Finck *
543c2c66affSColin Finck * Dump an HTML document in memory and return the xmlChar * and it's size.
544c2c66affSColin Finck * It's up to the caller to free the memory.
545c2c66affSColin Finck */
546c2c66affSColin Finck void
htmlDocDumpMemoryFormat(xmlDocPtr cur,xmlChar ** mem,int * size,int format)547c2c66affSColin Finck htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
548c2c66affSColin Finck xmlOutputBufferPtr buf;
549c2c66affSColin Finck xmlCharEncodingHandlerPtr handler = NULL;
550c2c66affSColin Finck const char *encoding;
551c2c66affSColin Finck
552c2c66affSColin Finck xmlInitParser();
553c2c66affSColin Finck
554c2c66affSColin Finck if ((mem == NULL) || (size == NULL))
555c2c66affSColin Finck return;
556c2c66affSColin Finck if (cur == NULL) {
557c2c66affSColin Finck *mem = NULL;
558c2c66affSColin Finck *size = 0;
559c2c66affSColin Finck return;
560c2c66affSColin Finck }
561c2c66affSColin Finck
562c2c66affSColin Finck encoding = (const char *) htmlGetMetaEncoding(cur);
563c2c66affSColin Finck
564c2c66affSColin Finck if (encoding != NULL) {
565c2c66affSColin Finck xmlCharEncoding enc;
566c2c66affSColin Finck
567c2c66affSColin Finck enc = xmlParseCharEncoding(encoding);
5681fe58c40SThomas Faber if (enc != XML_CHAR_ENCODING_UTF8) {
569c2c66affSColin Finck handler = xmlFindCharEncodingHandler(encoding);
570c2c66affSColin Finck if (handler == NULL)
571c2c66affSColin Finck htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
572c2c66affSColin Finck
5731fe58c40SThomas Faber }
574c2c66affSColin Finck } else {
575c2c66affSColin Finck /*
576c2c66affSColin Finck * Fallback to HTML or ASCII when the encoding is unspecified
577c2c66affSColin Finck */
578c2c66affSColin Finck if (handler == NULL)
579c2c66affSColin Finck handler = xmlFindCharEncodingHandler("HTML");
580c2c66affSColin Finck if (handler == NULL)
581c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ascii");
5821fe58c40SThomas Faber }
583c2c66affSColin Finck
584c2c66affSColin Finck buf = xmlAllocOutputBufferInternal(handler);
585c2c66affSColin Finck if (buf == NULL) {
586c2c66affSColin Finck *mem = NULL;
587c2c66affSColin Finck *size = 0;
588c2c66affSColin Finck return;
589c2c66affSColin Finck }
590c2c66affSColin Finck
591c2c66affSColin Finck htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
592c2c66affSColin Finck
593c2c66affSColin Finck xmlOutputBufferFlush(buf);
594c2c66affSColin Finck if (buf->conv != NULL) {
595c2c66affSColin Finck *size = xmlBufUse(buf->conv);
596c2c66affSColin Finck *mem = xmlStrndup(xmlBufContent(buf->conv), *size);
597c2c66affSColin Finck } else {
598c2c66affSColin Finck *size = xmlBufUse(buf->buffer);
599c2c66affSColin Finck *mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
600c2c66affSColin Finck }
601c2c66affSColin Finck (void)xmlOutputBufferClose(buf);
602c2c66affSColin Finck }
603c2c66affSColin Finck
604c2c66affSColin Finck /**
605c2c66affSColin Finck * htmlDocDumpMemory:
606c2c66affSColin Finck * @cur: the document
607c2c66affSColin Finck * @mem: OUT: the memory pointer
608c2c66affSColin Finck * @size: OUT: the memory length
609c2c66affSColin Finck *
610c2c66affSColin Finck * Dump an HTML document in memory and return the xmlChar * and it's size.
611c2c66affSColin Finck * It's up to the caller to free the memory.
612c2c66affSColin Finck */
613c2c66affSColin Finck void
htmlDocDumpMemory(xmlDocPtr cur,xmlChar ** mem,int * size)614c2c66affSColin Finck htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
615c2c66affSColin Finck htmlDocDumpMemoryFormat(cur, mem, size, 1);
616c2c66affSColin Finck }
617c2c66affSColin Finck
618c2c66affSColin Finck
619c2c66affSColin Finck /************************************************************************
620c2c66affSColin Finck * *
621c2c66affSColin Finck * Dumping HTML tree content to an I/O output buffer *
622c2c66affSColin Finck * *
623c2c66affSColin Finck ************************************************************************/
624c2c66affSColin Finck
625c2c66affSColin Finck void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
626c2c66affSColin Finck
627c2c66affSColin Finck /**
628c2c66affSColin Finck * htmlDtdDumpOutput:
629c2c66affSColin Finck * @buf: the HTML buffer output
630c2c66affSColin Finck * @doc: the document
631c2c66affSColin Finck * @encoding: the encoding string
632c2c66affSColin Finck *
633c2c66affSColin Finck * TODO: check whether encoding is needed
634c2c66affSColin Finck *
635c2c66affSColin Finck * Dump the HTML document DTD, if any.
636c2c66affSColin Finck */
637c2c66affSColin Finck static void
htmlDtdDumpOutput(xmlOutputBufferPtr buf,xmlDocPtr doc,const char * encoding ATTRIBUTE_UNUSED)638c2c66affSColin Finck htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
639c2c66affSColin Finck const char *encoding ATTRIBUTE_UNUSED) {
640c2c66affSColin Finck xmlDtdPtr cur = doc->intSubset;
641c2c66affSColin Finck
642c2c66affSColin Finck if (cur == NULL) {
643c2c66affSColin Finck htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
644c2c66affSColin Finck return;
645c2c66affSColin Finck }
646c2c66affSColin Finck xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
647c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->name);
648c2c66affSColin Finck if (cur->ExternalID != NULL) {
649c2c66affSColin Finck xmlOutputBufferWriteString(buf, " PUBLIC ");
650c2c66affSColin Finck xmlBufWriteQuotedString(buf->buffer, cur->ExternalID);
651c2c66affSColin Finck if (cur->SystemID != NULL) {
652c2c66affSColin Finck xmlOutputBufferWriteString(buf, " ");
653c2c66affSColin Finck xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
654c2c66affSColin Finck }
655c2c66affSColin Finck } else if (cur->SystemID != NULL &&
656c2c66affSColin Finck xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
657c2c66affSColin Finck xmlOutputBufferWriteString(buf, " SYSTEM ");
658c2c66affSColin Finck xmlBufWriteQuotedString(buf->buffer, cur->SystemID);
659c2c66affSColin Finck }
660c2c66affSColin Finck xmlOutputBufferWriteString(buf, ">\n");
661c2c66affSColin Finck }
662c2c66affSColin Finck
663c2c66affSColin Finck /**
664c2c66affSColin Finck * htmlAttrDumpOutput:
665c2c66affSColin Finck * @buf: the HTML buffer output
666c2c66affSColin Finck * @doc: the document
667c2c66affSColin Finck * @cur: the attribute pointer
668c2c66affSColin Finck *
669c2c66affSColin Finck * Dump an HTML attribute
670c2c66affSColin Finck */
671c2c66affSColin Finck static void
htmlAttrDumpOutput(xmlOutputBufferPtr buf,xmlDocPtr doc,xmlAttrPtr cur)67240ee59d6SThomas Faber htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
673c2c66affSColin Finck xmlChar *value;
674c2c66affSColin Finck
675c2c66affSColin Finck /*
676c2c66affSColin Finck * The html output method should not escape a & character
677c2c66affSColin Finck * occurring in an attribute value immediately followed by
678c2c66affSColin Finck * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
679c2c66affSColin Finck * This is implemented in xmlEncodeEntitiesReentrant
680c2c66affSColin Finck */
681c2c66affSColin Finck
682c2c66affSColin Finck if (cur == NULL) {
683c2c66affSColin Finck return;
684c2c66affSColin Finck }
685c2c66affSColin Finck xmlOutputBufferWriteString(buf, " ");
686c2c66affSColin Finck if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
687c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
688c2c66affSColin Finck xmlOutputBufferWriteString(buf, ":");
689c2c66affSColin Finck }
690c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->name);
691c2c66affSColin Finck if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
692c2c66affSColin Finck value = xmlNodeListGetString(doc, cur->children, 0);
693c2c66affSColin Finck if (value) {
694c2c66affSColin Finck xmlOutputBufferWriteString(buf, "=");
695c2c66affSColin Finck if ((cur->ns == NULL) && (cur->parent != NULL) &&
696c2c66affSColin Finck (cur->parent->ns == NULL) &&
697c2c66affSColin Finck ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
698c2c66affSColin Finck (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
699c2c66affSColin Finck (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
700c2c66affSColin Finck ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
701c2c66affSColin Finck (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
70240ee59d6SThomas Faber xmlChar *escaped;
703c2c66affSColin Finck xmlChar *tmp = value;
704c2c66affSColin Finck
705c2c66affSColin Finck while (IS_BLANK_CH(*tmp)) tmp++;
706c2c66affSColin Finck
70740ee59d6SThomas Faber /*
70840ee59d6SThomas Faber * the < and > have already been escaped at the entity level
70940ee59d6SThomas Faber * And doing so here breaks server side includes
71040ee59d6SThomas Faber */
71140ee59d6SThomas Faber escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
712c2c66affSColin Finck if (escaped != NULL) {
71340ee59d6SThomas Faber xmlBufWriteQuotedString(buf->buffer, escaped);
714c2c66affSColin Finck xmlFree(escaped);
715c2c66affSColin Finck } else {
71640ee59d6SThomas Faber xmlBufWriteQuotedString(buf->buffer, value);
717c2c66affSColin Finck }
718c2c66affSColin Finck } else {
719c2c66affSColin Finck xmlBufWriteQuotedString(buf->buffer, value);
720c2c66affSColin Finck }
721c2c66affSColin Finck xmlFree(value);
722c2c66affSColin Finck } else {
723c2c66affSColin Finck xmlOutputBufferWriteString(buf, "=\"\"");
724c2c66affSColin Finck }
725c2c66affSColin Finck }
726c2c66affSColin Finck }
727c2c66affSColin Finck
728c2c66affSColin Finck /**
729c2c66affSColin Finck * htmlNodeDumpFormatOutput:
730c2c66affSColin Finck * @buf: the HTML buffer output
731c2c66affSColin Finck * @doc: the document
732c2c66affSColin Finck * @cur: the current node
73340ee59d6SThomas Faber * @encoding: the encoding string (unused)
734c2c66affSColin Finck * @format: should formatting spaces been added
735c2c66affSColin Finck *
736c2c66affSColin Finck * Dump an HTML node, recursive behaviour,children are printed too.
737c2c66affSColin Finck */
738c2c66affSColin Finck void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf,xmlDocPtr doc,xmlNodePtr cur,const char * encoding ATTRIBUTE_UNUSED,int format)739c2c66affSColin Finck htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
74040ee59d6SThomas Faber xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
74140ee59d6SThomas Faber int format) {
7427244e0c5SThomas Faber xmlNodePtr root, parent;
74340ee59d6SThomas Faber xmlAttrPtr attr;
744c2c66affSColin Finck const htmlElemDesc * info;
745c2c66affSColin Finck
746c2c66affSColin Finck xmlInitParser();
747c2c66affSColin Finck
748c2c66affSColin Finck if ((cur == NULL) || (buf == NULL)) {
749c2c66affSColin Finck return;
750c2c66affSColin Finck }
751c2c66affSColin Finck
75240ee59d6SThomas Faber root = cur;
7537244e0c5SThomas Faber parent = cur->parent;
75440ee59d6SThomas Faber while (1) {
75540ee59d6SThomas Faber switch (cur->type) {
75640ee59d6SThomas Faber case XML_HTML_DOCUMENT_NODE:
75740ee59d6SThomas Faber case XML_DOCUMENT_NODE:
75840ee59d6SThomas Faber if (((xmlDocPtr) cur)->intSubset != NULL) {
75940ee59d6SThomas Faber htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
760c2c66affSColin Finck }
76140ee59d6SThomas Faber if (cur->children != NULL) {
7627244e0c5SThomas Faber /* Always validate cur->parent when descending. */
7637244e0c5SThomas Faber if (cur->parent == parent) {
7647244e0c5SThomas Faber parent = cur;
76540ee59d6SThomas Faber cur = cur->children;
76640ee59d6SThomas Faber continue;
767c2c66affSColin Finck }
7687244e0c5SThomas Faber } else {
7697244e0c5SThomas Faber xmlOutputBufferWriteString(buf, "\n");
7707244e0c5SThomas Faber }
77140ee59d6SThomas Faber break;
772c2c66affSColin Finck
77340ee59d6SThomas Faber case XML_ELEMENT_NODE:
774c2c66affSColin Finck /*
7757244e0c5SThomas Faber * Some users like lxml are known to pass nodes with a corrupted
7767244e0c5SThomas Faber * tree structure. Fall back to a recursive call to handle this
7777244e0c5SThomas Faber * case.
7787244e0c5SThomas Faber */
7797244e0c5SThomas Faber if ((cur->parent != parent) && (cur->children != NULL)) {
7807244e0c5SThomas Faber htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
7817244e0c5SThomas Faber break;
7827244e0c5SThomas Faber }
7837244e0c5SThomas Faber
7847244e0c5SThomas Faber /*
785c2c66affSColin Finck * Get specific HTML info for that node.
786c2c66affSColin Finck */
787c2c66affSColin Finck if (cur->ns == NULL)
788c2c66affSColin Finck info = htmlTagLookup(cur->name);
789c2c66affSColin Finck else
790c2c66affSColin Finck info = NULL;
791c2c66affSColin Finck
792c2c66affSColin Finck xmlOutputBufferWriteString(buf, "<");
793c2c66affSColin Finck if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
794c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
795c2c66affSColin Finck xmlOutputBufferWriteString(buf, ":");
796c2c66affSColin Finck }
797c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->name);
798c2c66affSColin Finck if (cur->nsDef)
799c2c66affSColin Finck xmlNsListDumpOutput(buf, cur->nsDef);
80040ee59d6SThomas Faber attr = cur->properties;
80140ee59d6SThomas Faber while (attr != NULL) {
80240ee59d6SThomas Faber htmlAttrDumpOutput(buf, doc, attr);
80340ee59d6SThomas Faber attr = attr->next;
80440ee59d6SThomas Faber }
805c2c66affSColin Finck
806c2c66affSColin Finck if ((info != NULL) && (info->empty)) {
807c2c66affSColin Finck xmlOutputBufferWriteString(buf, ">");
80840ee59d6SThomas Faber } else if (cur->children == NULL) {
809c2c66affSColin Finck if ((info != NULL) && (info->saveEndTag != 0) &&
810c2c66affSColin Finck (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
811c2c66affSColin Finck (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
812c2c66affSColin Finck xmlOutputBufferWriteString(buf, ">");
813c2c66affSColin Finck } else {
814c2c66affSColin Finck xmlOutputBufferWriteString(buf, "></");
815c2c66affSColin Finck if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
81640ee59d6SThomas Faber xmlOutputBufferWriteString(buf,
81740ee59d6SThomas Faber (const char *)cur->ns->prefix);
818c2c66affSColin Finck xmlOutputBufferWriteString(buf, ":");
819c2c66affSColin Finck }
820c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->name);
821c2c66affSColin Finck xmlOutputBufferWriteString(buf, ">");
822c2c66affSColin Finck }
82340ee59d6SThomas Faber } else {
82440ee59d6SThomas Faber xmlOutputBufferWriteString(buf, ">");
82540ee59d6SThomas Faber if ((format) && (info != NULL) && (!info->isinline) &&
82640ee59d6SThomas Faber (cur->children->type != HTML_TEXT_NODE) &&
82740ee59d6SThomas Faber (cur->children->type != HTML_ENTITY_REF_NODE) &&
82840ee59d6SThomas Faber (cur->children != cur->last) &&
82940ee59d6SThomas Faber (cur->name != NULL) &&
83040ee59d6SThomas Faber (cur->name[0] != 'p')) /* p, pre, param */
83140ee59d6SThomas Faber xmlOutputBufferWriteString(buf, "\n");
8327244e0c5SThomas Faber parent = cur;
83340ee59d6SThomas Faber cur = cur->children;
83440ee59d6SThomas Faber continue;
83540ee59d6SThomas Faber }
83640ee59d6SThomas Faber
837c2c66affSColin Finck if ((format) && (cur->next != NULL) &&
838c2c66affSColin Finck (info != NULL) && (!info->isinline)) {
839c2c66affSColin Finck if ((cur->next->type != HTML_TEXT_NODE) &&
840c2c66affSColin Finck (cur->next->type != HTML_ENTITY_REF_NODE) &&
8417244e0c5SThomas Faber (parent != NULL) &&
8427244e0c5SThomas Faber (parent->name != NULL) &&
8437244e0c5SThomas Faber (parent->name[0] != 'p')) /* p, pre, param */
844c2c66affSColin Finck xmlOutputBufferWriteString(buf, "\n");
845c2c66affSColin Finck }
846c2c66affSColin Finck
84740ee59d6SThomas Faber break;
84840ee59d6SThomas Faber
84940ee59d6SThomas Faber case XML_ATTRIBUTE_NODE:
85040ee59d6SThomas Faber htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
85140ee59d6SThomas Faber break;
85240ee59d6SThomas Faber
85340ee59d6SThomas Faber case HTML_TEXT_NODE:
85440ee59d6SThomas Faber if (cur->content == NULL)
85540ee59d6SThomas Faber break;
85640ee59d6SThomas Faber if (((cur->name == (const xmlChar *)xmlStringText) ||
85740ee59d6SThomas Faber (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
8587244e0c5SThomas Faber ((parent == NULL) ||
8597244e0c5SThomas Faber ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
8607244e0c5SThomas Faber (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
86140ee59d6SThomas Faber xmlChar *buffer;
86240ee59d6SThomas Faber
86340ee59d6SThomas Faber buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
86440ee59d6SThomas Faber if (buffer != NULL) {
86540ee59d6SThomas Faber xmlOutputBufferWriteString(buf, (const char *)buffer);
86640ee59d6SThomas Faber xmlFree(buffer);
86740ee59d6SThomas Faber }
86840ee59d6SThomas Faber } else {
869c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->content);
870c2c66affSColin Finck }
87140ee59d6SThomas Faber break;
87240ee59d6SThomas Faber
87340ee59d6SThomas Faber case HTML_COMMENT_NODE:
87440ee59d6SThomas Faber if (cur->content != NULL) {
87540ee59d6SThomas Faber xmlOutputBufferWriteString(buf, "<!--");
87640ee59d6SThomas Faber xmlOutputBufferWriteString(buf, (const char *)cur->content);
87740ee59d6SThomas Faber xmlOutputBufferWriteString(buf, "-->");
87840ee59d6SThomas Faber }
87940ee59d6SThomas Faber break;
88040ee59d6SThomas Faber
88140ee59d6SThomas Faber case HTML_PI_NODE:
88240ee59d6SThomas Faber if (cur->name != NULL) {
88340ee59d6SThomas Faber xmlOutputBufferWriteString(buf, "<?");
88440ee59d6SThomas Faber xmlOutputBufferWriteString(buf, (const char *)cur->name);
88540ee59d6SThomas Faber if (cur->content != NULL) {
88640ee59d6SThomas Faber xmlOutputBufferWriteString(buf, " ");
88740ee59d6SThomas Faber xmlOutputBufferWriteString(buf,
88840ee59d6SThomas Faber (const char *)cur->content);
88940ee59d6SThomas Faber }
89040ee59d6SThomas Faber xmlOutputBufferWriteString(buf, ">");
89140ee59d6SThomas Faber }
89240ee59d6SThomas Faber break;
89340ee59d6SThomas Faber
89440ee59d6SThomas Faber case HTML_ENTITY_REF_NODE:
89540ee59d6SThomas Faber xmlOutputBufferWriteString(buf, "&");
89640ee59d6SThomas Faber xmlOutputBufferWriteString(buf, (const char *)cur->name);
89740ee59d6SThomas Faber xmlOutputBufferWriteString(buf, ";");
89840ee59d6SThomas Faber break;
89940ee59d6SThomas Faber
90040ee59d6SThomas Faber case HTML_PRESERVE_NODE:
90140ee59d6SThomas Faber if (cur->content != NULL) {
90240ee59d6SThomas Faber xmlOutputBufferWriteString(buf, (const char *)cur->content);
90340ee59d6SThomas Faber }
90440ee59d6SThomas Faber break;
90540ee59d6SThomas Faber
90640ee59d6SThomas Faber default:
90740ee59d6SThomas Faber break;
90840ee59d6SThomas Faber }
90940ee59d6SThomas Faber
91040ee59d6SThomas Faber while (1) {
91140ee59d6SThomas Faber if (cur == root)
91240ee59d6SThomas Faber return;
91340ee59d6SThomas Faber if (cur->next != NULL) {
91440ee59d6SThomas Faber cur = cur->next;
91540ee59d6SThomas Faber break;
91640ee59d6SThomas Faber }
91740ee59d6SThomas Faber
9187244e0c5SThomas Faber cur = parent;
9197244e0c5SThomas Faber /* cur->parent was validated when descending. */
9207244e0c5SThomas Faber parent = cur->parent;
92140ee59d6SThomas Faber
92240ee59d6SThomas Faber if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
92340ee59d6SThomas Faber (cur->type == XML_DOCUMENT_NODE)) {
924c2c66affSColin Finck xmlOutputBufferWriteString(buf, "\n");
92540ee59d6SThomas Faber } else {
92640ee59d6SThomas Faber if ((format) && (cur->ns == NULL))
92740ee59d6SThomas Faber info = htmlTagLookup(cur->name);
92840ee59d6SThomas Faber else
92940ee59d6SThomas Faber info = NULL;
93040ee59d6SThomas Faber
931c2c66affSColin Finck if ((format) && (info != NULL) && (!info->isinline) &&
932c2c66affSColin Finck (cur->last->type != HTML_TEXT_NODE) &&
933c2c66affSColin Finck (cur->last->type != HTML_ENTITY_REF_NODE) &&
934c2c66affSColin Finck (cur->children != cur->last) &&
935c2c66affSColin Finck (cur->name != NULL) &&
936c2c66affSColin Finck (cur->name[0] != 'p')) /* p, pre, param */
937c2c66affSColin Finck xmlOutputBufferWriteString(buf, "\n");
93840ee59d6SThomas Faber
939c2c66affSColin Finck xmlOutputBufferWriteString(buf, "</");
940c2c66affSColin Finck if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
941c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
942c2c66affSColin Finck xmlOutputBufferWriteString(buf, ":");
943c2c66affSColin Finck }
944c2c66affSColin Finck xmlOutputBufferWriteString(buf, (const char *)cur->name);
945c2c66affSColin Finck xmlOutputBufferWriteString(buf, ">");
94640ee59d6SThomas Faber
947c2c66affSColin Finck if ((format) && (info != NULL) && (!info->isinline) &&
948c2c66affSColin Finck (cur->next != NULL)) {
949c2c66affSColin Finck if ((cur->next->type != HTML_TEXT_NODE) &&
950c2c66affSColin Finck (cur->next->type != HTML_ENTITY_REF_NODE) &&
9517244e0c5SThomas Faber (parent != NULL) &&
9527244e0c5SThomas Faber (parent->name != NULL) &&
9537244e0c5SThomas Faber (parent->name[0] != 'p')) /* p, pre, param */
954c2c66affSColin Finck xmlOutputBufferWriteString(buf, "\n");
955c2c66affSColin Finck }
956c2c66affSColin Finck }
95740ee59d6SThomas Faber }
95840ee59d6SThomas Faber }
95940ee59d6SThomas Faber }
960c2c66affSColin Finck
961c2c66affSColin Finck /**
962c2c66affSColin Finck * htmlNodeDumpOutput:
963c2c66affSColin Finck * @buf: the HTML buffer output
964c2c66affSColin Finck * @doc: the document
965c2c66affSColin Finck * @cur: the current node
96640ee59d6SThomas Faber * @encoding: the encoding string (unused)
967c2c66affSColin Finck *
968c2c66affSColin Finck * Dump an HTML node, recursive behaviour,children are printed too,
969c2c66affSColin Finck * and formatting returns/spaces are added.
970c2c66affSColin Finck */
971c2c66affSColin Finck void
htmlNodeDumpOutput(xmlOutputBufferPtr buf,xmlDocPtr doc,xmlNodePtr cur,const char * encoding ATTRIBUTE_UNUSED)972c2c66affSColin Finck htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
97340ee59d6SThomas Faber xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
97440ee59d6SThomas Faber htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
975c2c66affSColin Finck }
976c2c66affSColin Finck
977c2c66affSColin Finck /**
978c2c66affSColin Finck * htmlDocContentDumpFormatOutput:
979c2c66affSColin Finck * @buf: the HTML buffer output
980c2c66affSColin Finck * @cur: the document
98140ee59d6SThomas Faber * @encoding: the encoding string (unused)
982c2c66affSColin Finck * @format: should formatting spaces been added
983c2c66affSColin Finck *
984c2c66affSColin Finck * Dump an HTML document.
985c2c66affSColin Finck */
986c2c66affSColin Finck void
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf,xmlDocPtr cur,const char * encoding ATTRIBUTE_UNUSED,int format)987c2c66affSColin Finck htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
98840ee59d6SThomas Faber const char *encoding ATTRIBUTE_UNUSED,
98940ee59d6SThomas Faber int format) {
990*911153daSThomas Faber int type = 0;
991*911153daSThomas Faber if (cur) {
992*911153daSThomas Faber type = cur->type;
993*911153daSThomas Faber cur->type = XML_HTML_DOCUMENT_NODE;
994*911153daSThomas Faber }
99540ee59d6SThomas Faber htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
996*911153daSThomas Faber if (cur)
997*911153daSThomas Faber cur->type = (xmlElementType) type;
998c2c66affSColin Finck }
999c2c66affSColin Finck
1000c2c66affSColin Finck /**
1001c2c66affSColin Finck * htmlDocContentDumpOutput:
1002c2c66affSColin Finck * @buf: the HTML buffer output
1003c2c66affSColin Finck * @cur: the document
100440ee59d6SThomas Faber * @encoding: the encoding string (unused)
1005c2c66affSColin Finck *
1006f22fa382SThomas Faber * Dump an HTML document. Formatting return/spaces are added.
1007c2c66affSColin Finck */
1008c2c66affSColin Finck void
htmlDocContentDumpOutput(xmlOutputBufferPtr buf,xmlDocPtr cur,const char * encoding ATTRIBUTE_UNUSED)1009c2c66affSColin Finck htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
101040ee59d6SThomas Faber const char *encoding ATTRIBUTE_UNUSED) {
101140ee59d6SThomas Faber htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
1012c2c66affSColin Finck }
1013c2c66affSColin Finck
1014c2c66affSColin Finck /************************************************************************
1015c2c66affSColin Finck * *
1016c2c66affSColin Finck * Saving functions front-ends *
1017c2c66affSColin Finck * *
1018c2c66affSColin Finck ************************************************************************/
1019c2c66affSColin Finck
1020c2c66affSColin Finck /**
1021c2c66affSColin Finck * htmlDocDump:
1022c2c66affSColin Finck * @f: the FILE*
1023c2c66affSColin Finck * @cur: the document
1024c2c66affSColin Finck *
1025c2c66affSColin Finck * Dump an HTML document to an open FILE.
1026c2c66affSColin Finck *
1027c2c66affSColin Finck * returns: the number of byte written or -1 in case of failure.
1028c2c66affSColin Finck */
1029c2c66affSColin Finck int
htmlDocDump(FILE * f,xmlDocPtr cur)1030c2c66affSColin Finck htmlDocDump(FILE *f, xmlDocPtr cur) {
1031c2c66affSColin Finck xmlOutputBufferPtr buf;
1032c2c66affSColin Finck xmlCharEncodingHandlerPtr handler = NULL;
1033c2c66affSColin Finck const char *encoding;
1034c2c66affSColin Finck int ret;
1035c2c66affSColin Finck
1036c2c66affSColin Finck xmlInitParser();
1037c2c66affSColin Finck
1038c2c66affSColin Finck if ((cur == NULL) || (f == NULL)) {
1039c2c66affSColin Finck return(-1);
1040c2c66affSColin Finck }
1041c2c66affSColin Finck
1042c2c66affSColin Finck encoding = (const char *) htmlGetMetaEncoding(cur);
1043c2c66affSColin Finck
1044c2c66affSColin Finck if (encoding != NULL) {
1045c2c66affSColin Finck xmlCharEncoding enc;
1046c2c66affSColin Finck
1047c2c66affSColin Finck enc = xmlParseCharEncoding(encoding);
10481fe58c40SThomas Faber if (enc != XML_CHAR_ENCODING_UTF8) {
1049c2c66affSColin Finck handler = xmlFindCharEncodingHandler(encoding);
1050c2c66affSColin Finck if (handler == NULL)
1051c2c66affSColin Finck htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
10521fe58c40SThomas Faber }
1053c2c66affSColin Finck } else {
1054c2c66affSColin Finck /*
1055c2c66affSColin Finck * Fallback to HTML or ASCII when the encoding is unspecified
1056c2c66affSColin Finck */
1057c2c66affSColin Finck if (handler == NULL)
1058c2c66affSColin Finck handler = xmlFindCharEncodingHandler("HTML");
1059c2c66affSColin Finck if (handler == NULL)
1060c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ascii");
10611fe58c40SThomas Faber }
1062c2c66affSColin Finck
1063c2c66affSColin Finck buf = xmlOutputBufferCreateFile(f, handler);
1064c2c66affSColin Finck if (buf == NULL) return(-1);
1065c2c66affSColin Finck htmlDocContentDumpOutput(buf, cur, NULL);
1066c2c66affSColin Finck
1067c2c66affSColin Finck ret = xmlOutputBufferClose(buf);
1068c2c66affSColin Finck return(ret);
1069c2c66affSColin Finck }
1070c2c66affSColin Finck
1071c2c66affSColin Finck /**
1072c2c66affSColin Finck * htmlSaveFile:
1073c2c66affSColin Finck * @filename: the filename (or URL)
1074c2c66affSColin Finck * @cur: the document
1075c2c66affSColin Finck *
1076c2c66affSColin Finck * Dump an HTML document to a file. If @filename is "-" the stdout file is
1077c2c66affSColin Finck * used.
1078c2c66affSColin Finck * returns: the number of byte written or -1 in case of failure.
1079c2c66affSColin Finck */
1080c2c66affSColin Finck int
htmlSaveFile(const char * filename,xmlDocPtr cur)1081c2c66affSColin Finck htmlSaveFile(const char *filename, xmlDocPtr cur) {
1082c2c66affSColin Finck xmlOutputBufferPtr buf;
1083c2c66affSColin Finck xmlCharEncodingHandlerPtr handler = NULL;
1084c2c66affSColin Finck const char *encoding;
1085c2c66affSColin Finck int ret;
1086c2c66affSColin Finck
1087c2c66affSColin Finck if ((cur == NULL) || (filename == NULL))
1088c2c66affSColin Finck return(-1);
1089c2c66affSColin Finck
1090c2c66affSColin Finck xmlInitParser();
1091c2c66affSColin Finck
1092c2c66affSColin Finck encoding = (const char *) htmlGetMetaEncoding(cur);
1093c2c66affSColin Finck
1094c2c66affSColin Finck if (encoding != NULL) {
1095c2c66affSColin Finck xmlCharEncoding enc;
1096c2c66affSColin Finck
1097c2c66affSColin Finck enc = xmlParseCharEncoding(encoding);
10981fe58c40SThomas Faber if (enc != XML_CHAR_ENCODING_UTF8) {
1099c2c66affSColin Finck handler = xmlFindCharEncodingHandler(encoding);
1100c2c66affSColin Finck if (handler == NULL)
1101c2c66affSColin Finck htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1102c2c66affSColin Finck }
11031fe58c40SThomas Faber } else {
1104c2c66affSColin Finck /*
1105c2c66affSColin Finck * Fallback to HTML or ASCII when the encoding is unspecified
1106c2c66affSColin Finck */
1107c2c66affSColin Finck if (handler == NULL)
1108c2c66affSColin Finck handler = xmlFindCharEncodingHandler("HTML");
1109c2c66affSColin Finck if (handler == NULL)
1110c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ascii");
11111fe58c40SThomas Faber }
1112c2c66affSColin Finck
1113c2c66affSColin Finck /*
1114c2c66affSColin Finck * save the content to a temp buffer.
1115c2c66affSColin Finck */
1116c2c66affSColin Finck buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1117c2c66affSColin Finck if (buf == NULL) return(0);
1118c2c66affSColin Finck
1119c2c66affSColin Finck htmlDocContentDumpOutput(buf, cur, NULL);
1120c2c66affSColin Finck
1121c2c66affSColin Finck ret = xmlOutputBufferClose(buf);
1122c2c66affSColin Finck return(ret);
1123c2c66affSColin Finck }
1124c2c66affSColin Finck
1125c2c66affSColin Finck /**
1126c2c66affSColin Finck * htmlSaveFileFormat:
1127c2c66affSColin Finck * @filename: the filename
1128c2c66affSColin Finck * @cur: the document
1129c2c66affSColin Finck * @format: should formatting spaces been added
1130c2c66affSColin Finck * @encoding: the document encoding
1131c2c66affSColin Finck *
1132c2c66affSColin Finck * Dump an HTML document to a file using a given encoding.
1133c2c66affSColin Finck *
1134c2c66affSColin Finck * returns: the number of byte written or -1 in case of failure.
1135c2c66affSColin Finck */
1136c2c66affSColin Finck int
htmlSaveFileFormat(const char * filename,xmlDocPtr cur,const char * encoding,int format)1137c2c66affSColin Finck htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1138c2c66affSColin Finck const char *encoding, int format) {
1139c2c66affSColin Finck xmlOutputBufferPtr buf;
1140c2c66affSColin Finck xmlCharEncodingHandlerPtr handler = NULL;
1141c2c66affSColin Finck int ret;
1142c2c66affSColin Finck
1143c2c66affSColin Finck if ((cur == NULL) || (filename == NULL))
1144c2c66affSColin Finck return(-1);
1145c2c66affSColin Finck
1146c2c66affSColin Finck xmlInitParser();
1147c2c66affSColin Finck
1148c2c66affSColin Finck if (encoding != NULL) {
1149c2c66affSColin Finck xmlCharEncoding enc;
1150c2c66affSColin Finck
1151c2c66affSColin Finck enc = xmlParseCharEncoding(encoding);
11521fe58c40SThomas Faber if (enc != XML_CHAR_ENCODING_UTF8) {
1153c2c66affSColin Finck handler = xmlFindCharEncodingHandler(encoding);
1154c2c66affSColin Finck if (handler == NULL)
1155c2c66affSColin Finck htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1156c2c66affSColin Finck }
1157c2c66affSColin Finck htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1158c2c66affSColin Finck } else {
1159c2c66affSColin Finck htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1160c2c66affSColin Finck
1161c2c66affSColin Finck /*
1162c2c66affSColin Finck * Fallback to HTML or ASCII when the encoding is unspecified
1163c2c66affSColin Finck */
1164c2c66affSColin Finck if (handler == NULL)
1165c2c66affSColin Finck handler = xmlFindCharEncodingHandler("HTML");
1166c2c66affSColin Finck if (handler == NULL)
1167c2c66affSColin Finck handler = xmlFindCharEncodingHandler("ascii");
11681fe58c40SThomas Faber }
1169c2c66affSColin Finck
1170c2c66affSColin Finck /*
1171c2c66affSColin Finck * save the content to a temp buffer.
1172c2c66affSColin Finck */
1173c2c66affSColin Finck buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1174c2c66affSColin Finck if (buf == NULL) return(0);
1175c2c66affSColin Finck
1176c2c66affSColin Finck htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1177c2c66affSColin Finck
1178c2c66affSColin Finck ret = xmlOutputBufferClose(buf);
1179c2c66affSColin Finck return(ret);
1180c2c66affSColin Finck }
1181c2c66affSColin Finck
1182c2c66affSColin Finck /**
1183c2c66affSColin Finck * htmlSaveFileEnc:
1184c2c66affSColin Finck * @filename: the filename
1185c2c66affSColin Finck * @cur: the document
1186c2c66affSColin Finck * @encoding: the document encoding
1187c2c66affSColin Finck *
1188c2c66affSColin Finck * Dump an HTML document to a file using a given encoding
1189c2c66affSColin Finck * and formatting returns/spaces are added.
1190c2c66affSColin Finck *
1191c2c66affSColin Finck * returns: the number of byte written or -1 in case of failure.
1192c2c66affSColin Finck */
1193c2c66affSColin Finck int
htmlSaveFileEnc(const char * filename,xmlDocPtr cur,const char * encoding)1194c2c66affSColin Finck htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1195c2c66affSColin Finck return(htmlSaveFileFormat(filename, cur, encoding, 1));
1196c2c66affSColin Finck }
1197c2c66affSColin Finck
1198c2c66affSColin Finck #endif /* LIBXML_OUTPUT_ENABLED */
1199c2c66affSColin Finck
1200c2c66affSColin Finck #endif /* LIBXML_HTML_ENABLED */
1201