1 /*
 * file_xml.c -- generic xml import/export filters for hnb
3  *
 * Copyright (C) 2001-2003 Øyvind Kolås <pippin@users.sourceforge.net>
5  *
6  * This program is free software; you can redistribute it and/or modify it under
7  * the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2, or (at your option) any later
9  * version.
10  *
11  * This program is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program; if not, write to the Free Software Foundation, Inc., 59
18  * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19  */
20 
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include "xml_tok.h"
29 
30 #include "cli.h"
31 #include "tree.h"
32 
33 #include "file.h"
34 #include "prefs.h"
35 #include "query.h"
36 #include "util_string.h"
37 
/*
 * Writes the string `str` `count` times to the FILE * variable named `file`
 * that must be in scope where the macro is used.
 *
 * The string is written literally (it is not a printf format), `count` is
 * evaluated once, and the do/while(0) wrapper makes the expansion a single
 * statement, so "indent (n, s);" is safe in front of an else.
 */
#define indent(count,str)	\
	do { \
		int indent_left_ = (count); \
		while (indent_left_-- > 0) \
			fputs ((str), file); \
	} while (0)

/* set through the "xml_cuddle" cli variable; when non-zero import_xml
   runs xml_cuddle_nodes on the imported tree */
static int xml_cuddle = 0;

/* NULL terminated list of (plain text, xml escape) pairs, handed to
   string_replace when exporting */
static char *xmlquote[]={
	"<","&lt;",
	">","&gt;",
	"&","&amp;",
	"\"","&quot;",
	"'","&apos;",
	NULL
};

/* NULL terminated list of (xml escape, plain text) pairs, handed to
   string_replace when importing */
static char *xmlunquote[]={
	"&lt;","<",
	"&gt;",">",
	"&amp;","&",
	"&quot;","\"",
	"&apos;","'",
	NULL
};
59 
/*
 * Returns the index of the first character of haystack that also occurs in
 * needles.
 *
 * Returns 0 when haystack contains none of the needles (or when either
 * string is empty). A match at index 0 also yields 0, so the two cases
 * cannot be told apart by the return value alone.
 */
static int findchar (const char *haystack, const char *needles)
{
	/* length of the leading run of haystack that is free of needles */
	size_t pos = strcspn (haystack, needles);

	/* stopping on the terminator means nothing matched */
	return haystack[pos] ? (int) pos : 0;
}
75 
/*
 * Writes the closing part of a node whose text starts with '<'.
 *
 * data      the node's text
 * name_end  index of the first ' ', '\t' or '>' in data, 0 if there is none
 *
 * A text starting with "<!-" is closed with " -->", other "<!" / "<?" texts
 * only get a newline, and everything else gets "</name>" where name is the
 * text between the '<' and name_end.
 */
static void xml_export_close (FILE *file, const char *data, int name_end)
{
	if (data[1] == '!' && data[2] == '-') {
		fprintf (file, " -->\n");
	} else if (name_end == 0) {
		/* no delimiter after the tag name: only a bare "</" is written */
		fprintf (file, "</\n");
	} else if (data[1] != '?' && data[1] != '!') {
		fprintf (file, "</%.*s>\n", name_end - 1, data + 1);
	} else {
		fprintf (file, "\n");
	}
}

/*
 * Writes node, the nodes reached from it through node_down, and (recursively)
 * whatever node_right returns for each of them, to file as xml-like markup.
 *
 * file   stream to write to
 * node   first node to write, may be NULL (nothing is written then)
 * level  nesting depth, each line is indented by two spaces per level
 *
 * A node text starting with '<' is split after its first '>' into a start
 * tag, written as is, and trailing data. Data (and the whole text of nodes
 * not starting with '<') goes through string_replace with xmlquote, except
 * inside "<!" / "<?" nodes where it is written unchanged.
 *
 * The text is never copied into a fixed size buffer, so its length is not
 * limited.
 */
static void xml_export_nodes (FILE * file, Node *node, int level)
{
	/* >0 while a "<!" or "<?" node and the nodes below it are written */
	static int no_quote = 0;

	for (; node; node = node_down (node)) {
		char *data = fixnullstring (node_get (node, TEXT));
		size_t len = strlen (data);
		int is_tag = (data[0] == '<');
		int is_verbatim = is_tag && (data[1] == '!' || data[1] == '?');
		/* "<.../>" style text; texts shorter than two characters never match */
		int self_closing = (len >= 2 && data[len - 2] == '/');
		int data_start = 0;		/* length of the start tag, 0 if there is none */
		int name_end = 0;		/* where the tag name ends, see xml_export_close */

		indent (level, "  ");

		if (is_tag) {
			/* without a '>' findchar yields 0 and the start tag is just "<" */
			data_start = findchar (data, ">") + 1;
			name_end = findchar (data, " \t>");
			if (is_verbatim)
				no_quote++;
		}

		if (data_start > 0)
			fprintf (file, "%.*s", data_start, data);

		if (no_quote) {
			fputs (data + data_start, file);
		} else {
			/* string_replace hands back a buffer that is freed here */
			char *quoted = string_replace (data + data_start, xmlquote);

			if (quoted)
				fputs (quoted, file);
			free (quoted);
		}

		if (node_right (node)) {
			fprintf (file, "\n");
			xml_export_nodes (file, node_right (node), level + 1);
			indent (level, "  ");
			if (is_tag)
				xml_export_close (file, data, name_end);
		} else if (is_tag && !self_closing) {
			xml_export_close (file, data, name_end);
		} else {
			fprintf (file, "\n");
		}

		if (is_verbatim)
			no_quote--;
	}
}
149 
export_xml(int argc,char ** argv,void * data)150 static uint64_t export_xml (int argc, char **argv, void *data)
151 {
152 	Node *node = (Node *) data;
153 	char *filename = argc==2?argv[1]:"";
154 	FILE *file;
155 
156 	if (!strcmp (filename, "-"))
157 		file = stdout;
158 	else
159 		file = fopen (filename, "w");
160 	if (!file) {
161 		cli_outfunf ("xml export, unable to open \"%s\"", filename);
162 		return PTR_TO_UINT64(node);
163 	}
164 
165 	xml_export_nodes (file, node, 0);
166 
167 	if (file != stdout)
168 		fclose (file);
169 
170 	cli_outfunf ("xml export, wrote data to \"%s\"", filename);
171 
172 
173 	return PTR_TO_UINT64(node);
174 }
175 
176 /* joins up tags with data if there is data as the first child
177    of the tag.*/
xml_cuddle_nodes(Node * node)178 static Node *xml_cuddle_nodes (Node *node)
179 {
180 
181 	Node *tnode;
182 	char *tdata;
183 	char data[bufsize];
184 
185 	tnode = node_root (node);
186 
187 	while (tnode) {
188 		if (node_right (tnode)) {
189 			tdata = fixnullstring (node_get (node_right (tnode), TEXT));
190 			if (tdata[0] != '<') {	/* not a child tag */
191 				strcpy (data, fixnullstring (node_get (tnode, TEXT)));
192 				strcat (data, " ");
193 				strcat (data, tdata);
194 				node_set (tnode, TEXT, data);
195 				node_remove (node_right (tnode));
196 			}
197 		}
198 		tnode = node_recurse (tnode);
199 	}
200 
201 	return (node);
202 }
203 
204 
import_xml(int argc,char ** argv,void * data)205 static uint64_t import_xml (int argc, char **argv, void *data)
206 {
207 	Node *node = (Node *) data;
208 	char *filename = argc==2?argv[1]:"";
209 	char *rdata;
210 	int type;
211 	int level = 0;
212 	char nodedata[4096];
213 	xml_tok_state *s;
214 	import_state_t ist;
215 	int got_data = 0;
216 
217 	FILE *file;
218 
219 	nodedata[0] = 0;
220 
221 	file = fopen (filename, "r");
222 	if (!file) {
223 		cli_outfunf ("xml import, unable to open \"%s\"", filename);
224 		return PTR_TO_UINT64(node);
225 	}
226 	s = xml_tok_init (file);
227 	init_import (&ist, node);
228 
229 	while (((type = xml_tok_get (s, &rdata)) != t_eof)) {
230 		if (type == t_error) {
231 			cli_outfunf ("xml import error, parsing og '%s', line:%i %s", filename,
232 						 s->line_no,rdata);
233 			fclose (file);
234 			return PTR_TO_UINT64(node);
235 		}
236 
237 		switch (type) {
238 			case t_prolog:
239 				sprintf (nodedata, "<?%s?>", rdata);
240 				import_node_text (&ist, level, nodedata);
241 				nodedata[0] = 0;
242 				got_data = 0;
243 				break;
244 			case t_dtd:
245 				sprintf (nodedata, "<!%s>", rdata);
246 				import_node_text (&ist, level, nodedata);
247 				nodedata[0] = 0;
248 				got_data = 0;
249 				break;
250 			case t_comment:
251 				sprintf (nodedata, "<!--%s-->", rdata);
252 				import_node_text (&ist, level, nodedata);
253 				break;
254 			case t_tag:
255 				if (got_data) {
256 					char *unquoted=string_replace(nodedata,xmlunquote);
257 					import_node_text (&ist, level, unquoted);
258 					free(unquoted);
259 					got_data = 0;
260 					nodedata[0] = 0;
261 				}
262 				sprintf (nodedata, "<%s", rdata);
263 				break;
264 			case t_att:
265 				sprintf (&nodedata[strlen (nodedata)], " %s=", rdata);
266 				break;
267 			case t_val:
268 				if (strchr (rdata, '"')) {
269 					sprintf (&nodedata[strlen (nodedata)], "'%s'", rdata);
270 				} else {
271 					if (strchr (rdata, '\'')) {
272 						sprintf (&nodedata[strlen (nodedata)], "\"%s\"",
273 								 rdata);
274 					} else {
275 						sprintf (&nodedata[strlen (nodedata)], "\"%s\"",
276 								 rdata);
277 					}
278 				}
279 				break;
280 			case t_endtag:
281 				sprintf (&nodedata[strlen (nodedata)], ">");
282 
283 				import_node_text (&ist, level, nodedata);
284 				nodedata[0] = 0;
285 				level++;
286 				break;
287 			case t_closeemptytag:
288 				sprintf (&nodedata[strlen (nodedata)], "/>");
289 
290 				import_node_text (&ist, level, nodedata);
291 				nodedata[0] = 0;
292 				break;
293 			case t_closetag:
294 				if (got_data) {
295 					char *unquoted=string_replace(nodedata,xmlunquote);
296 					import_node_text (&ist, level, unquoted);
297 					free(unquoted);
298 					got_data = 0;
299 					nodedata[0] = 0;
300 				}
301 				level--;
302 				sprintf (nodedata, "</%s>", rdata);
303 				nodedata[0] = 0;
304 				break;
305 			case t_whitespace:
306 				if (got_data) {
307 					strcpy (&nodedata[strlen (nodedata)], " ");
308 				}
309 				break;
310 			case t_word:
311 				strcpy (&nodedata[strlen (nodedata)], rdata);
312 				got_data = 1;
313 				break;
314 			case t_entity:
315 				got_data = 1;
316 				sprintf (&nodedata[strlen (nodedata)], "&%s;", rdata);
317 				break;
318 			default:
319 				break;
320 		}
321 	}
322 
323 	if (node_getflag (node, F_temp))
324 		node = node_remove (node);	/* remove temporary node, if tree was empty */
325 
326 	if (xml_cuddle)
327 		node = xml_cuddle_nodes (node);
328 
329 	cli_outfunf ("xml import - imported \"%s\" %i lines", filename, s->line_no);
330 	xml_tok_cleanup (s);
331 	return PTR_TO_UINT64(node);
332 }
333 
334 /*
335 !init_file_xml();
336 */
init_file_xml()337 void init_file_xml ()
338 {
339 	cli_add_command ("export_xml", export_xml, "<filename>");
340 	cli_add_command ("import_xml", import_xml, "<filename>");
341 	cli_add_help ("export_xml",
342 				  "Exports the current node, it's siblings and all sublevels to 'filename' as if it was xml markup.\
343 (load an xml file with import_xml or hnb -x file.xml to see how it should be inside hnb.");
344 	cli_add_help ("import_xml",
345 				  "Imports 'filename' and inserts it's contents at the current level.");
346 	cli_add_int ("xml_cuddle", &xml_cuddle,
347 				 "join the data with nodes if no tags within tag");
348 }
349