1 /*
2 * file_ascii.c -- generic xml import/export filters for hnb
3 *
4 * Copyright (C) 2001-2003 Øyvind Kolås <pippin@users.sourceforge.net>
5 *
6 * This program is free software; you can redistribute it and/or modify it under
7 * the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2, or (at your option) any later
9 * version.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 59
18 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21 #if HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include "xml_tok.h"
29
30 #include "cli.h"
31 #include "tree.h"
32
33 #include "file.h"
34 #include "prefs.h"
35 #include "query.h"
36 #include "util_string.h"
37
/* Writes `count` copies of the string `str` to the stream named `file`,
 * which must be in scope where the macro is used.
 *
 * Wrapped in do/while(0) so that `indent (...);` is exactly one statement
 * (safe in an unbraced if/else), and written with fputs so that `str` is
 * never interpreted as a printf format string. */
#define indent(count,str) \
	do { \
		int indent_j; \
		for (indent_j = 0; indent_j < (count); indent_j++) \
			fputs ((str), file); \
	} while (0)

/* When non-zero, import_xml passes the imported tree through
 * xml_cuddle_nodes.  Registered with cli_add_int as "xml_cuddle". */
static int xml_cuddle = 0;
41
/* Table handed to string_replace() when exporting node text.
 * Laid out as consecutive pairs -- the literal character followed by the
 * XML entity written in its place -- and terminated by NULL. */
static char *xmlquote[]={
	"<","&lt;",
	">","&gt;",
	"&","&amp;",
	"\"","&quot;",
	"'","&apos;",
	NULL
};
50
/* Table handed to string_replace() when importing character data.
 * Laid out as consecutive pairs -- the XML entity followed by the literal
 * character it stands for -- and terminated by NULL. */
static char *xmlunquote[]={
	"&lt;","<",
	"&gt;",">",
	"&amp;","&",
	"&quot;","\"",
	"&apos;","'",
	NULL
};
59
/* Returns the index of the first character of `haystack` that also occurs
 * in `needles`.
 *
 * Returns 0 when no character of `haystack` matches, so a caller cannot
 * tell "no match" apart from "match at index 0". */
static int findchar (const char *haystack, const char *needles)
{
	size_t pos = strcspn (haystack, needles);

	/* strcspn stops on the terminating NUL when nothing matched */
	return haystack[pos] ? (int) pos : 0;
}
75
/* Writes the closing line for a node whose text `data` starts with '<'.
 *
 *  - text starting with "<!-"            -> " -->"
 *  - text starting with "<?" or "<!"
 *    that contains a space, tab or '>'   -> an empty line
 *  - anything else                       -> "</name>", where name is the text
 *    between the '<' and the first space, tab or '>'; when none of those
 *    characters occurs only "</" is written.
 */
static void xml_export_close_tag (FILE *file, char *data)
{
	int name_end = findchar (data, " \t>");

	if (data[1] == '!' && data[2] == '-') {
		fprintf (file, " -->\n");
	} else if (name_end > 1 && (data[1] == '?' || data[1] == '!')) {
		fprintf (file, "\n");
	} else if (name_end > 0) {
		fprintf (file, "</%.*s>\n", name_end - 1, data + 1);
	} else {
		fprintf (file, "</\n");
	}
}

/* Writes `node` and the siblings reached through node_down() to `file` as
 * xml-like markup, recursing into node_right() children with level + 1.
 *
 * Each node is indented by `level` spaces.  Text starting with '<' is
 * treated as a tag: everything up to and including the first '>' is written
 * verbatim, the remainder is passed through string_replace() with the
 * xmlquote table.  Tags are closed after their children, or immediately
 * when there are none, unless the text ends in "/>".
 *
 * The node text is read in place, so its length is not limited by any
 * fixed-size buffer.
 */
static void xml_export_nodes (FILE * file, Node *node, int level)
{
	/* number of nodes on the current recursion path whose text starts with
	   "<!" or "<?"; while non-zero, text is written without quoting */
	static int no_quote = 0;

	for (; node; node = node_down (node)) {
		char *data = fixnullstring (node_get (node, TEXT));
		size_t len = strlen (data);
		int is_tag = (data[0] == '<');
		int is_raw = is_tag && (data[1] == '!' || data[1] == '?');
		int data_start = 0;

		indent (level, " ");

		if (is_tag) {
			/* findchar yields 0 when there is no '>', which leaves just
			   the leading '<' as the start tag */
			data_start = findchar (data, ">") + 1;
		}
		if (is_raw)
			no_quote++;

		/* start tag (empty for plain text nodes), then the payload */
		fprintf (file, "%.*s", data_start, data);
		if (no_quote) {
			fprintf (file, "%s", &data[data_start]);
		} else {
			char *quoted = string_replace (&data[data_start], xmlquote);

			fprintf (file, "%s", quoted);
			free (quoted);
		}

		if (node_right (node)) {
			fprintf (file, "\n");
			xml_export_nodes (file, node_right (node), level + 1);
			indent (level, " ");
			if (is_tag)
				xml_export_close_tag (file, data);
		} else if (is_tag && !(len >= 2 && data[len - 2] == '/')) {
			/* the length check keeps the "/>" test inside the string
			   for the one-character text "<" */
			xml_export_close_tag (file, data);
		} else {
			fprintf (file, "\n");
		}

		if (is_raw)
			no_quote--;
	}
}
149
export_xml(int argc,char ** argv,void * data)150 static uint64_t export_xml (int argc, char **argv, void *data)
151 {
152 Node *node = (Node *) data;
153 char *filename = argc==2?argv[1]:"";
154 FILE *file;
155
156 if (!strcmp (filename, "-"))
157 file = stdout;
158 else
159 file = fopen (filename, "w");
160 if (!file) {
161 cli_outfunf ("xml export, unable to open \"%s\"", filename);
162 return PTR_TO_UINT64(node);
163 }
164
165 xml_export_nodes (file, node, 0);
166
167 if (file != stdout)
168 fclose (file);
169
170 cli_outfunf ("xml export, wrote data to \"%s\"", filename);
171
172
173 return PTR_TO_UINT64(node);
174 }
175
176 /* joins up tags with data if there is data as the first child
177 of the tag.*/
xml_cuddle_nodes(Node * node)178 static Node *xml_cuddle_nodes (Node *node)
179 {
180
181 Node *tnode;
182 char *tdata;
183 char data[bufsize];
184
185 tnode = node_root (node);
186
187 while (tnode) {
188 if (node_right (tnode)) {
189 tdata = fixnullstring (node_get (node_right (tnode), TEXT));
190 if (tdata[0] != '<') { /* not a child tag */
191 strcpy (data, fixnullstring (node_get (tnode, TEXT)));
192 strcat (data, " ");
193 strcat (data, tdata);
194 node_set (tnode, TEXT, data);
195 node_remove (node_right (tnode));
196 }
197 }
198 tnode = node_recurse (tnode);
199 }
200
201 return (node);
202 }
203
204
import_xml(int argc,char ** argv,void * data)205 static uint64_t import_xml (int argc, char **argv, void *data)
206 {
207 Node *node = (Node *) data;
208 char *filename = argc==2?argv[1]:"";
209 char *rdata;
210 int type;
211 int level = 0;
212 char nodedata[4096];
213 xml_tok_state *s;
214 import_state_t ist;
215 int got_data = 0;
216
217 FILE *file;
218
219 nodedata[0] = 0;
220
221 file = fopen (filename, "r");
222 if (!file) {
223 cli_outfunf ("xml import, unable to open \"%s\"", filename);
224 return PTR_TO_UINT64(node);
225 }
226 s = xml_tok_init (file);
227 init_import (&ist, node);
228
229 while (((type = xml_tok_get (s, &rdata)) != t_eof)) {
230 if (type == t_error) {
231 cli_outfunf ("xml import error, parsing og '%s', line:%i %s", filename,
232 s->line_no,rdata);
233 fclose (file);
234 return PTR_TO_UINT64(node);
235 }
236
237 switch (type) {
238 case t_prolog:
239 sprintf (nodedata, "<?%s?>", rdata);
240 import_node_text (&ist, level, nodedata);
241 nodedata[0] = 0;
242 got_data = 0;
243 break;
244 case t_dtd:
245 sprintf (nodedata, "<!%s>", rdata);
246 import_node_text (&ist, level, nodedata);
247 nodedata[0] = 0;
248 got_data = 0;
249 break;
250 case t_comment:
251 sprintf (nodedata, "<!--%s-->", rdata);
252 import_node_text (&ist, level, nodedata);
253 break;
254 case t_tag:
255 if (got_data) {
256 char *unquoted=string_replace(nodedata,xmlunquote);
257 import_node_text (&ist, level, unquoted);
258 free(unquoted);
259 got_data = 0;
260 nodedata[0] = 0;
261 }
262 sprintf (nodedata, "<%s", rdata);
263 break;
264 case t_att:
265 sprintf (&nodedata[strlen (nodedata)], " %s=", rdata);
266 break;
267 case t_val:
268 if (strchr (rdata, '"')) {
269 sprintf (&nodedata[strlen (nodedata)], "'%s'", rdata);
270 } else {
271 if (strchr (rdata, '\'')) {
272 sprintf (&nodedata[strlen (nodedata)], "\"%s\"",
273 rdata);
274 } else {
275 sprintf (&nodedata[strlen (nodedata)], "\"%s\"",
276 rdata);
277 }
278 }
279 break;
280 case t_endtag:
281 sprintf (&nodedata[strlen (nodedata)], ">");
282
283 import_node_text (&ist, level, nodedata);
284 nodedata[0] = 0;
285 level++;
286 break;
287 case t_closeemptytag:
288 sprintf (&nodedata[strlen (nodedata)], "/>");
289
290 import_node_text (&ist, level, nodedata);
291 nodedata[0] = 0;
292 break;
293 case t_closetag:
294 if (got_data) {
295 char *unquoted=string_replace(nodedata,xmlunquote);
296 import_node_text (&ist, level, unquoted);
297 free(unquoted);
298 got_data = 0;
299 nodedata[0] = 0;
300 }
301 level--;
302 sprintf (nodedata, "</%s>", rdata);
303 nodedata[0] = 0;
304 break;
305 case t_whitespace:
306 if (got_data) {
307 strcpy (&nodedata[strlen (nodedata)], " ");
308 }
309 break;
310 case t_word:
311 strcpy (&nodedata[strlen (nodedata)], rdata);
312 got_data = 1;
313 break;
314 case t_entity:
315 got_data = 1;
316 sprintf (&nodedata[strlen (nodedata)], "&%s;", rdata);
317 break;
318 default:
319 break;
320 }
321 }
322
323 if (node_getflag (node, F_temp))
324 node = node_remove (node); /* remove temporary node, if tree was empty */
325
326 if (xml_cuddle)
327 node = xml_cuddle_nodes (node);
328
329 cli_outfunf ("xml import - imported \"%s\" %i lines", filename, s->line_no);
330 xml_tok_cleanup (s);
331 return PTR_TO_UINT64(node);
332 }
333
334 /*
335 !init_file_xml();
336 */
init_file_xml()337 void init_file_xml ()
338 {
339 cli_add_command ("export_xml", export_xml, "<filename>");
340 cli_add_command ("import_xml", import_xml, "<filename>");
341 cli_add_help ("export_xml",
342 "Exports the current node, it's siblings and all sublevels to 'filename' as if it was xml markup.\
343 (load an xml file with import_xml or hnb -x file.xml to see how it should be inside hnb.");
344 cli_add_help ("import_xml",
345 "Imports 'filename' and inserts it's contents at the current level.");
346 cli_add_int ("xml_cuddle", &xml_cuddle,
347 "join the data with nodes if no tags within tag");
348 }
349