1 /* This file is part of the Zebra server.
2    Copyright (C) 2004-2013 Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 
18 */
19 
20 /* converts data1 tree to XML record */
21 
22 #if HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26 
27 #include <idzebra/data1.h>
28 #include <yaz/wrbuf.h>
29 
30 #define IDSGML_MARGIN 75
31 
32 #define PRETTY_FORMAT 0
33 
/*
 * Return the number of leading characters of b for which d1_isspace()
 * is false, examining at most max characters.
 */
static int wordlen(char *b, int max)
{
    int count;

    for (count = 0; count < max; count++)
    {
        if (d1_isspace(b[count]))
            break;
    }
    return count;
}
42 
/* Append col blank characters to b; nothing is written when col <= 0. */
static void indent (WRBUF b, int col)
{
    int remaining;

    for (remaining = col; remaining > 0; remaining--)
        wrbuf_putc (b, ' ');
}
49 
/*
 * Append every attribute in the list starting at p to b, each one preceded
 * by a single blank.  An attribute with a value is written as name="value";
 * one without a value is written as the bare name.
 *
 * The name goes out verbatim when the attribute is marked DATA1I_xmltext
 * and through wrbuf_xmlputs() otherwise.  The value goes through
 * wrbuf_xmlputs() only when the attribute is marked DATA1I_text and is
 * written verbatim otherwise.
 */
static void wrbuf_put_xattr(WRBUF b, data1_xattr *p)
{
    data1_xattr *attr = p;

    while (attr)
    {
        wrbuf_putc(b, ' ');

        if (attr->what != DATA1I_xmltext)
            wrbuf_xmlputs(b, attr->name);
        else
            wrbuf_puts(b, attr->name);

        if (attr->value)
        {
            wrbuf_puts(b, "=\"");
            if (attr->what != DATA1I_text)
                wrbuf_puts(b, attr->value);
            else
                wrbuf_xmlputs(b, attr->value);
            wrbuf_putc(b, '"');
        }

        attr = attr->next;
    }
}
71 
/*
 * Write an element name for tag to b (without the surrounding brackets).
 *
 * Some producers (the grs.marc filter is the known case) hand over tags
 * that are not usable as element names.  A tag that starts with a digit or
 * contains any of the characters in " <>$,()[]" is replaced by the fixed
 * name "tag"; when opening is non-zero the original tag text is kept in a
 * value="..." attribute, written through wrbuf_xmlputs().  Any other tag
 * is written unchanged.
 */
static void wrbuf_write_tag(WRBUF b, const char *tag, int opening)
{
    const char *cp;
    int needs_fixup = (*tag >= '0' && *tag <= '9');

    /* one offending character is enough, so stop scanning once found */
    for (cp = tag; *cp && !needs_fixup; cp++)
    {
        if (strchr(" <>$,()[]", *cp))
            needs_fixup = 1;
    }

    if (!needs_fixup)
    {
        wrbuf_puts(b, tag);
        return;
    }

    wrbuf_puts(b, "tag");
    if (opening)
    {
        wrbuf_puts(b, " value=\"");
        wrbuf_xmlputs(b, tag);
        wrbuf_puts(b, "\"");
    }
}
96 
/*
 * Write the children of n (the n->child list, following ->next) to b as
 * markup, recursing into each child's own children.
 *
 * n              node whose children are serialized; n itself is not written
 * select         when non-zero, tag nodes whose u.tag.node_selected is zero
 *                are skipped together with everything below them
 * b              output buffer
 * col            indent column for this level; used only with pretty_format
 * pretty_format  when non-zero, output is indented, newlines are added
 *                after tags and data, and plain text is word-wrapped at
 *                IDSGML_MARGIN columns
 *
 * Node handling:
 *  - DATA1N_preprocess   written as <?target attrs ...?> plus a newline
 *  - DATA1N_tag          written as <tag attrs>...</tag>; a tag that
 *                        data1_matchstr() reports as "wellknown" emits no
 *                        markup of its own, only its children
 *  - DATA1N_data/comment written according to u.data.what; comments are
 *                        wrapped in <!-- and -->
 *  - other node kinds    ignored
 *
 * Returns 0 on success and -1 if a recursive call returns a negative value.
 */
static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col,
                        int pretty_format)
{
    data1_node *c;
    /* nested levels indent by 2; once col is above 40 they are put at 40 */
    int child_col = (col > 40) ? 40 : col + 2;

    for (c = n->child; c; c = c->next)
    {
        if (c->which == DATA1N_preprocess)
        {
            if (pretty_format)
                indent(b, col);
            wrbuf_puts(b, "<?");
            wrbuf_xmlputs(b, c->u.preprocess.target);
            wrbuf_put_xattr(b, c->u.preprocess.attributes);
            /* separate the target/attributes from any content that follows */
            if (c->child)
                wrbuf_puts(b, " ");
            if (nodetoidsgml(c, select, b, child_col, pretty_format) < 0)
                return -1;
            wrbuf_puts(b, "?>\n");
        }
        else if (c->which == DATA1N_tag)
        {
            char *tag;

            if (select && !c->u.tag.node_selected)
                continue;
            tag = c->u.tag.tag;
            if (!data1_matchstr(tag, "wellknown"))
            {
                /* skip the wellknown wrapper itself; children are written
                   at the current level */
                if (nodetoidsgml(c, select, b, col, pretty_format) < 0)
                    return -1;
            }
            else
            {
                if (pretty_format)
                    indent(b, col);
                wrbuf_puts(b, "<");
                wrbuf_write_tag(b, tag, 1);
                wrbuf_put_xattr(b, c->u.tag.attributes);
                wrbuf_puts(b, ">");
                if (pretty_format)
                    wrbuf_puts(b, "\n");
                if (nodetoidsgml(c, select, b, child_col, pretty_format) < 0)
                    return -1;
                if (pretty_format)
                    indent(b, col);
                wrbuf_puts(b, "</");
                wrbuf_write_tag(b, tag, 0);
                wrbuf_puts(b, ">");
                if (pretty_format)
                    wrbuf_puts(b, "\n");
            }
        }
        else if (c->which == DATA1N_data || c->which == DATA1N_comment)
        {
            char *p = c->u.data.data;
            int l = c->u.data.len;
            int first = 1;      /* no word written yet on the current line */
            int lcol = col;     /* column reached on the current line */

            if (pretty_format && !c->u.data.formatted_text)
                indent(b, col);
            if (c->which == DATA1N_comment)
                wrbuf_puts(b, "<!--");
            switch (c->u.data.what)
            {
            case DATA1I_xmltext:
                /* already markup: copied as-is */
                wrbuf_write(b, c->u.data.data, c->u.data.len);
                break;
            case DATA1I_text:
                if (!pretty_format || c->u.data.formatted_text)
                {
                    wrbuf_xmlputs_n(b, p, l);
                }
                else
                {
                    /* re-flow the text word by word */
                    while (l > 0)
                    {
                        int wlen;

                        while (l > 0 && d1_isspace(*p))
                            p++, l--;
                        if (l <= 0)
                            break;
                        /* *p is not whitespace and l > 0, so wlen >= 1 */
                        wlen = wordlen(p, l);
                        /* break if we cross margin and word is not too long */
                        if (lcol + wlen > IDSGML_MARGIN &&
                            wlen < IDSGML_MARGIN)
                        {
                            wrbuf_puts(b, "\n");
                            indent(b, col);
                            lcol = col;
                            first = 1;
                        }
                        if (!first)
                        {
                            wrbuf_putc(b, ' ');
                            lcol++;
                        }
                        /* Write the word through wrbuf_xmlputs_n(), the same
                           call the unformatted path uses, so that markup
                           characters in the text are treated identically in
                           both modes (previously they were copied raw). */
                        wrbuf_xmlputs_n(b, p, wlen);
                        p += wlen;
                        l -= wlen;
                        lcol += wlen;
                        first = 0;
                    }
                    wrbuf_puts(b, "\n");
                }
                break;
            case DATA1I_num:
            case DATA1I_oid:
                wrbuf_xmlputs_n(b, c->u.data.data, c->u.data.len);
                if (pretty_format)
                    wrbuf_puts(b, "\n");
                break;
            default:
                /* other data kinds produce no output */
                break;
            }
            if (c->which == DATA1N_comment)
            {
                wrbuf_puts(b, "-->");
                if (pretty_format)
                    wrbuf_puts(b, "\n");
            }
        }
    }
    return 0;
}
229 
/*
 * Serialize the data1 tree rooted at n into the work buffer obtained from
 * data1_get_wrbuf(dh), which is rewound first.
 *
 * When data1_is_xmlmode(dh) is false, the output is wrapped in an element
 * named after n->u.root.type; otherwise only the children are written.
 * select is passed on to nodetoidsgml(); pretty formatting is always off.
 *
 * On success *len receives the number of bytes written and the buffer
 * contents are returned.  If nodetoidsgml() returns non-zero, a null
 * pointer is returned and *len is left untouched.
 */
char *data1_nodetoidsgml (data1_handle dh, data1_node *n, int select, int *len)
{
    WRBUF out = data1_get_wrbuf(dh);
    int status;

    wrbuf_rewind(out);

    if (!data1_is_xmlmode(dh))
    {
        wrbuf_puts(out, "<");
        wrbuf_write_tag(out, n->u.root.type, 1);
        wrbuf_puts(out, ">\n");
    }

    status = nodetoidsgml(n, select, out, 0, 0 /* no pretty format */);
    if (status != 0)
        return NULL;

    if (!data1_is_xmlmode(dh))
    {
        wrbuf_puts(out, "</");
        wrbuf_write_tag(out, n->u.root.type, 0);
        wrbuf_puts(out, ">\n");
    }

    *len = wrbuf_len(out);
    return wrbuf_buf(out);
}
253 /*
254  * Local variables:
255  * c-basic-offset: 4
256  * c-file-style: "Stroustrup"
257  * indent-tabs-mode: nil
258  * End:
259  * vim: shiftwidth=4 tabstop=8 expandtab
260  */
261 
262