1 /* This file is part of the Zebra server.
2 Copyright (C) 2004-2013 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17
18 */
19
20 /* converts data1 tree to XML record */
21
22 #if HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26
27 #include <idzebra/data1.h>
28 #include <yaz/wrbuf.h>
29
/* Right-margin column consulted when text data is word-wrapped for
   pretty-format output: a word that would run past this column is moved
   to a new line, unless the word alone is at least this long. */
#define IDSGML_MARGIN 75

/* NOTE(review): not referenced in the visible code - data1_nodetoidsgml
   passes a literal 0 for pretty_format; confirm whether this is still
   needed. */
#define PRETTY_FORMAT 0
33
/*
 * Length of the word starting at b: the number of leading characters for
 * which d1_isspace() is false, never looking at more than max characters.
 */
static int wordlen(char *b, int max)
{
    int n;

    for (n = 0; n < max; n++)
    {
        if (d1_isspace(b[n]))
            break;
    }
    return n;
}
42
/* Appends col space characters to b; writes nothing when col <= 0. */
static void indent(WRBUF b, int col)
{
    int remaining = col;

    while (remaining > 0)
    {
        wrbuf_putc(b, ' ');
        remaining--;
    }
}
49
/*
 * Appends every attribute in the list starting at p to b, each one as
 * ' name' or ' name="value"' (the value part only when p->value is set).
 *
 * Escaping depends on p->what:
 *   - the name is written verbatim for DATA1I_xmltext, XML-escaped otherwise;
 *   - the value is XML-escaped for DATA1I_text, written verbatim otherwise.
 */
static void wrbuf_put_xattr(WRBUF b, data1_xattr *p)
{
    while (p)
    {
        wrbuf_putc(b, ' ');

        if (p->what != DATA1I_xmltext)
            wrbuf_xmlputs(b, p->name);
        else
            wrbuf_puts(b, p->name);

        if (p->value)
        {
            wrbuf_puts(b, "=\"");
            if (p->what != DATA1I_text)
                wrbuf_puts(b, p->value);
            else
                wrbuf_xmlputs(b, p->value);
            wrbuf_putc(b, '"');
        }

        p = p->next;
    }
}
71
/*
 * Appends an element name for tag to b (without the surrounding '<', '</'
 * or '>').
 *
 * Some data1 trees (the grs.marc filter is one producer) carry tag names
 * that are not valid XML element names.  A tag that starts with a digit or
 * contains any of the characters " <>$,()[]" is therefore replaced by the
 * fixed element name "tag"; when opening is non-zero the original name is
 * kept in an XML-escaped value="..." attribute.  Any other tag is written
 * unchanged.
 */
static void wrbuf_write_tag(WRBUF b, const char *tag, int opening)
{
    static const char invalid_chars[] = " <>$,()[]";
    int starts_with_digit = (tag[0] >= '0' && tag[0] <= '9');

    if (!starts_with_digit && strpbrk(tag, invalid_chars) == NULL)
    {
        /* already usable as an element name */
        wrbuf_puts(b, tag);
        return;
    }

    wrbuf_puts(b, "tag");
    if (!opening)
        return;

    wrbuf_puts(b, " value=\"");
    wrbuf_xmlputs(b, tag);
    wrbuf_puts(b, "\"");
}
96
/*
 * Column for the children of a node written at column col: two columns
 * further in, except that a column already beyond 40 is pulled back to 40.
 */
static int idsgml_child_col(int col)
{
    return (col > 40) ? 40 : col + 2;
}

/*
 * Writes the l bytes at p to b as word-wrapped, XML-escaped text.
 *
 * Runs of whitespace (per d1_isspace) collapse to a single space between
 * words.  A word that would cross IDSGML_MARGIN starts a new line indented
 * to col, unless the word alone is at least IDSGML_MARGIN long.  A newline
 * is always appended at the end.
 *
 * Column tracking counts source characters, not the (possibly longer)
 * escaped output.
 */
static void idsgml_put_wrapped_text(WRBUF b, char *p, int l, int col)
{
    int lcol = col;
    int first = 1;      /* no separating space before the first word of a line */

    while (l)
    {
        int wlen;

        while (l && d1_isspace(*p))
            p++, l--;
        if (!l)
            break;

        wlen = wordlen(p, l);
        /* break if we cross margin and word is not too long */
        if (lcol + wlen > IDSGML_MARGIN && wlen < IDSGML_MARGIN)
        {
            wrbuf_puts(b, "\n");
            indent(b, col);
            lcol = col;
            first = 1;
        }
        if (!first)
        {
            wrbuf_putc(b, ' ');
            lcol++;
        }
        /* Escape the word like every other text path does; writing the
           raw bytes would emit '&' and '<' unescaped and break the XML. */
        wrbuf_xmlputs_n(b, p, wlen);
        p += wlen;
        l -= wlen;
        lcol += wlen;
        first = 0;
    }
    wrbuf_puts(b, "\n");
}

/*
 * Serializes the children of n (recursively) to b as XML.
 *
 *   n             node whose children are written; n itself is not written
 *   select        when non-zero, tag nodes whose node_selected flag is
 *                 clear are skipped together with their subtrees
 *   b             output buffer
 *   col           indentation column, used only when pretty_format is set
 *   pretty_format when non-zero, output is indented, line-broken and plain
 *                 text data is word-wrapped
 *
 * Node handling:
 *   DATA1N_preprocess  written as <?target attrs children?> plus newline
 *   DATA1N_tag         written as an element; a "wellknown" element is not
 *                      written itself, only its children are
 *   DATA1N_data        written as character data according to u.data.what
 *   DATA1N_comment     like data, wrapped in <!-- and -->
 * Other node kinds are ignored.
 *
 * Returns 0 on success.  A negative result from a recursive call is
 * propagated as -1; nothing in this function originates an error itself.
 */
static int nodetoidsgml(data1_node *n, int select, WRBUF b, int col,
                        int pretty_format)
{
    data1_node *c;

    for (c = n->child; c; c = c->next)
    {
        if (c->which == DATA1N_preprocess)
        {
            if (pretty_format)
                indent(b, col);
            wrbuf_puts(b, "<?");
            wrbuf_xmlputs(b, c->u.preprocess.target);
            wrbuf_put_xattr(b, c->u.preprocess.attributes);
            if (c->child)
                wrbuf_puts(b, " ");
            if (nodetoidsgml(c, select, b, idsgml_child_col(col),
                             pretty_format) < 0)
                return -1;
            wrbuf_puts(b, "?>\n");
        }
        else if (c->which == DATA1N_tag)
        {
            char *tag;

            if (select && !c->u.tag.node_selected)
                continue;
            tag = c->u.tag.tag;
            if (!data1_matchstr(tag, "wellknown")) /* skip wellknown */
            {
                /* children are written at the same column, as if the
                   wellknown element were not there */
                if (nodetoidsgml(c, select, b, col, pretty_format) < 0)
                    return -1;
            }
            else
            {
                if (pretty_format)
                    indent(b, col);
                wrbuf_puts(b, "<");
                wrbuf_write_tag(b, tag, 1);
                wrbuf_put_xattr(b, c->u.tag.attributes);
                wrbuf_puts(b, ">");
                if (pretty_format)
                    wrbuf_puts(b, "\n");
                if (nodetoidsgml(c, select, b, idsgml_child_col(col),
                                 pretty_format) < 0)
                    return -1;
                if (pretty_format)
                    indent(b, col);
                wrbuf_puts(b, "</");
                wrbuf_write_tag(b, tag, 0);
                wrbuf_puts(b, ">");
                if (pretty_format)
                    wrbuf_puts(b, "\n");
            }
        }
        else if (c->which == DATA1N_data || c->which == DATA1N_comment)
        {
            int is_comment = (c->which == DATA1N_comment);

            if (pretty_format && !c->u.data.formatted_text)
                indent(b, col);
            if (is_comment)
                wrbuf_puts(b, "<!--");
            switch (c->u.data.what)
            {
            case DATA1I_xmltext:
                /* already XML: copied through verbatim */
                wrbuf_write(b, c->u.data.data, c->u.data.len);
                break;
            case DATA1I_text:
                if (!pretty_format || c->u.data.formatted_text)
                    wrbuf_xmlputs_n(b, c->u.data.data, c->u.data.len);
                else
                    idsgml_put_wrapped_text(b, c->u.data.data,
                                            c->u.data.len, col);
                break;
            case DATA1I_num:
            case DATA1I_oid:
                wrbuf_xmlputs_n(b, c->u.data.data, c->u.data.len);
                if (pretty_format)
                    wrbuf_puts(b, "\n");
                break;
            default:
                /* other data kinds produce no output */
                break;
            }
            if (is_comment)
            {
                wrbuf_puts(b, "-->");
                if (pretty_format)
                    wrbuf_puts(b, "\n");
            }
        }
    }
    return 0;
}
229
/*
 * Converts the data1 tree rooted at n to an XML record.
 *
 *   dh      data1 handle; supplies the output WRBUF (which is rewound
 *           first) and the XML-mode setting
 *   n       root node of the tree
 *   select  passed on to nodetoidsgml(): when non-zero only tag nodes
 *           marked as selected are written
 *   len     receives the length of the generated record on success
 *
 * When the handle is not in XML mode the output is wrapped in an element
 * named after n->u.root.type.  Pretty formatting is never requested.
 *
 * Returns a pointer to the contents of the handle's WRBUF, or NULL (with
 * *len untouched) if serialization fails.  The buffer is not copied -
 * presumably it is overwritten by the next call on the same handle;
 * confirm against data1_get_wrbuf().
 */
char *data1_nodetoidsgml(data1_handle dh, data1_node *n, int select, int *len)
{
    WRBUF out = data1_get_wrbuf(dh);
    int rc;

    wrbuf_rewind(out);

    /* opening root wrapper */
    if (!data1_is_xmlmode(dh))
    {
        wrbuf_puts(out, "<");
        wrbuf_write_tag(out, n->u.root.type, 1);
        wrbuf_puts(out, ">\n");
    }

    rc = nodetoidsgml(n, select, out, 0, 0 /* no pretty format */);
    if (rc != 0)
        return NULL;

    /* closing root wrapper */
    if (!data1_is_xmlmode(dh))
    {
        wrbuf_puts(out, "</");
        wrbuf_write_tag(out, n->u.root.type, 0);
        wrbuf_puts(out, ">\n");
    }

    *len = wrbuf_len(out);
    return wrbuf_buf(out);
}
253 /*
254 * Local variables:
255 * c-basic-offset: 4
256 * c-file-style: "Stroustrup"
257 * indent-tabs-mode: nil
258 * End:
259 * vim: shiftwidth=4 tabstop=8 expandtab
260 */
261
262