1 /* $Id$
2
3 Part of SWI-Prolog
4
5 Author: Jan Wielemaker
6 E-mail: jan@swi.psy.uva.nl
7 WWW: http://www.swi-prolog.org
8 Copyright (C): 1985-2002, University of Amsterdam
9
10 This library is free software; you can redistribute it and/or
11 modify it under the terms of the GNU Lesser General Public
12 License as published by the Free Software Foundation; either
13 version 2.1 of the License, or (at your option) any later version.
14
15 This library is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 Lesser General Public License for more details.
19
20 You should have received a copy of the GNU Lesser General Public
21 License along with this library; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25 #define _ISOC99_SOURCE 1 /* fwprintf(), etc prototypes */
26 #include <stdio.h>
27 #include <wchar.h>
28 #include <assert.h>
29 #include <string.h>
30 #include <wctype.h>
31 #include <time.h>
32 #include "dtd.h"
33 #include "util.h"
34 #include "prolog.h"
35
/* Number of problems found while printing the current DTD.  Reset by
   prolog_print_dtd() before printing starts, incremented for every
   element that lacks a definition or a content model, and checked by
   prolog_print_dtd() to decide its return value.
*/
static int errors;
37
38 /*******************************
39 * PROLOG SYNTAX *
40 *******************************/
41
/* How a piece of text must be written to be read back as a Prolog atom,
   as computed by atomType().
*/
typedef enum
{ AT_LOWER = 0,                 /* lowercase letter, then alnum or '_' */
  AT_QUOTE,                     /* needs single quotes */
  AT_FULLSTOP,                  /* the single character "." */
  AT_SYMBOL,                    /* consists of symbol characters only */
  AT_SOLO,                      /* one of "!", ";" or "," */
  AT_SPECIAL                    /* "[]" or "{}" */
} atomtype;
50
51
52 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
53 Contributed by Richard O'Keefe. Thanks!
54 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
55
56 static int
atomType(ichar const * s,int len)57 atomType(ichar const *s, int len)
58 { static ichar const symbols[] = L"#$&*+-./:<=>?@\\^`~";
59 unsigned char const *u = (unsigned char const *)s;
60
61 switch (len)
62 { case 0:
63 return AT_QUOTE;
64 case 1:
65 return iswlower(u[0]) ? AT_LOWER
66 : u[0] == '.' ? AT_FULLSTOP
67 : u[0] == '!' ? AT_SOLO
68 : u[0] == ';' ? AT_SOLO
69 : u[0] == ',' ? AT_SOLO
70 : AT_QUOTE;
71 case 2:
72 if (u[0] == '[' && u[1] == ']') return AT_SPECIAL;
73 if (u[0] == '{' && u[1] == '}') return AT_SPECIAL;
74 break;
75 default:
76 break;
77 }
78
79 if (iswlower(u[0]))
80 { do ++u; while (--len > 0 && (iswalnum(*u) || *u == '_'));
81 return len == 0 ? AT_LOWER : AT_QUOTE;
82 } else if (wcschr(symbols, *u) != NULL)
83 { do ++u; while (--len > 0 && wcschr(symbols, *u) != 0);
84 return len == 0 ? AT_SYMBOL : AT_QUOTE;
85 } else
86 { return AT_QUOTE;
87 }
88 }
89
90
91 static const ichar *
atom(const ichar * text)92 atom(const ichar *text)
93 { int len = wcslen(text);
94
95 switch(atomType(text, len))
96 { case AT_QUOTE:
97 case AT_FULLSTOP:
98 { ichar *tmp = ringallo((len*2+1)*sizeof(ichar));
99 ichar *o = tmp;
100
101 *o++ = '\'';
102 for( ; --len >= 0; text++)
103 { switch( *text )
104 { case '\n':
105 *o++ = '\\';
106 *o++ = 'n';
107 break;
108 case '\r':
109 *o++ = '\\';
110 *o++ = 'r';
111 break;
112 case '\t':
113 *o++ = '\\';
114 *o++ = 't';
115 break;
116 case '\'':
117 *o++ = '\\';
118 default:
119 *o++ = *text;
120 }
121 }
122 *o++ = '\'';
123 *o = '\0';
124
125 return tmp;
126 }
127 default:
128 return text;
129 }
130 }
131
132
/* Map a C truth value onto the text "true" (non-zero) or "false" (zero) */

static const char *
bool(int val)
{ if ( val )
    return "true";

  return "false";
}
137
138
139 static void
prolog_print_entity(const char * which,dtd_entity * e)140 prolog_print_entity(const char *which, dtd_entity *e)
141 { switch( e->type )
142 { case ET_LITERAL:
143 wprintf(L"%s(%ls, %ls).\n",
144 which,
145 atom(e->name->name),
146 atom(e->value));
147 break;
148 case ET_SYSTEM:
149 wprintf(L"%s(%ls, system(%ls)).\n",
150 which,
151 atom(e->name->name),
152 atom(e->exturl));
153 break;
154 case ET_PUBLIC:
155 wprintf(L"%s(%ls, public(%ls, %ls)).\n",
156 which,
157 atom(e->name->name),
158 atom(e->extid),
159 atom(e->exturl));
160 break;
161 }
162 }
163
164
165 static void
prolog_print_model(dtd_model * m)166 prolog_print_model(dtd_model *m)
167 { dtd_model *sub;
168 int n = 0;
169 const char *sep;
170
171 switch(m->type)
172 { case MT_PCDATA:
173 printf("'#pcdata'");
174 goto card;
175 case MT_ELEMENT:
176 wprintf(L"%ls", atom(m->content.element->name->name));
177 goto card;
178 case MT_AND:
179 sep = " & ";
180 break;
181 case MT_SEQ:
182 sep = ", ";
183 break;
184 case MT_OR:
185 sep = "|";
186 break;
187 case MT_UNDEF:
188 default:
189 assert(0);
190 sep = NULL; /* should not be used */
191 break;
192 }
193
194 printf("(");
195 for(sub = m->content.group; sub; sub=sub->next)
196 { if ( n++ > 0 )
197 printf("%s", sep);
198 prolog_print_model(sub);
199 }
200 printf(")");
201
202 card:
203 switch(m->cardinality)
204 { case MC_ONE:
205 break;
206 case MC_OPT:
207 printf("?");
208 break;
209 case MC_REP:
210 printf("*");
211 break;
212 case MC_PLUS:
213 printf("+");
214 break;
215 }
216 }
217
218
219 static void
prolog_print_content(dtd_element * e)220 prolog_print_content(dtd_element *e)
221 { dtd_edef *def = e->structure;
222
223 switch( def->type )
224 { case C_EMPTY:
225 printf("empty");
226 break;
227 case C_CDATA:
228 printf("cdata");
229 break;
230 case C_RCDATA:
231 printf("rcdata");
232 break;
233 case C_ANY:
234 printf("any");
235 break;
236 default:
237 if ( def->content )
238 { printf("model(");
239 prolog_print_model(def->content);
240 printf(")");
241 } else
242 { printf("[]");
243 fwprintf(stderr,
244 L"Warning: element %s has no content model\n",
245 e->name->name);
246 errors++;
247 }
248 break;
249 }
250 }
251
252
253 static ichar *
istrblank(const ichar * s)254 istrblank(const ichar *s)
255 { for( ; *s; s++ )
256 { if ( iswspace(*s) )
257 return (ichar *)s;
258 }
259
260 return NULL;
261 }
262
263
264 static void
print_listval(attrtype type,int len,const ichar * text)265 print_listval(attrtype type, int len, const ichar *text)
266 { ichar *t = sgml_malloc((len+1)*sizeof(ichar));
267
268 istrncpy(t, text, len);
269 t[len] = '\0';
270
271 if ( type == AT_NUMBERS )
272 wprintf(L"%ls", t);
273 else
274 wprintf(L"%ls", atom(t));
275
276 sgml_free(t);
277 }
278
279
280 static void
prolog_print_attribute(dtd_element * e,dtd_attr * at)281 prolog_print_attribute(dtd_element *e, dtd_attr *at)
282 { wprintf(L" attribute(%ls, %ls, ",
283 atom(e->name->name), atom(at->name->name));
284
285 switch(at->type) /* print type */
286 { case AT_CDATA:
287 printf("cdata");
288 break;
289 case AT_ENTITY:
290 printf("entity");
291 break;
292 case AT_ENTITIES:
293 printf("entities");
294 break;
295 case AT_ID:
296 printf("id");
297 break;
298 case AT_IDREF:
299 printf("idref");
300 break;
301 case AT_IDREFS:
302 printf("list(idref)");
303 break;
304 case AT_NAME:
305 printf("name");
306 break;
307 case AT_NAMES:
308 printf("list(name)");
309 break;
310 case AT_NMTOKEN:
311 printf("nmtoken");
312 break;
313 case AT_NMTOKENS:
314 printf("list(nmtoken)");
315 break;
316 case AT_NOTATION:
317 printf("notation");
318 break;
319 case AT_NUMBER:
320 printf("number");
321 break;
322 case AT_NUMBERS:
323 printf("list(number)");
324 break;
325 case AT_NAMEOF:
326 { dtd_name_list *nl;
327 int n = 0;
328
329 printf("nameof([");
330 for(nl = at->typeex.nameof; nl; nl = nl->next)
331 { if ( n++ > 0 )
332 printf(", ");
333 wprintf(L"%ls", atom(nl->value->name));
334 }
335 printf("])");
336 }
337 break;
338 case AT_NUTOKEN:
339 printf("nutoken");
340 break;
341 case AT_NUTOKENS:
342 printf("list(nutoken)");
343 break;
344 }
345
346 printf(", "); /* print default */
347 switch(at->def)
348 { case AT_REQUIRED:
349 printf("required");
350 break;
351 case AT_CURRENT:
352 printf("current");
353 break;
354 case AT_CONREF:
355 printf("conref");
356 break;
357 case AT_IMPLIED:
358 printf("implied");
359 break;
360 case AT_DEFAULT:
361 case AT_FIXED:
362 { char *f = (at->def == AT_DEFAULT ? "default" : "fixed");
363
364 printf("%s(", f);
365
366 switch( at->type )
367 { case AT_CDATA:
368 wprintf(L"%ls", atom(at->att_def.cdata));
369 break;
370 case AT_NUMBER:
371 printf("%ld", at->att_def.number);
372 break;
373 case AT_NAME:
374 case AT_NUTOKEN:
375 case AT_NMTOKEN:
376 wprintf(L"%ls", atom(at->att_def.name->name));
377 break;
378 default:
379 if ( at->islist )
380 { const ichar *val = at->att_def.list;
381 const ichar *e;
382 int an = 0;
383
384 printf("[");
385 for(e=istrblank(val); e; val = e+1, e=istrblank(val))
386 { if ( e == val )
387 continue; /* skip spaces */
388 if ( an++ > 0 )
389 printf(", ");
390 print_listval(at->type, e-val, val);
391 }
392 if ( an++ > 0 )
393 printf(", ");
394 print_listval(at->type, istrlen(val), val);
395 printf("]");
396 break;
397 }
398 assert(0);
399 }
400
401 printf(")");
402 }
403 }
404
405 printf(").\n");
406 }
407
408
409 static void
prolog_print_element(dtd_element * e,unsigned int flags)410 prolog_print_element(dtd_element *e, unsigned int flags)
411 { ichar nbuf[MAXNMLEN];
412
413 istrcpy(nbuf, e->name->name);
414 istrupper(nbuf);
415
416 wprintf(L"\n%% Element <%s>\n", nbuf);
417
418 if ( e->structure )
419 { dtd_edef *def = e->structure;
420
421 wprintf(L"element(%ls, omit(%s, %s), ",
422 atom(e->name->name),
423 bool(def->omit_open),
424 bool(def->omit_close));
425 prolog_print_content(e);
426 printf(").\n");
427
428 if ( def->excluded )
429 { dtd_element_list *el;
430
431 for(el = def->excluded; el; el=el->next)
432 wprintf(L"exclude(%ls, %ls).\n",
433 atom(e->name->name),
434 atom(el->value->name->name));
435 }
436 if ( def->included )
437 { dtd_element_list *el;
438
439 for(el = def->included; el; el=el->next)
440 wprintf(L"include(%ls, %ls).\n",
441 atom(e->name->name),
442 atom(el->value->name->name));
443 }
444
445 if ( flags & PL_PRINT_ATTRIBUTES )
446 { dtd_attr_list *al;
447
448 for(al=e->attributes; al; al=al->next)
449 prolog_print_attribute(e, al->attribute);
450 }
451 } else
452 { fwprintf(stderr, L"Warning: element %s has no definition\n",
453 e->name->name);
454 errors++;
455 }
456 }
457
458
459 int
prolog_print_dtd(dtd * dtd,unsigned int flags)460 prolog_print_dtd(dtd *dtd, unsigned int flags)
461 { dtd_entity *et;
462 dtd_element *e;
463 time_t now;
464
465 if ( !dtd->doctype )
466 fprintf(stderr, "DTD has no document type\n");
467
468 time(&now);
469
470 if ( !flags )
471 flags = PL_PRINT_ALL;
472
473 errors = 0;
474
475 wprintf(L"/* This file represents the SGML DOCTYPE \"%s\"\n", dtd->doctype);
476 printf(" converted using dtd2pl version %s\n", DTD2PL_VERSION);
477 printf(" Conversion date: %s\n\n", ctime(&now));
478 printf(" dtd2pl is written by Jan Wielemaker\n");
479 printf(" E-mail: jan@swi.psy.uva.nl\n");
480 printf("*/\n\n");
481
482 wprintf(L":- module(%s_dtd, []).\n\n", dtd->doctype);
483 printf(":- op(100, xf, ?).\n");
484 printf(":- op(100, xf, +).\n");
485 printf(":- op(100, xf, *).\n");
486 printf(":- op(200, xfy, &).\n");
487
488 printf("\n");
489 printf(":- discontiguous\n");
490 printf("\tattribute/4,\n");
491 printf("\telement/3,\n");
492 printf("\texclude/2,\n");
493 printf("\tinclude/2.\n");
494
495 if ( flags & PL_PRINT_PENTITIES )
496 { printf("\n");
497 for( et=dtd->pentities; et; et=et->next )
498 prolog_print_entity("parameter_entity", et);
499 }
500
501 if ( flags & PL_PRINT_ENTITIES )
502 { printf("\n");
503 for( et=dtd->entities; et; et=et->next )
504 prolog_print_entity("entity", et);
505 }
506
507 if ( flags & PL_PRINT_ELEMENTS )
508 { printf("\n");
509 for( e=dtd->elements; e; e=e->next )
510 prolog_print_element(e, flags);
511 }
512
513 if ( errors )
514 { fprintf(stderr, "Warning: DTD contained %d errors\n", errors);
515 return FALSE;
516 }
517
518 return TRUE;
519 }
520
521