1 /*  $Id$
2 
3     Part of SWI-Prolog
4 
5     Author:        Jan Wielemaker
6     E-mail:        jan@swi.psy.uva.nl
7     WWW:           http://www.swi-prolog.org
8     Copyright (C): 1985-2002, University of Amsterdam
9 
10     This library is free software; you can redistribute it and/or
11     modify it under the terms of the GNU Lesser General Public
12     License as published by the Free Software Foundation; either
13     version 2.1 of the License, or (at your option) any later version.
14 
15     This library is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18     Lesser General Public License for more details.
19 
20     You should have received a copy of the GNU Lesser General Public
21     License along with this library; if not, write to the Free Software
22     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24 
25 #define _ISOC99_SOURCE 1		/* fwprintf(), etc prototypes */
26 #include <stdio.h>
27 #include <wchar.h>
28 #include <assert.h>
29 #include <string.h>
30 #include <wctype.h>
31 #include <time.h>
32 #include "dtd.h"
33 #include "util.h"
34 #include "prolog.h"
35 
/* Count of problems found while printing a DTD.  prolog_print_dtd()
   resets it before printing; the element and content printers
   increment it for every warning they emit.
*/
static int errors = 0;
37 
38 		 /*******************************
39 		 *	  PROLOG SYNTAX		*
40 		 *******************************/
41 
/* Lexical class of a text when written as a Prolog atom, as computed
   by atomType().
*/
typedef enum
{ AT_LOWER    = 0,			/* lower-case letter, then alphanumerics or '_' */
  AT_QUOTE    = 1,			/* must be written between single quotes */
  AT_FULLSTOP = 2,			/* the single character "." */
  AT_SYMBOL   = 3,			/* built from symbol characters only */
  AT_SOLO     = 4,			/* one of "!", ";" or "," */
  AT_SPECIAL  = 5			/* "[]" or "{}" */
} atomtype;
50 
51 
52 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
53 Contributed by Richard O'Keefe.  Thanks!
54 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
55 
56 static int
atomType(ichar const * s,int len)57 atomType(ichar const *s, int len)
58 { static ichar const symbols[] = L"#$&*+-./:<=>?@\\^`~";
59   unsigned char const *u = (unsigned char const *)s;
60 
61   switch (len)
62   { case 0:
63       return AT_QUOTE;
64     case 1:
65       return iswlower(u[0]) ? AT_LOWER
66 	   : u[0] == '.'    ? AT_FULLSTOP
67 	   : u[0] == '!'    ? AT_SOLO
68 	   : u[0] == ';'    ? AT_SOLO
69 	   : u[0] == ','    ? AT_SOLO
70 	   :                  AT_QUOTE;
71     case 2:
72       if (u[0] == '[' && u[1] == ']') return AT_SPECIAL;
73       if (u[0] == '{' && u[1] == '}') return AT_SPECIAL;
74       break;
75     default:
76       break;
77   }
78 
79   if (iswlower(u[0]))
80   { do ++u; while (--len > 0 && (iswalnum(*u) || *u == '_'));
81     return len == 0 ? AT_LOWER : AT_QUOTE;
82   } else if (wcschr(symbols, *u) != NULL)
83   { do ++u; while (--len > 0 && wcschr(symbols, *u) != 0);
84     return len == 0 ? AT_SYMBOL : AT_QUOTE;
85   } else
86   { return AT_QUOTE;
87   }
88 }
89 
90 
91 static const ichar *
atom(const ichar * text)92 atom(const ichar *text)
93 { int len = wcslen(text);
94 
95   switch(atomType(text, len))
96   { case AT_QUOTE:
97     case AT_FULLSTOP:
98     { ichar *tmp = ringallo((len*2+1)*sizeof(ichar));
99       ichar *o = tmp;
100 
101       *o++ = '\'';
102       for( ; --len >= 0; text++)
103       { switch( *text )
104 	{ case '\n':
105 	    *o++ = '\\';
106 	    *o++ = 'n';
107 	    break;
108 	  case '\r':
109 	    *o++ = '\\';
110 	    *o++ = 'r';
111 	    break;
112 	  case '\t':
113 	    *o++ = '\\';
114 	    *o++ = 't';
115 	    break;
116 	  case '\'':
117 	    *o++ = '\\';
118 	  default:
119 	    *o++ = *text;
120 	}
121       }
122       *o++ = '\'';
123       *o   = '\0';
124 
125       return tmp;
126     }
127     default:
128       return text;
129   }
130 }
131 
132 
/* Map a C truth value onto the Prolog atom name "true" or "false". */
static const char *
bool(int val)
{ if ( val )
    return "true";

  return "false";
}
137 
138 
139 static void
prolog_print_entity(const char * which,dtd_entity * e)140 prolog_print_entity(const char *which, dtd_entity *e)
141 { switch( e->type )
142   { case ET_LITERAL:
143       wprintf(L"%s(%ls, %ls).\n",
144 	      which,
145 	      atom(e->name->name),
146 	      atom(e->value));
147       break;
148     case ET_SYSTEM:
149       wprintf(L"%s(%ls, system(%ls)).\n",
150 	     which,
151 	     atom(e->name->name),
152 	     atom(e->exturl));
153       break;
154     case ET_PUBLIC:
155       wprintf(L"%s(%ls, public(%ls, %ls)).\n",
156 	     which,
157 	     atom(e->name->name),
158 	     atom(e->extid),
159 	     atom(e->exturl));
160       break;
161   }
162 }
163 
164 
165 static void
prolog_print_model(dtd_model * m)166 prolog_print_model(dtd_model *m)
167 { dtd_model *sub;
168   int n = 0;
169   const char *sep;
170 
171   switch(m->type)
172   { case MT_PCDATA:
173       printf("'#pcdata'");
174       goto card;
175     case MT_ELEMENT:
176       wprintf(L"%ls", atom(m->content.element->name->name));
177       goto card;
178     case MT_AND:
179       sep = " & ";
180       break;
181     case MT_SEQ:
182       sep = ", ";
183       break;
184     case MT_OR:
185       sep = "|";
186       break;
187     case MT_UNDEF:
188     default:
189       assert(0);
190       sep = NULL;			/* should not be used */
191       break;
192   }
193 
194   printf("(");
195   for(sub = m->content.group; sub; sub=sub->next)
196   { if ( n++ > 0 )
197       printf("%s", sep);
198     prolog_print_model(sub);
199   }
200   printf(")");
201 
202 card:
203   switch(m->cardinality)
204   { case MC_ONE:
205       break;
206     case MC_OPT:
207       printf("?");
208       break;
209     case MC_REP:
210       printf("*");
211       break;
212     case MC_PLUS:
213       printf("+");
214       break;
215   }
216 }
217 
218 
219 static void
prolog_print_content(dtd_element * e)220 prolog_print_content(dtd_element *e)
221 { dtd_edef *def = e->structure;
222 
223   switch( def->type )
224   { case C_EMPTY:
225       printf("empty");
226       break;
227     case C_CDATA:
228       printf("cdata");
229       break;
230     case C_RCDATA:
231       printf("rcdata");
232       break;
233     case C_ANY:
234       printf("any");
235       break;
236     default:
237       if ( def->content )
238       { printf("model(");
239 	prolog_print_model(def->content);
240 	printf(")");
241       } else
242       { printf("[]");
243 	fwprintf(stderr,
244 		L"Warning: element %s has no content model\n",
245 		e->name->name);
246 	errors++;
247       }
248       break;
249   }
250 }
251 
252 
253 static ichar *
istrblank(const ichar * s)254 istrblank(const ichar *s)
255 { for( ; *s; s++ )
256   { if ( iswspace(*s) )
257       return (ichar *)s;
258   }
259 
260   return NULL;
261 }
262 
263 
264 static void
print_listval(attrtype type,int len,const ichar * text)265 print_listval(attrtype type, int len, const ichar *text)
266 { ichar *t = sgml_malloc((len+1)*sizeof(ichar));
267 
268   istrncpy(t, text, len);
269   t[len] = '\0';
270 
271   if ( type == AT_NUMBERS )
272     wprintf(L"%ls", t);
273   else
274     wprintf(L"%ls", atom(t));
275 
276   sgml_free(t);
277 }
278 
279 
280 static void
prolog_print_attribute(dtd_element * e,dtd_attr * at)281 prolog_print_attribute(dtd_element *e, dtd_attr *at)
282 { wprintf(L"    attribute(%ls, %ls, ",
283 	 atom(e->name->name), atom(at->name->name));
284 
285   switch(at->type)			/* print type */
286   { case AT_CDATA:
287       printf("cdata");
288       break;
289     case AT_ENTITY:
290       printf("entity");
291       break;
292     case AT_ENTITIES:
293       printf("entities");
294       break;
295     case AT_ID:
296       printf("id");
297       break;
298     case AT_IDREF:
299       printf("idref");
300       break;
301     case AT_IDREFS:
302       printf("list(idref)");
303       break;
304     case AT_NAME:
305       printf("name");
306       break;
307     case AT_NAMES:
308       printf("list(name)");
309       break;
310     case AT_NMTOKEN:
311       printf("nmtoken");
312       break;
313     case AT_NMTOKENS:
314       printf("list(nmtoken)");
315       break;
316     case AT_NOTATION:
317       printf("notation");
318       break;
319     case AT_NUMBER:
320       printf("number");
321       break;
322     case AT_NUMBERS:
323       printf("list(number)");
324       break;
325     case AT_NAMEOF:
326     { dtd_name_list *nl;
327       int n = 0;
328 
329       printf("nameof([");
330       for(nl = at->typeex.nameof; nl; nl = nl->next)
331       { if ( n++ > 0 )
332 	  printf(", ");
333 	wprintf(L"%ls", atom(nl->value->name));
334       }
335       printf("])");
336     }
337       break;
338     case AT_NUTOKEN:
339       printf("nutoken");
340       break;
341     case AT_NUTOKENS:
342       printf("list(nutoken)");
343       break;
344   }
345 
346   printf(", ");				/* print default */
347   switch(at->def)
348   { case AT_REQUIRED:
349       printf("required");
350       break;
351     case AT_CURRENT:
352       printf("current");
353       break;
354     case AT_CONREF:
355       printf("conref");
356       break;
357     case AT_IMPLIED:
358       printf("implied");
359       break;
360     case AT_DEFAULT:
361     case AT_FIXED:
362     { char *f = (at->def == AT_DEFAULT ? "default" : "fixed");
363 
364       printf("%s(", f);
365 
366       switch( at->type )
367       { case AT_CDATA:
368 	  wprintf(L"%ls", atom(at->att_def.cdata));
369 	  break;
370 	case AT_NUMBER:
371 	  printf("%ld", at->att_def.number);
372 	  break;
373 	case AT_NAME:
374 	case AT_NUTOKEN:
375 	case AT_NMTOKEN:
376 	  wprintf(L"%ls", atom(at->att_def.name->name));
377 	  break;
378 	default:
379 	  if ( at->islist )
380 	  { const ichar *val = at->att_def.list;
381 	    const ichar *e;
382 	    int an = 0;
383 
384 	    printf("[");
385 	    for(e=istrblank(val); e; val = e+1, e=istrblank(val))
386 	    { if ( e == val )
387 		continue;			/* skip spaces */
388 	      if ( an++ > 0 )
389 		printf(", ");
390 	      print_listval(at->type, e-val, val);
391 	    }
392             if ( an++ > 0 )
393 	      printf(", ");
394 	    print_listval(at->type, istrlen(val), val);
395 	    printf("]");
396 	    break;
397 	  }
398 	  assert(0);
399       }
400 
401       printf(")");
402     }
403   }
404 
405   printf(").\n");
406 }
407 
408 
409 static void
prolog_print_element(dtd_element * e,unsigned int flags)410 prolog_print_element(dtd_element *e, unsigned int flags)
411 { ichar nbuf[MAXNMLEN];
412 
413   istrcpy(nbuf, e->name->name);
414   istrupper(nbuf);
415 
416   wprintf(L"\n%% Element <%s>\n", nbuf);
417 
418   if ( e->structure )
419   { dtd_edef *def = e->structure;
420 
421     wprintf(L"element(%ls, omit(%s, %s), ",
422 	    atom(e->name->name),
423 	    bool(def->omit_open),
424 	    bool(def->omit_close));
425     prolog_print_content(e);
426     printf(").\n");
427 
428     if ( def->excluded )
429     { dtd_element_list *el;
430 
431       for(el = def->excluded; el; el=el->next)
432 	wprintf(L"exclude(%ls, %ls).\n",
433 		atom(e->name->name),
434 		atom(el->value->name->name));
435     }
436     if ( def->included )
437     { dtd_element_list *el;
438 
439       for(el = def->included; el; el=el->next)
440 	wprintf(L"include(%ls, %ls).\n",
441 		atom(e->name->name),
442 		atom(el->value->name->name));
443     }
444 
445     if ( flags & PL_PRINT_ATTRIBUTES )
446     { dtd_attr_list *al;
447 
448       for(al=e->attributes; al; al=al->next)
449 	prolog_print_attribute(e, al->attribute);
450     }
451   } else
452   { fwprintf(stderr, L"Warning: element %s has no definition\n",
453 	     e->name->name);
454     errors++;
455   }
456 }
457 
458 
459 int
prolog_print_dtd(dtd * dtd,unsigned int flags)460 prolog_print_dtd(dtd *dtd, unsigned int flags)
461 { dtd_entity *et;
462   dtd_element *e;
463   time_t now;
464 
465   if ( !dtd->doctype )
466     fprintf(stderr, "DTD has no document type\n");
467 
468   time(&now);
469 
470   if ( !flags )
471     flags = PL_PRINT_ALL;
472 
473   errors = 0;
474 
475   wprintf(L"/*  This file represents the SGML DOCTYPE \"%s\"\n", dtd->doctype);
476   printf("    converted using dtd2pl version %s\n", DTD2PL_VERSION);
477   printf("    Conversion date: %s\n\n", ctime(&now));
478   printf("    dtd2pl is written by Jan Wielemaker\n");
479   printf("    E-mail: jan@swi.psy.uva.nl\n");
480   printf("*/\n\n");
481 
482   wprintf(L":- module(%s_dtd, []).\n\n", dtd->doctype);
483   printf(":- op(100, xf,  ?).\n");
484   printf(":- op(100, xf,  +).\n");
485   printf(":- op(100, xf,  *).\n");
486   printf(":- op(200, xfy, &).\n");
487 
488   printf("\n");
489   printf(":- discontiguous\n");
490   printf("\tattribute/4,\n");
491   printf("\telement/3,\n");
492   printf("\texclude/2,\n");
493   printf("\tinclude/2.\n");
494 
495   if ( flags & PL_PRINT_PENTITIES )
496   { printf("\n");
497     for( et=dtd->pentities; et; et=et->next )
498       prolog_print_entity("parameter_entity", et);
499   }
500 
501   if ( flags & PL_PRINT_ENTITIES )
502   { printf("\n");
503     for( et=dtd->entities; et; et=et->next )
504       prolog_print_entity("entity", et);
505   }
506 
507   if ( flags & PL_PRINT_ELEMENTS )
508   { printf("\n");
509     for( e=dtd->elements; e; e=e->next )
510       prolog_print_element(e, flags);
511   }
512 
513   if ( errors )
514   { fprintf(stderr, "Warning: DTD contained %d errors\n", errors);
515     return FALSE;
516   }
517 
518   return TRUE;
519 }
520 
521