1 /*
2 * $LynxId: dtd_util.c,v 1.78 2015/12/13 23:22:17 tom Exp $
3 *
4 * Given a SGML_dtd structure, write a corresponding flat file, or "C" source.
5 * Given the flat-file, write the "C" source.
6 *
7 * TODO: use symbols for HTMLA_NORMAL, etc.
8 */
9
10 #include <HTUtils.h>
11 #include <HTMLDTD.h>
12 #include <string.h>
13
14 /*
15 * Tweaks to build standalone.
16 */
17 #undef exit
18
/* Trace flag defined here as part of the standalone-build tweaks; starts off. */
BOOLEAN WWW_TraceFlag = FALSE;
/*
 * Report the stream used for trace output: always the standard error stream.
 */
FILE *TraceFP(void)
{
    FILE *sink = stderr;

    return sink;
}
24
25 /*
26 * Begin the actual utility.
27 */
28 #define GETOPT "chl:o:ts"
29
/*
 * Write a string literal followed by a newline to the FILE* named "output" in
 * the invoking function's scope.  The literal is pasted into the format
 * string, and the expansion already ends with a semicolon.
 */
#define NOTE(message) fprintf(output, message "\n");
31 /* *INDENT-OFF* */
32 #ifdef USE_PRETTYSRC
33 # define N HTMLA_NORMAL
34 # define i HTMLA_ANAME
35 # define h HTMLA_HREF
36 # define c HTMLA_CLASS
37 # define x HTMLA_AUXCLASS
38 # define T(t) , t
39 #else
40 # define T(t) /*nothing */
41 #endif
42
43 #define ATTR_TYPE(name) { #name, name##_attr_list }
44
/*
 * Generic attribute lists.  dump_source() refers to them by name through
 * ATTR_TYPE() (core, i18n, events, align, cellalign, bgcolor).  Each list is
 * terminated by an entry whose name is 0; T() supplies the attribute type
 * only when USE_PRETTYSRC is defined.
 */
static const attr core_attr_list[] = {
    { "CLASS" T(c) },
    { "ID" T(i) },
    { "STYLE" T(N) },
    { "TITLE" T(N) },
    { 0 T(N) } /* Terminate list */
};

static const attr i18n_attr_list[] = {
    { "DIR" T(N) },
    { "LANG" T(N) },
    { 0 T(N) } /* Terminate list */
};

static const attr events_attr_list[] = {
    { "ONCLICK" T(N) },
    { "ONDBLCLICK" T(N) },
    { "ONKEYDOWN" T(N) },
    { "ONKEYPRESS" T(N) },
    { "ONKEYUP" T(N) },
    { "ONMOUSEDOWN" T(N) },
    { "ONMOUSEMOVE" T(N) },
    { "ONMOUSEOUT" T(N) },
    { "ONMOUSEOVER" T(N) },
    { "ONMOUSEUP" T(N) },
    { 0 T(N) } /* Terminate list */
};

static const attr align_attr_list[] = {
    { "ALIGN" T(N) },
    { 0 T(N) } /* Terminate list */
};

static const attr cellalign_attr_list[] = {
    { "ALIGN" T(N) },
    { "CHAR" T(N) },
    { "CHAROFF" T(N) },
    { "VALIGN" T(N) },
    { 0 T(N) } /* Terminate list */
};

static const attr bgcolor_attr_list[] = {
    { "BGCOLOR" T(N) },
    { 0 T(N) } /* Terminate list */
};
90
91 #undef T
92 /* *INDENT-ON* */
93
/*
 * Report the current errno-based error, prefixed with "s", via perror() and
 * terminate the program with EXIT_FAILURE.  Does not return.
 */
static void failed(const char *s)
{
    perror(s);
    exit(EXIT_FAILURE);
}
99
/*
 * Print the command-line summary on stderr, one table entry per line, then
 * terminate with EXIT_FAILURE.  Does not return.
 */
static void usage(void)
{
    /*
     * Each option needs its own entry: without the separating comma, adjacent
     * string literals are concatenated and would be printed as one line.
     */
    static const char *tbl[] =
    {
        "Usage: dtd_util [options]",
        "",
        "Options:",
        " -c generate C-source",
        " -h generate C-header",
        " -l load",
        " -o filename specify output (default: stdout)",
        " -s strict (HTML DTD 0)",
        " -t tagsoup (HTML DTD 1)",
    };
    unsigned n;

    for (n = 0; n < TABLESIZE(tbl); ++n) {
        fprintf(stderr, "%s\n", tbl[n]);
    }
    exit(EXIT_FAILURE);
}
121
/*
 * Map an SGMLContent value to the spelling of its enumerator.
 * Values not listed below yield "?".
 */
static const char *SGMLContent2s(SGMLContent contents)
{
    switch (contents) {
    case SGML_EMPTY:
        return "SGML_EMPTY";
    case SGML_LITTERAL:
        return "SGML_LITTERAL";
    case SGML_CDATA:
        return "SGML_CDATA";
    case SGML_SCRIPT:
        return "SGML_SCRIPT";
    case SGML_RCDATA:
        return "SGML_RCDATA";
    case SGML_MIXED:
        return "SGML_MIXED";
    case SGML_ELEMENT:
        return "SGML_ELEMENT";
    case SGML_PCDATA:
        return "SGML_PCDATA";
    }
    return "?";
}
154
s2SGMLContent(const char * value)155 static SGMLContent s2SGMLContent(const char *value)
156 {
157 static SGMLContent table[] =
158 {
159 SGML_EMPTY,
160 SGML_LITTERAL,
161 SGML_CDATA,
162 SGML_SCRIPT,
163 SGML_RCDATA,
164 SGML_MIXED,
165 SGML_ELEMENT,
166 SGML_PCDATA
167 };
168 unsigned n;
169 SGMLContent result = SGML_EMPTY;
170
171 for (n = 0; n < TABLESIZE(table); ++n) {
172 if (!strcmp(SGMLContent2s(table[n]), value)) {
173 result = table[n];
174 break;
175 }
176 }
177 return result;
178 }
179
180 static void PrintF(FILE *, int, const char *,...) GCC_PRINTFLIKE(3, 4);
181
/*
 * Write printf-style formatted text to "output", left-justified in a field of
 * at least "width" columns (padded on the right with spaces).
 *
 * The text is formatted straight onto the stream rather than through a
 * fixed-size scratch buffer, so long results cannot overrun anything.
 */
static void PrintF(FILE *output, int width, const char *fmt,...)
{
    va_list ap;
    int used;
    /* "%-*s" treats a negative width like the positive one; do the same */
    int field = (width < 0) ? -width : width;

    va_start(ap, fmt);
    used = vfprintf(output, fmt, ap);
    va_end(ap);

    /* vfprintf() reports failure with a negative count: nothing to pad then */
    if (used >= 0) {
        for (; used < field; ++used)
            fputc(' ', output);
    }
}
193
/*
 * Compare two attribute lists by name, entry by entry.
 * Returns 1 only if both pointers are non-null and the lists have the same
 * names in the same order and the same length; otherwise 0.
 */
static int same_AttrList(AttrList a, AttrList b)
{
    if (!a || !b)
        return 0;

    for (; a->name && b->name; ++a, ++b) {
        if (strcmp(a->name, b->name) != 0)
            return 0;
    }
    /* equal only if both lists ended at the same place */
    return (a->name == 0 && b->name == 0);
}
213
first_attrs(const SGML_dtd * dtd,int which)214 static int first_attrs(const SGML_dtd * dtd, int which)
215 {
216 int check;
217 int result = TRUE;
218
219 for (check = 0; check < which; ++check) {
220 if (dtd->tags[check].attributes == dtd->tags[which].attributes) {
221 result = FALSE;
222 break;
223 } else if (same_AttrList(dtd->tags[check].attributes,
224 dtd->tags[which].attributes)) {
225 result = FALSE;
226 dtd->tags[which].attributes = dtd->tags[check].attributes;
227 break;
228 }
229 }
230 return result;
231 }
232
/*
 * Copy "source" (including its terminating NUL) into "target", replacing
 * every character that is not alphanumeric with '_'.
 *
 * The caller must supply a target at least as large as the source string.
 * Returns "target".
 */
static char *no_dashes(char *target, const char *source)
{
    int j;

    for (j = 0; (target[j] = source[j]) != '\0'; ++j) {
        /* <ctype.h> functions require a value representable as unsigned char */
        if (!isalnum((unsigned char) target[j]))
            target[j] = '_';
    }
    return target;
}
243
244 /* the second "OBJECT" is treated specially */
first_object(const SGML_dtd * dtd,int which)245 static int first_object(const SGML_dtd * dtd, int which)
246 {
247 int check;
248
249 for (check = 0; check <= which; ++check) {
250 if (!strcmp(dtd->tags[check].name, "OBJECT"))
251 break;
252 }
253 return (check == which);
254 }
255
NameOfAttrs(const SGML_dtd * dtd,int which)256 static const char *NameOfAttrs(const SGML_dtd * dtd, int which)
257 {
258 int check;
259 const char *result = dtd->tags[which].name;
260
261 for (check = 0; check < which; ++check) {
262 if (dtd->tags[check].attributes == dtd->tags[which].attributes) {
263 result = dtd->tags[check].name;
264 break;
265 }
266 }
267 /* special cases to match existing headers */
268 if (!strcmp(result, "ABBR"))
269 result = "GEN";
270 else if (!strcmp(result, "ARTICLE"))
271 result = "GEN5";
272 else if (!strcmp(result, "BLOCKQUOTE"))
273 result = "BQ";
274 else if (!strcmp(result, "BASEFONT"))
275 result = "FONT";
276 else if (!strcmp(result, "CENTER"))
277 result = "DIV";
278 else if (!strcmp(result, "DIR"))
279 result = "UL";
280 else if (!strcmp(result, "H1"))
281 result = "H";
282 else if (!strcmp(result, "TBODY"))
283 result = "TR";
284 return result;
285 }
286
DEF_name(const SGML_dtd * dtd,int which)287 static const char *DEF_name(const SGML_dtd * dtd, int which)
288 {
289 const char *result = dtd->tags[which].name;
290
291 if (!strcmp(result, "OBJECT") && !first_object(dtd, which))
292 result = "OBJECT_PCDATA";
293 return result;
294 }
295
/*
 * One distinct attribute list of the DTD, as collected by sorted_attrs().
 */
typedef struct {
    const char *name;           /* table name, from NameOfAttrs() */
    const attr *attrs;          /* the tag's attribute list */
    int count;                  /* number of entries, from AttrCount() */
    int which;                  /* index of the owning tag in dtd->tags */
} AttrInfo;
302
compare_attr_types(const void * a,const void * b)303 static int compare_attr_types(const void *a, const void *b)
304 {
305 const AttrType *p = (const AttrType *) a;
306 const AttrType *q = (const AttrType *) b;
307 int result = 0;
308
309 /* keep lowercase AttrType lists before uppercase, since latter are derived */
310 if (isupper(p->name[0]) ^ isupper(q->name[0])) {
311 if (isupper(p->name[0])) {
312 result = 1;
313 } else {
314 result = -1;
315 }
316 } else {
317 result = strcmp(p->name, q->name);
318 }
319 return result;
320 }
321
len_AttrTypes(const AttrType * data)322 static int len_AttrTypes(const AttrType * data)
323 {
324 int result = 0;
325
326 for (result = 0; data[result].name != 0; ++result) {
327 ;
328 }
329 return result;
330 }
331
sorted_AttrTypes(const AttrType * source)332 static AttrType *sorted_AttrTypes(const AttrType * source)
333 {
334 AttrType *result = 0;
335 unsigned number = len_AttrTypes(source);
336
337 if (number != 0) {
338 result = typecallocn(AttrType, number + 1);
339 if (result != 0) {
340 MemCpy(result, source, number * sizeof(*result));
341 qsort(result, number, sizeof(*result), compare_attr_types);
342 }
343 }
344
345 return result;
346 }
347
compare_attr(const void * a,const void * b)348 static int compare_attr(const void *a, const void *b)
349 {
350 const AttrInfo *p = (const AttrInfo *) a;
351 const AttrInfo *q = (const AttrInfo *) b;
352
353 return strcmp(p->name, q->name);
354 }
355
/*
 * Count the entries of an attribute list, up to but not including the
 * terminating entry whose name is 0.
 */
static int len_AttrList(AttrList data)
{
    int total = 0;

    while (data[total].name != 0)
        ++total;
    return total;
}
365
/*
 * Sort a zero-terminated attribute list by name, in place, and drop entries
 * whose name repeats, keeping the last entry of each run of equal names.
 *
 * The previous loop copied data[k] onto data[j] for some k >= j with the same
 * name, which never shortened the list; duplicates are now really removed.
 * The vacated slots and the terminator are zeroed.
 */
static void sort_uniq_AttrList(attr * data)
{
    unsigned have = len_AttrList(data);
    unsigned kept = 0;
    unsigned j;

    qsort(data, have, sizeof(*data), compare_attr);
    /*
     * Eliminate duplicates
     */
    for (j = 0; j < have; ++j) {
        /* skip every entry that is followed by one of the same name */
        if (j + 1 < have && strcmp(data[j].name, data[j + 1].name) == 0)
            continue;
        data[kept++] = data[j];
    }
    memset(data + kept, 0, (have - kept + 1) * sizeof(data[0]));
}
387
copy_AttrList(AttrList data)388 static attr *copy_AttrList(AttrList data)
389 {
390 unsigned need = len_AttrList(data);
391 unsigned n;
392
393 attr *result = (attr *) calloc(need + 1, sizeof(attr));
394
395 for (n = 0; n < need; ++n)
396 result[n] = data[n];
397 sort_uniq_AttrList(result);
398 return result;
399 }
400
merge_AttrLists(const AttrType * data)401 static attr *merge_AttrLists(const AttrType * data)
402 {
403 const AttrType *at;
404 attr *result = 0;
405 unsigned need = 1;
406 unsigned have = 0;
407 unsigned j;
408
409 for (at = data; at->name; ++at) {
410 need += len_AttrList(at->list);
411 }
412 result = (attr *) calloc(need + 1, sizeof(attr));
413 for (at = data; at->name; ++at) {
414 if (!strcmp(at->name, "events")) {
415 ; /* lynx does not use events */
416 } else {
417 for (j = 0; at->list[j].name; ++j) {
418 result[have++] = at->list[j];
419 }
420 }
421 }
422 sort_uniq_AttrList(result);
423 return result;
424 }
425
/*
 * Remove from "target" every entry whose name also appears in "source",
 * closing the gaps so the remaining entries keep their order.
 * Returns the number of entries removed.
 */
static int clean_AttrList(attr * target, AttrList source)
{
    int removed = 0;
    int rd;
    int wr = 0;
    int s;

    for (rd = 0; target[rd].name != 0; ++rd) {
        int listed = 0;

        for (s = 0; source[s].name != 0; ++s) {
            if (strcmp(target[rd].name, source[s].name) == 0) {
                listed = 1;
                break;
            }
        }
        if (listed) {
            ++removed;
        } else {
            if (wr != rd)
                target[wr] = target[rd];
            ++wr;
        }
    }
    /* "rd" now indexes the terminator: replicate it over the freed tail */
    for (; wr < rd; ++wr)
        target[wr] = target[rd];
    return removed;
}
447
448 /*
449 * Actually COUNT the number of attributes, to make it possible to edit a
450 * attribute-table in src0_HTMLDTD.h and have all of the files updated by
451 * just doing a "make sources".
452 */
AttrCount(HTTag * tag)453 static int AttrCount(HTTag * tag)
454 {
455 return len_AttrList(tag->attributes);
456 }
457
sorted_attrs(const SGML_dtd * dtd,unsigned * countp)458 static AttrInfo *sorted_attrs(const SGML_dtd * dtd, unsigned *countp)
459 {
460 int j;
461
462 AttrInfo *data = (AttrInfo *) calloc(dtd->number_of_tags, sizeof(AttrInfo));
463 unsigned count = 0;
464
465 /* get the attribute-data */
466 for (j = 0; j < dtd->number_of_tags; ++j) {
467 if (first_attrs(dtd, j)) {
468 data[count].name = NameOfAttrs(dtd, j);
469 data[count].attrs = dtd->tags[j].attributes;
470 data[count].count = AttrCount(&(dtd->tags[j]));
471 data[count].which = j;
472 ++count;
473 }
474 }
475 /* sort the data by the name of their associated tag */
476 qsort(data, count, sizeof(*data), compare_attr);
477 *countp = count;
478 return data;
479 }
480
dump_src_HTTag_Defines(FILE * output,const SGML_dtd * dtd,int which)481 static void dump_src_HTTag_Defines(FILE *output, const SGML_dtd * dtd, int which)
482 {
483 HTTag *tag = &(dtd->tags[which]);
484
485 #define myFMT "0x%05X"
486 fprintf(output,
487 "#define T_%-13s "
488 myFMT "," myFMT "," myFMT "," myFMT "," myFMT "," myFMT
489 "," myFMT "\n",
490 DEF_name(dtd, which),
491 tag->tagclass,
492 tag->contains,
493 tag->icontains,
494 tag->contained,
495 tag->icontained,
496 tag->canclose,
497 tag->flags);
498 }
499
dump_AttrItem(FILE * output,const attr * data)500 static void dump_AttrItem(FILE *output, const attr * data)
501 {
502 char buffer[BUFSIZ];
503 char pretty = 'N';
504
505 sprintf(buffer, "\"%s\"", data->name);
506 #ifdef USE_PRETTYSRC
507 switch (data->type) {
508 case HTMLA_NORMAL:
509 pretty = 'N';
510 break;
511 case HTMLA_ANAME:
512 pretty = 'i';
513 break;
514 case HTMLA_HREF:
515 pretty = 'h';
516 break;
517 case HTMLA_CLASS:
518 pretty = 'c';
519 break;
520 case HTMLA_AUXCLASS:
521 pretty = 'x';
522 break;
523 }
524 #endif
525 fprintf(output, "\t{ %-15s T(%c) },\n", buffer, pretty);
526 }
527
/*
 * Emit the terminating entry of a generated attribute table.
 */
static void dump_AttrItem0(FILE *output)
{
    static const char terminator[] = "\t{ 0 T(N) }\t/* Terminate list */\n";

    fputs(terminator, output);
}
532
/*
 * Emit two generated tables for attribute type "name":
 *   <name>_attr_list[] - the attributes in "data" (may be null), terminated;
 *   <name>_attr_type[] - one ATTR_TYPE() entry per string in the
 *                        null-terminated array "from", or a single entry for
 *                        "name" itself when "from" is null.
 */
static void dump_src_AttrType(FILE *output, const char *name, AttrList data, const char **from)
{
    AttrList item;
    const char **src;

    fprintf(output, "static const attr %s_attr_list[] = {\n", name);
    for (item = data; item != 0 && item->name != 0; ++item) {
        dump_AttrItem(output, item);
    }
    fprintf(output, "\t{ 0 T(N) } /* Terminate list */\n");
    fprintf(output, "};\n");
    NOTE("");

    fprintf(output, "static const AttrType %s_attr_type[] = {\n", name);
    if (from == 0) {
        fprintf(output, "\t{ ATTR_TYPE(%s) },\n", name);
    } else {
        for (src = from; *src != 0; ++src) {
            fprintf(output, "\t{ ATTR_TYPE(%s) },\n", *src);
        }
    }
    fprintf(output, "\t{ 0, 0 },\n");
    fprintf(output, "};\n");
    NOTE("");
}
559
dump_src_HTTag_Attrs(FILE * output,const SGML_dtd * dtd,int which)560 static void dump_src_HTTag_Attrs(FILE *output, const SGML_dtd * dtd, int which)
561 {
562 HTTag *tag = &(dtd->tags[which]);
563 attr *list = merge_AttrLists(tag->attr_types);
564 char buffer[BUFSIZ];
565 int n;
566 int limit = len_AttrList(list);
567
568 sprintf(buffer, "static const attr %s_attr[] = {", NameOfAttrs(dtd, which));
569 fprintf(output,
570 "%-40s/* %s attributes */\n", buffer, tag->name);
571 for (n = 0; n < limit; ++n) {
572 dump_AttrItem(output, list + n);
573 }
574 dump_AttrItem0(output);
575 fprintf(output, "};\n");
576 NOTE("");
577 free(list);
578 }
579
dump_src_HTTag(FILE * output,const SGML_dtd * dtd,int which)580 static void dump_src_HTTag(FILE *output, const SGML_dtd * dtd, int which)
581 {
582 HTTag *tag = &(dtd->tags[which]);
583 char *P_macro = "P";
584
585 #ifdef USE_JUSTIFY_ELTS
586 if (!tag->can_justify)
587 P_macro = "P0";
588 #endif
589 PrintF(output, 19, " { %s(%s),", P_macro, tag->name);
590 PrintF(output, 24, "ATTR_DATA(%s), ", NameOfAttrs(dtd, which));
591 PrintF(output, 14, "%s,", SGMLContent2s(tag->contents));
592 fprintf(output, "T_%s", DEF_name(dtd, which));
593 fprintf(output, "},\n");
594 }
595
dump_source(FILE * output,const SGML_dtd * dtd,int dtd_version)596 static void dump_source(FILE *output, const SGML_dtd * dtd, int dtd_version)
597 {
598 static AttrType generic_types[] =
599 {
600 ATTR_TYPE(core),
601 ATTR_TYPE(i18n),
602 ATTR_TYPE(events),
603 ATTR_TYPE(align),
604 ATTR_TYPE(cellalign),
605 ATTR_TYPE(bgcolor),
606 {0, 0}
607 };
608 AttrType *gt;
609
610 const char *marker = "src_HTMLDTD_H";
611 int j;
612
613 unsigned count = 0;
614 AttrInfo *data = sorted_attrs(dtd, &count);
615
616 fprintf(output, "/* %cLynxId%c */\n", '$', '$');
617 fprintf(output, "#ifndef %s%d\n", marker, dtd_version);
618 fprintf(output, "#define %s%d 1\n\n", marker, dtd_version);
619
620 /*
621 * If we ifdef this for once, and make the table names distinct, we can
622 * #include the strict- and tagsoup-output directly in HTMLDTD.c
623 */
624 NOTE("#ifndef once_HTMLDTD");
625 NOTE("#define once_HTMLDTD 1");
626 NOTE("");
627
628 /* construct TagClass-define's */
629 for (j = 0; j <= dtd->number_of_tags; ++j) {
630 dump_src_HTTag_Defines(output, dtd, j);
631 }
632 NOTE("#define T__UNREC_ 0x00000,0x00000,0x00000,0x00000,0x00000,0x00000,0x00000");
633
634 /* construct attribute-tables */
635 NOTE("#ifdef USE_PRETTYSRC");
636 NOTE("# define N HTMLA_NORMAL");
637 NOTE("# define i HTMLA_ANAME");
638 NOTE("# define h HTMLA_HREF");
639 NOTE("# define c HTMLA_CLASS");
640 NOTE("# define x HTMLA_AUXCLASS");
641 NOTE("# define T(t) , t");
642 NOTE("#else");
643 NOTE("# define T(t) /*nothing */");
644 NOTE("#endif");
645 NOTE("/* *INDENT-OFF* */");
646 NOTE("");
647 NOTE("#define ATTR_TYPE(name) #name, name##_attr_list");
648 NOTE("");
649 NOTE("/* generic attributes, used in different tags */");
650 for (gt = generic_types; gt->name != 0; ++gt) {
651 dump_src_AttrType(output, gt->name, gt->list, 0);
652 }
653 NOTE("");
654 NOTE("/* tables defining attributes per-tag in terms of generic attributes (editable) */");
655 for (j = 0; j < (int) count; ++j) {
656 int which = data[j].which;
657
658 if (first_attrs(dtd, which)) {
659 HTTag *tag = &(dtd->tags[which]);
660 const AttrType *types = tag->attr_types;
661 const char *name = NameOfAttrs(dtd, which);
662 attr *list = 0;
663 const char *from_attr[10];
664 int from_size = 0;
665
666 while (types->name != 0) {
667 from_attr[from_size++] = types->name;
668 if (!strcmp(types->name, name)) {
669 list = copy_AttrList(types->list);
670 for (gt = generic_types; gt->name != 0; ++gt) {
671 if (clean_AttrList(list, gt->list)) {
672 int k;
673 int found = 0;
674
675 for (k = 0; k < from_size; ++k) {
676 if (!strcmp(from_attr[k], gt->name)) {
677 found = 1;
678 break;
679 }
680 }
681 if (!found)
682 from_attr[from_size++] = gt->name;
683 break;
684 }
685 }
686 }
687 ++types;
688 }
689 from_attr[from_size] = 0;
690
691 if (list != 0) {
692 dump_src_AttrType(output, name, list, from_attr);
693 free(list);
694 }
695 }
696 }
697 NOTE("");
698 NOTE("/* attribute lists for the runtime (generated by dtd_util) */");
699 for (j = 0; j < (int) count; ++j) {
700 dump_src_HTTag_Attrs(output, dtd, data[j].which);
701 }
702 NOTE("/* *INDENT-ON* */");
703 NOTE("");
704 NOTE("/* justification-flags */");
705 NOTE("#undef N");
706 NOTE("#undef i");
707 NOTE("#undef h");
708 NOTE("#undef c");
709 NOTE("#undef x");
710 NOTE("");
711 NOTE("#undef T");
712 NOTE("");
713 NOTE("/* tag-names */");
714 for (j = 0; j <= dtd->number_of_tags; ++j) {
715 fprintf(output, "#undef %s\n", DEF_name(dtd, j));
716 }
717 NOTE("");
718 NOTE("/* these definitions are used in the tags-tables */");
719 NOTE("#undef P");
720 NOTE("#undef P_");
721 NOTE("#ifdef USE_COLOR_STYLE");
722 NOTE("#define P_(x) #x, (sizeof #x) -1");
723 NOTE("#define NULL_HTTag_ NULL, 0");
724 NOTE("#else");
725 NOTE("#define P_(x) #x");
726 NOTE("#define NULL_HTTag_ NULL");
727 NOTE("#endif");
728 NOTE("");
729 NOTE("#ifdef USE_JUSTIFY_ELTS");
730 NOTE("#define P(x) P_(x), 1");
731 NOTE("#define P0(x) P_(x), 0");
732 NOTE("#define NULL_HTTag NULL_HTTag_,0");
733 NOTE("#else");
734 NOTE("#define P(x) P_(x)");
735 NOTE("#define P0(x) P_(x)");
736 NOTE("#define NULL_HTTag NULL_HTTag_");
737 NOTE("#endif");
738 NOTE("");
739 NOTE("#define ATTR_DATA(name) name##_attr, HTML_##name##_ATTRIBUTES, name##_attr_type");
740 NOTE("");
741 NOTE("#endif /* once_HTMLDTD */");
742 NOTE("/* *INDENT-OFF* */");
743
744 /* construct the tags table */
745 fprintf(output,
746 "static const HTTag tags_table%d[HTML_ALL_ELEMENTS] = {\n",
747 dtd_version);
748 for (j = 0; j <= dtd->number_of_tags; ++j) {
749 if (j == dtd->number_of_tags) {
750 NOTE("/* additional (alternative variants), not counted in HTML_ELEMENTS: */");
751 NOTE("/* This one will be used as a temporary substitute within the parser when");
752 NOTE(" it has been signalled to parse OBJECT content as MIXED. - kw */");
753 }
754 dump_src_HTTag(output, dtd, j);
755 }
756 fprintf(output, "};\n");
757
758 NOTE("/* *INDENT-ON* */");
759 NOTE("");
760 fprintf(output, "#endif /* %s%d */\n", marker, dtd_version);
761
762 free(data);
763 }
764
/*
 * Emit the header defines for one attribute table: one
 * "#define HTML_<table>_<attribute> <index>" line per attribute (attribute
 * name passed through no_dashes()), then the HTML_<table>_ATTRIBUTES count
 * and a blank line.
 */
static void dump_hdr_attr(FILE *output, AttrInfo * data)
{
    char scratch[BUFSIZ];
    int idx = 0;

    while (idx < data->count) {
        PrintF(output, 33, "#define HTML_%s_%s",
               data->name,
               no_dashes(scratch, data->attrs[idx].name));
        fprintf(output, "%2d\n", idx);
        ++idx;
    }
    PrintF(output, 33, "#define HTML_%s_ATTRIBUTES", data->name);
    fprintf(output, "%2d\n", data->count);
    fputs("\n", output);
}
780
dump_header(FILE * output,const SGML_dtd * dtd)781 static void dump_header(FILE *output, const SGML_dtd * dtd)
782 {
783 const char *marker = "hdr_HTMLDTD_H";
784 int j;
785
786 unsigned count = 0;
787 AttrInfo *data = sorted_attrs(dtd, &count);
788
789 fprintf(output, "/* %cLynxId%c */\n", '$', '$');
790 fprintf(output, "#ifndef %s\n", marker);
791 fprintf(output, "#define %s 1\n\n", marker);
792
793 NOTE("#ifdef __cplusplus");
794 NOTE("extern \"C\" {");
795 NOTE("#endif");
796
797 NOTE("/*");
798 NOTE("");
799 NOTE(" Element Numbers");
800 NOTE("");
801 NOTE(" Must Match all tables by element!");
802 NOTE(" These include tables in HTMLDTD.c");
803 NOTE(" and code in HTML.c.");
804 NOTE("");
805 NOTE(" */");
806
807 fprintf(output, " typedef enum {\n");
808 for (j = 0; j < dtd->number_of_tags; ++j) {
809 fprintf(output, "\tHTML_%s,\n", dtd->tags[j].name);
810 }
811 NOTE("\tHTML_ALT_OBJECT");
812 NOTE(" } HTMLElement;\n");
813 NOTE("/* Notes: HTML.c uses a different extension of the");
814 NOTE(" HTML_ELEMENTS space privately, see");
815 NOTE(" HTNestedList.h.");
816 NOTE("");
817 NOTE(" Do NOT replace HTML_ELEMENTS with");
818 NOTE(" TABLESIZE(mumble_dtd.tags).");
819 NOTE("");
820 NOTE(" Keep the following defines in synch with");
821 NOTE(" the above enum!");
822 NOTE(" */");
823 NOTE("");
824 NOTE("/* # of elements generally visible to Lynx code */");
825 fprintf(output, "#define HTML_ELEMENTS %d\n", dtd->number_of_tags);
826 NOTE("");
827 NOTE("/* # of elements visible to SGML parser */");
828 fprintf(output, "#define HTML_ALL_ELEMENTS %d\n", dtd->number_of_tags + 1);
829 NOTE("");
830 NOTE("/*");
831 NOTE("");
832 NOTE(" Attribute numbers");
833 NOTE("");
834 NOTE(" Identifier is HTML_<element>_<attribute>.");
835 NOTE(" These must match the tables in HTML.c!");
836 NOTE("");
837 NOTE(" */");
838
839 /* output the sorted list */
840 for (j = 0; j < (int) count; ++j) {
841 dump_hdr_attr(output, data + j);
842 }
843 free(data);
844
845 NOTE("#ifdef __cplusplus");
846 NOTE("}");
847 NOTE("#endif");
848
849 fprintf(output, "#endif\t\t\t\t/* %s */\n", marker);
850 }
851
852 #define FMT_NUM_ATTRS "%d attributes:\n"
853 #define FMT_ONE_ATTR "%d:%d:%s\n"
854 #define NUM_ONE_ATTR 3
855
dump_flat_attrs(FILE * output,const attr * attributes,int number_of_attributes)856 static void dump_flat_attrs(FILE *output,
857 const attr * attributes,
858 int number_of_attributes)
859 {
860 int n;
861
862 fprintf(output, "\t\t" FMT_NUM_ATTRS, number_of_attributes);
863 for (n = 0; n < number_of_attributes; ++n) {
864 fprintf(output, "\t\t\t" FMT_ONE_ATTR, n,
865 #ifdef USE_PRETTYSRC
866 attributes[n].type,
867 #else
868 0, /* need placeholder for source-compat */
869 #endif
870 attributes[n].name
871 );
872 }
873 }
874
dump_flat_attr_types(FILE * output,const AttrType * attr_types)875 static void dump_flat_attr_types(FILE *output, const AttrType * attr_types)
876 {
877 const AttrType *p = sorted_AttrTypes(attr_types);
878 int number = len_AttrTypes(attr_types);
879
880 fprintf(output, "\t\t%d attr_types\n", number);
881
882 if (p != 0) {
883 while (p->name != 0) {
884 fprintf(output, "\t\t\t%s\n", p->name);
885 ++p;
886 }
887 }
888 }
889
/*
 * Write one "<name>: <content-model>" line in flat-file form.
 */
static void dump_flat_SGMLContent(FILE *output, const char *name, SGMLContent contents)
{
    const char *label = SGMLContent2s(contents);

    fprintf(output, "\t\t%s: %s\n", name, label);
}
894
895 #define DUMP(name) \
896 if (theClass & Tgc_##name) {\
897 fprintf(output, " " #name); \
898 theClass &= ~(Tgc_##name); \
899 }
900
dump_flat_TagClass(FILE * output,const char * name,TagClass theClass)901 static void dump_flat_TagClass(FILE *output, const char *name, TagClass theClass)
902 {
903 fprintf(output, "\t\t%s:", name);
904 DUMP(FONTlike);
905 DUMP(EMlike);
906 DUMP(MATHlike);
907 DUMP(Alike);
908 DUMP(formula);
909 DUMP(TRlike);
910 DUMP(SELECTlike);
911 DUMP(FORMlike);
912 DUMP(Plike);
913 DUMP(DIVlike);
914 DUMP(LIlike);
915 DUMP(ULlike);
916 DUMP(BRlike);
917 DUMP(APPLETlike);
918 DUMP(HRlike);
919 DUMP(MAPlike);
920 DUMP(outer);
921 DUMP(BODYlike);
922 DUMP(HEADstuff);
923 DUMP(same);
924 if (theClass)
925 fprintf(output, " OOPS:%#x", theClass);
926 fprintf(output, "\n");
927 }
928
929 #undef DUMP
930
931 #define DUMP(name) \
932 if (theFlags & Tgf_##name) {\
933 fprintf(output, " " #name); \
934 theFlags &= ~(Tgf_##name); \
935 }
936
dump_flat_TagFlags(FILE * output,const char * name,TagFlags theFlags)937 static void dump_flat_TagFlags(FILE *output, const char *name, TagFlags theFlags)
938 {
939 fprintf(output, "\t\t%s:", name);
940 DUMP(endO);
941 DUMP(startO);
942 DUMP(mafse);
943 DUMP(strict);
944 DUMP(nreie);
945 DUMP(frecyc);
946 DUMP(nolyspcl);
947 if (theFlags)
948 fprintf(output, " OOPS:%#x", theFlags);
949 fprintf(output, "\n");
950 }
951
952 #undef DUMP
953
/*
 * Write the complete flat-file record for one tag: "<n>:<name>", the
 * justify marker (only with USE_JUSTIFY_ELTS), the attributes, attribute
 * types, content model, the six tag-class sets and the flags.
 */
static void dump_flat_HTTag(FILE *output, unsigned n, HTTag * tag)
{
    int attr_total;

    fprintf(output, "\t%u:%s\n", n, tag->name);
#ifdef USE_JUSTIFY_ELTS
    fprintf(output, "\t\t%s\n", tag->can_justify ? "justify" : "nojustify");
#endif
    attr_total = AttrCount(tag);
    dump_flat_attrs(output, tag->attributes, attr_total);
    dump_flat_attr_types(output, tag->attr_types);
    dump_flat_SGMLContent(output, "contents", tag->contents);

    dump_flat_TagClass(output, "tagclass", tag->tagclass);
    dump_flat_TagClass(output, "contains", tag->contains);
    dump_flat_TagClass(output, "icontains", tag->icontains);
    dump_flat_TagClass(output, "contained", tag->contained);
    dump_flat_TagClass(output, "icontained", tag->icontained);
    dump_flat_TagClass(output, "canclose", tag->canclose);

    dump_flat_TagFlags(output, "flags", tag->flags);
}
971
/*
 * Count the attribute types of "tag" and, when "attr_types" is non-null,
 * merge them into that zero-terminated table.
 *
 * With a null table every type of the tag is counted.  With a table, a type
 * whose name is already present overwrites that entry and is not counted; a
 * new name is stored at the terminator position and counted.  The table must
 * have room for the additions.
 */
static int count_attr_types(AttrType * attr_types, HTTag * tag)
{
    int added = 0;
    const AttrType *p = tag->attr_types;
    AttrType *slot;

    if (p == 0)
        return 0;

    for (; p->name != 0; ++p) {
        int is_new = 1;

        if (attr_types != 0) {
            slot = attr_types;
            while (slot->name != 0 && strcmp(slot->name, p->name) != 0)
                ++slot;
            /* decide before storing: the store makes slot->name non-null */
            if (slot->name != 0)
                is_new = 0;
            *slot = *p;
        }
        added += is_new;
    }
    return added;
}
996
dump_flatfile(FILE * output,const SGML_dtd * dtd)997 static void dump_flatfile(FILE *output, const SGML_dtd * dtd)
998 {
999 AttrType *attr_types = 0;
1000 int pass;
1001 unsigned count = 0;
1002 unsigned n;
1003
1004 /* merge all of the attr_types data */
1005 for (pass = 0; pass < 2; ++pass) {
1006 for (n = 0; (int) n < dtd->number_of_tags; ++n) {
1007 count += count_attr_types(attr_types, &(dtd->tags[n]));
1008 }
1009 if (pass == 0) {
1010 attr_types = typecallocn(AttrType, count + 1);
1011 count = 0;
1012 } else {
1013 count = len_AttrTypes(attr_types);
1014 qsort(attr_types, count, sizeof(*attr_types), compare_attr_types);
1015 fprintf(output, "%d attr_types\n", count);
1016 for (n = 0; n < count; ++n) {
1017 fprintf(output, "\t%d:%s\n", n, attr_types[n].name);
1018 dump_flat_attrs(output, attr_types[n].list,
1019 len_AttrList(attr_types[n].list));
1020 }
1021 }
1022 }
1023
1024 fprintf(output, "%d tags\n", dtd->number_of_tags);
1025 for (n = 0; (int) n < dtd->number_of_tags; ++n) {
1026 dump_flat_HTTag(output, n, &(dtd->tags[n]));
1027 }
1028 #if 0
1029 fprintf(output, "%d entities\n", dtd->number_of_entities);
1030 for (n = 0; n < dtd->number_of_entities; ++n) {
1031 }
1032 #endif
1033 }
1034
/*
 * Read the next line (at most 1023 characters, newline included if it fits)
 * from "input" and return it as a newly allocated string that the caller
 * must free().  Returns 0 when nothing could be read or the copy could not
 * be allocated.
 */
static char *get_line(FILE *input)
{
    char temp[1024];

    if (fgets(temp, (int) sizeof(temp), input) == 0)
        return 0;
    return strdup(temp);
}
1045
1046 #define LOAD(name) \
1047 if (!strcmp(data, #name)) {\
1048 *theClass |= Tgc_##name; \
1049 continue; \
1050 }
1051
load_flat_TagClass(FILE * input,const char * name,TagClass * theClass)1052 static int load_flat_TagClass(FILE *input, const char *name, TagClass * theClass)
1053 {
1054 char prefix[80];
1055 char *next = get_line(input);
1056 char *data;
1057 int result = 0;
1058
1059 *theClass = 0;
1060 if (next != 0) {
1061 sprintf(prefix, "\t\t%s:", name);
1062 data = strtok(next, "\n ");
1063
1064 if (data != 0 && !strcmp(data, prefix)) {
1065 result = 1;
1066
1067 while ((data = strtok(NULL, "\n ")) != 0) {
1068
1069 LOAD(FONTlike);
1070 LOAD(EMlike);
1071 LOAD(MATHlike);
1072 LOAD(Alike);
1073 LOAD(formula);
1074 LOAD(TRlike);
1075 LOAD(SELECTlike);
1076 LOAD(FORMlike);
1077 LOAD(Plike);
1078 LOAD(DIVlike);
1079 LOAD(LIlike);
1080 LOAD(ULlike);
1081 LOAD(BRlike);
1082 LOAD(APPLETlike);
1083 LOAD(HRlike);
1084 LOAD(MAPlike);
1085 LOAD(outer);
1086 LOAD(BODYlike);
1087 LOAD(HEADstuff);
1088 LOAD(same);
1089
1090 fprintf(stderr, "Unexpected TagClass '%s'\n", data);
1091 result = 0;
1092 break;
1093 }
1094 } else if (data) {
1095 fprintf(stderr, "load_flat_TagClass: '%s' vs '%s'\n", data, prefix);
1096 }
1097 free(next);
1098 } else {
1099 fprintf(stderr, "Did not find contents\n");
1100 }
1101 return result;
1102 }
1103
1104 #undef LOAD
1105
1106 #define LOAD(name) \
1107 if (!strcmp(data, #name)) {\
1108 *flags |= Tgf_##name; \
1109 continue; \
1110 }
1111
load_flat_TagFlags(FILE * input,const char * name,TagFlags * flags)1112 static int load_flat_TagFlags(FILE *input, const char *name, TagFlags * flags)
1113 {
1114 char prefix[80];
1115 char *next = get_line(input);
1116 char *data;
1117 int result = 0;
1118
1119 *flags = 0;
1120 if (next != 0) {
1121 sprintf(prefix, "\t\t%s:", name);
1122 data = strtok(next, "\n ");
1123
1124 if (data != 0 && !strcmp(data, prefix)) {
1125 result = 1;
1126
1127 while ((data = strtok(NULL, "\n ")) != 0) {
1128
1129 LOAD(endO);
1130 LOAD(startO);
1131 LOAD(mafse);
1132 LOAD(strict);
1133 LOAD(nreie);
1134 LOAD(frecyc);
1135 LOAD(nolyspcl);
1136
1137 fprintf(stderr, "Unexpected TagFlag '%s'\n", data);
1138 result = 0;
1139 break;
1140 }
1141 } else if (data) {
1142 fprintf(stderr, "load_flat_TagFlags: '%s' vs '%s'\n", data, prefix);
1143 }
1144 free(next);
1145 }
1146 return result;
1147 }
1148
1149 #undef LOAD
1150
/*
 * Read an attribute list written by dump_flat_attrs(): a count line followed
 * by one "index:type:name" line per attribute.
 *
 * On success with a positive count, *attrs receives a newly allocated,
 * zero-terminated list sorted by name, and *length the count.  If the count
 * line is missing, the count is not positive, or the allocation fails,
 * *attrs is left untouched and 1 is still returned.  Returns 0 when an
 * attribute line is malformed or out of sequence; the list is then left
 * partially filled and unsorted.
 */
static int load_flat_AttrList(FILE *input, AttrList * attrs, int *length)
{
    attr *attributes;
    int j, jcmp, code;
    int result = 1;
    char name[1024];
    /* always needed as a scan target; only stored with USE_PRETTYSRC */
    int atype = 0;

    if (fscanf(input, FMT_NUM_ATTRS, length) == 1
        && *length > 0
        && (attributes = typecallocn(attr, (size_t) (*length + 1))) != 0) {
        *attrs = attributes;
        for (j = 0; j < *length; ++j) {
            /*
             * Same layout as FMT_ONE_ATTR, but with the name limited to what
             * fits in name[] (1023 characters plus the NUL).
             */
            code = fscanf(input, "%d:%d:%1023s\n",
                          &jcmp,
                          &atype,
                          name
                );
            if (code == NUM_ONE_ATTR && (j == jcmp)) {
                attributes[j].name = strdup(name);
#ifdef USE_PRETTYSRC
                attributes[j].type = atype;
#endif
            } else {
                fprintf(stderr, "Did not find attributes\n");
                result = 0;
                break;
            }
        }
        /* after a failure some names are still null: do not sort those */
        if (result && *length > 1)
            qsort(attributes, *length, sizeof(attributes[0]), compare_attr);
    }
    return result;
}
1188
/*
 * Read the "attr_types" section of one tag: a count line followed by one
 * attribute-type name per line.  Each name is looked up in allTypes (an array
 * terminated by an entry whose name is 0, possibly a null pointer if no types
 * were loaded) and the matching attribute list is shared with the tag.
 *
 * On success tag->attr_types points to a newly allocated array sorted with
 * compare_attr_types.  Every line obtained from get_line() is freed here.
 *
 * Returns 1 on success, 0 if the section is missing or malformed, a name
 * cannot be resolved, or memory runs out.
 */
static int load_flat_tag_attr_types(FILE *input, HTTag * tag, AttrType * allTypes)
{
    AttrType *myTypes;
    char name[1024];
    char *next;
    int count = 0;
    int parsed;
    int j, k;
    int result = 1;

    next = get_line(input);
    /* compare with 1: sscanf() yields EOF, which is non-zero, on an empty line */
    parsed = (next != 0 && sscanf(next, "%d attr_types\n", &count) == 1);
    free(next);
    if (!parsed || count < 0) {
        fprintf(stderr, "Did not find attr_types\n");
        return 0;
    }

    myTypes = typecallocn(AttrType, (size_t) (count + 1));
    if (myTypes == 0) {
        fprintf(stderr, "Out of memory loading attr_types\n");
        return 0;
    }
    tag->attr_types = myTypes;

    for (k = 0; k < count; ++k) {
        next = get_line(input);
        /* the field width keeps an overlong line from overrunning name[] */
        parsed = (next != 0 && sscanf(next, "%1023s\n", name) == 1);
        free(next);
        if (!parsed) {
            result = 0;
            break;
        }
        for (j = 0; allTypes != 0 && allTypes[j].name != 0; ++j) {
            if (!strcmp(allTypes[j].name, name)) {
                myTypes[k].name = strdup(name);
                myTypes[k].list = allTypes[j].list;
                break;
            }
        }
        /* unknown name, or strdup() failed: do not leave a nameless entry */
        if (myTypes[k].name == 0) {
            fprintf(stderr, "Could not resolve attr_type '%s'\n", name);
            result = 0;
            break;
        }
    }

    if (result && count > 1)
        qsort(myTypes, (size_t) count, sizeof(myTypes[0]), compare_attr_types);

    return result;
}

/*
 * Read one tag description from the flat file into *tag.
 *
 * The description starts with "<index>:<name>"; the index must equal nref.
 * It is followed, in order, by the optional justify marker (only when built
 * with USE_JUSTIFY_ELTS), the attribute list, the attr_types section, the
 * "contents:" line, six tag-class lines and the flags line.
 *
 * allTypes is the table of known attribute types used to resolve the names
 * in the attr_types section.
 *
 * Returns non-zero on success, 0 on the first missing or malformed item.
 */
static int load_flat_HTTag(FILE *input, unsigned nref, HTTag * tag, AttrType * allTypes)
{
    int result = 0;
    unsigned ncmp = 0;
    char name[1024];
    int code;

    /* %u matches the unsigned target; the width bounds the write to name[] */
    code = fscanf(input, "%u:%1023s\n", &ncmp, name);
    if (code != 2 || nref != ncmp) {
        fprintf(stderr, "load_flat_HTTag error\n");
        return result;
    }

    tag->name = strdup(name);
    if (tag->name == 0) {
        fprintf(stderr, "Out of memory loading tag\n");
        return result;
    }
    result = 1;
#ifdef USE_COLOR_STYLE
    tag->name_len = strlen(tag->name);
#endif
#ifdef USE_JUSTIFY_ELTS
    if (fscanf(input, "%1023s\n", name) == 1) {
        tag->can_justify = !strcmp(name, "justify");
    } else {
        fprintf(stderr, "Did not find can_justify\n");
        result = 0;
    }
#endif
    if (result) {
        result = load_flat_AttrList(input, &(tag->attributes), &(tag->number_of_attributes));
    }
    if (result) {
        result = load_flat_tag_attr_types(input, tag, allTypes);
    }
    if (result) {
        char *next = get_line(input);

        if (next != 0
            && sscanf(next, "\t\tcontents: %1023s\n", name) == 1) {
            tag->contents = s2SGMLContent(name);
        } else {
            fprintf(stderr, "Did not find contents\n");
            result = 0;
        }
        /* released on both paths; free() accepts a null pointer */
        free(next);
    }
    if (result) {
        result = load_flat_TagClass(input, "tagclass", &(tag->tagclass));
    }
    if (result) {
        result = load_flat_TagClass(input, "contains", &(tag->contains));
    }
    if (result) {
        result = load_flat_TagClass(input, "icontains", &(tag->icontains));
    }
    if (result) {
        result = load_flat_TagClass(input, "contained", &(tag->contained));
    }
    if (result) {
        result = load_flat_TagClass(input, "icontained", &(tag->icontained));
    }
    if (result) {
        result = load_flat_TagClass(input, "canclose", &(tag->canclose));
    }
    if (result) {
        result = load_flat_TagFlags(input, "flags", &(tag->flags));
    }
    return result;
}
1283
/*
 * Read one attribute-type definition, "<index>:<name>" followed by its
 * attribute list, into *types.
 *
 * ncmp is the index the entry is expected to carry.
 *
 * Returns 1 on success; 0 if the header is missing, the index differs from
 * ncmp, the name cannot be duplicated, or the attribute list fails to load.
 */
static int load_flat_AttrType(FILE *input, AttrType * types, size_t ncmp)
{
    int ntst;
    char name[1024];

    /* the field width keeps an overlong name from overrunning name[] */
    if (fscanf(input, "%d:%1023s\n", &ntst, name) != 2
        || ntst != (int) ncmp) {
        return 0;
    }

    types->name = strdup(name);
    if (types->name == 0) {
        fprintf(stderr, "Out of memory loading attr_type\n");
        return 0;
    }

    /* ntst is reused to receive the list length, which is not kept here */
    if (!load_flat_AttrList(input, &(types->list), &ntst))
        return 0;

    return 1;
}
1299
load_flatfile(FILE * input)1300 static SGML_dtd *load_flatfile(FILE *input)
1301 {
1302 AttrType *attr_types = 0;
1303 SGML_dtd *result = 0;
1304 size_t n;
1305 int number_of_attrs = 0;
1306 int number_of_tags = 0;
1307 HTTag *tag;
1308 int code;
1309
1310 code = fscanf(input, "%d attr_types\n", &number_of_attrs);
1311 if (code
1312 && number_of_attrs
1313 && (attr_types = typecallocn(AttrType, number_of_attrs + 1)) != 0) {
1314 for (n = 0; n < (size_t) number_of_attrs; ++n) {
1315 if (!load_flat_AttrType(input, attr_types + n, n)) {
1316 break;
1317 }
1318 }
1319 }
1320
1321 code = fscanf(input, "%d tags\n", &number_of_tags);
1322 if (code == 1) {
1323 if ((result = typecalloc(SGML_dtd)) != 0
1324 && (result->tags = typecallocn(HTTag, (number_of_tags + 2))) != 0) {
1325 for (n = 0; n < (size_t) number_of_tags; ++n) {
1326 if (load_flat_HTTag(input, n, &(result->tags[n]), attr_types)) {
1327 result->number_of_tags = (n + 1);
1328 } else {
1329 break;
1330 }
1331 }
1332 tag = 0;
1333 for (n = 0; n < (size_t) number_of_tags; ++n) {
1334 if (result->tags[n].name != 0
1335 && !strcmp(result->tags[n].name, "OBJECT")) {
1336 tag = result->tags + number_of_tags;
1337 *tag = result->tags[n];
1338 tag->contents = SGML_MIXED;
1339 tag->flags = Tgf_strict;
1340 break;
1341 }
1342 }
1343 if (tag == 0) {
1344 fprintf(stderr, "Did not find OBJECT tag\n");
1345 result = 0;
1346 }
1347 }
1348 }
1349 return result;
1350 }
1351
main(int argc,char * argv[])1352 int main(int argc, char *argv[])
1353 {
1354 const SGML_dtd *the_dtd = &HTML_dtd;
1355 int ch;
1356 int dtd_version = 0;
1357 int c_option = FALSE;
1358 int h_option = FALSE;
1359 int l_option = FALSE;
1360 FILE *input = stdin;
1361 FILE *output = stdout;
1362
1363 while ((ch = getopt(argc, argv, GETOPT)) != -1) {
1364 switch (ch) {
1365 case 'c':
1366 c_option = TRUE;
1367 break;
1368 case 'h':
1369 h_option = TRUE;
1370 break;
1371 case 'l':
1372 l_option = TRUE;
1373 input = fopen(optarg, "r");
1374 if (input == 0)
1375 failed(optarg);
1376 break;
1377 case 'o':
1378 output = fopen(optarg, "w");
1379 if (output == 0)
1380 failed(optarg);
1381 break;
1382 case 't':
1383 dtd_version = 1;
1384 break;
1385 case 's':
1386 dtd_version = 0;
1387 break;
1388 default:
1389 usage();
1390 }
1391 }
1392
1393 HTSwitchDTD(dtd_version);
1394 if (l_option)
1395 the_dtd = load_flatfile(input);
1396
1397 if (the_dtd != 0) {
1398 if (c_option)
1399 dump_source(output, the_dtd, dtd_version);
1400 if (h_option)
1401 dump_header(output, the_dtd);
1402 if (!c_option && !h_option)
1403 dump_flatfile(output, the_dtd);
1404 }
1405
1406 return EXIT_SUCCESS;
1407 }
1408