1 /* sgmldecl.c -
2 SGML declaration parsing.
3
4 Written by James Clark (jjc@jclark.com).
5 */
6
7 #include "sgmlincl.h"
8
/* Symbolic names for the error numbers that are generated only by
   this module.  The numeric values are message numbers and must not
   be changed here. */

#define E_SHUNCHAR          159
#define E_STANDARD          163
#define E_SIGNIFICANT       164
#define E_BADLIT            165
#define E_SCOPE             166
#define E_XNUM              167
#define E_BADVERSION        168
#define E_NMUNSUP           169
#define E_XNMLIT            170
#define E_CHARDESC          171
#define E_CHARDUP           172
#define E_CHARRANGE         173
#define E_7BIT              174
#define E_CHARMISSING       175
#define E_SHUNNED           176
#define E_NONSGML           177
#define E_CAPSET            178
#define E_CAPMISSING        179
#define E_SYNTAX            180
#define E_CHARNUM           181
#define E_SWITCHES          182
#define E_INSTANCE          183
#define E_ZEROFEATURE       184
#define E_YESNO             185
#define E_CAPACITY          186
#define E_NOTSUPPORTED      187
#define E_FORMAL            189
#define E_BADCLASS          190
#define E_MUSTBENON         191
#define E_BADBASECHAR       199
#define E_SYNREFUNUSED      200
#define E_SYNREFUNDESC      201
#define E_SYNREFUNKNOWN     202
#define E_SYNREFUNKNOWNSET  203
#define E_FUNDUP            204
#define E_BADFUN            205
#define E_FUNCHAR           206
#define E_GENDELIM          207
#define E_SRDELIM           208
#define E_BADKEY            209
#define E_BADQUANTITY       210
#define E_BADNAME           211
#define E_REFNAME           212
#define E_DUPNAME           213
#define E_QUANTITY          214
#define E_QTOOBIG           215
#define E_NMSTRTCNT         219
#define E_NMCHARCNT         220
#define E_NMDUP             221
#define E_NMBAD             222
#define E_NMMINUS           223
#define E_UNKNOWNSET        227
#define E_TOTALCAP          235
65
#define CANON_NMC '.'           /* Canonical name character. */
#define CANON_NMS 'A'           /* Canonical name start character. */
#define CANON_MIN ':'           /* Canonical minimum data character. */

/* Return values of the section parsers in this file. */
#define SUCCESS 1
#define FAIL 0

/* Number of elements in the array v.  Only meaningful for true arrays,
   not for pointers.  The argument is parenthesized before indexing so
   that any array-valued expression may be passed. */
#define SIZEOF(v) (sizeof(v)/sizeof((v)[0]))

/* Non-zero if the name token tok equals str.  The comparison starts at
   tok+1, i.e. the first byte of the token buffer is skipped. */
#define matches(tok, str) (ustrcmp((tok)+1, (str)) == 0)
74
75 static UNCH standard[] = "ISO 8879:1986";
76
77 #define REFERENCE_SYNTAX "ISO 8879:1986//SYNTAX Reference//EN"
78 #define CORE_SYNTAX "ISO 8879:1986//SYNTAX Core//EN"
79
80 static UNCH (*newkey)[REFNAMELEN+1] = 0;
81
82 struct pmap {
83 char *name;
84 UNIV value;
85 };
86
87 /* The reference capacity set. */
88 #define REFCAPSET \
89 { 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, \
90 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L, 35000L }
91
92 long refcapset[NCAPACITY] = REFCAPSET;
93
94 /* A pmap of known capacity sets. */
95
96 static struct pmap capset_map[] = {
97 { "ISO 8879:1986//CAPACITY Reference//EN", (UNIV)refcapset },
98 { 0 },
99 };
100
/* Table of capacity names, indexed by capacity number.  Must match
   *CAP in sgmldecl.h.  Entry 0 is the total capacity, which
   sdcapacity() compares against all the others. */

char *captab[] = {
    "TOTALCAP",
    "ENTCAP",   "ENTCHCAP",
    "ELEMCAP",
    "GRPCAP",   "EXGRPCAP", "EXNMCAP",
    "ATTCAP",   "ATTCHCAP", "AVGRPCAP",
    "NOTCAP",   "NOTCHCAP",
    "IDCAP",    "IDREFCAP",
    "MAPCAP",
    "LKSETCAP", "LKNMCAP",
};
122
/* The default SGML declaration. */

/* Value used for sd.subdoc when SUPPORT_SUBDOC is defined. */
#define MAXNUMBER 99999999L

/* Reference quantity set.  REFLITLEN and REFNAMELEN are not defined
   here; they come from elsewhere. */

#define REFATTCNT    40
#define REFATTSPLEN  960
#define REFBSEQLEN   960
#define REFDTAGLEN   16
#define REFDTEMPLEN  16
#define REFENTLVL    16
#define REFGRPCNT    32
#define REFGRPGTCNT  96
#define REFGRPLVL    16
#define REFNORMSEP   2
#define REFPILEN     240
#define REFTAGLEN    960
#define REFTAGLVL    24

/* Byte budget used below to bound quantities that size arrays of
   structures. */
#define ALLOC_MAX 65534

/* Upper bound used for the length quantities. */
#define BIGINT 30000

/* Largest value accepted for each quantity; collected into
   max_quantity[] and enforced by sdquantity(). */

#define MAXATTCNT    ((ALLOC_MAX/sizeof(struct ad)) - 2)
#define MAXATTSPLEN  BIGINT
#define MAXBSEQLEN   BIGINT
#define MAXDTAGLEN   16
#define MAXDTEMPLEN  16
#define MAXENTLVL    ((ALLOC_MAX/sizeof(struct source)) - 1)
#define MAXGRPCNT    MAXGRPGTCNT
/* Must be between 96 and 253 */
#define MAXGRPGTCNT  253
#define MAXGRPLVL    MAXGRPGTCNT
#define MAXLITLEN    BIGINT
/* This guarantees that NAMELEN < LITLEN (ie there's always space for a name
   in a buffer intended for a literal.) */
#define MAXNAMELEN   (REFLITLEN - 1)
#define MAXNORMSEP   2
#define MAXPILEN     BIGINT
#define MAXTAGLEN    BIGINT
#define MAXTAGLVL    ((ALLOC_MAX/sizeof(struct tag)) - 1)
164
/* Table of quantity names, indexed by quantity number.  Must match Q*
   in sgmldecl.h and run parallel to max_quantity[]. */

static char *quantity_names[] = {
    "ATTCNT",  "ATTSPLEN", "BSEQLEN",
    "DTAGLEN", "DTEMPLEN",
    "ENTLVL",
    "GRPCNT",  "GRPGTCNT", "GRPLVL",
    "LITLEN",  "NAMELEN",  "NORMSEP",
    "PILEN",
    "TAGLEN",  "TAGLVL",
};
184
185 static int max_quantity[] = {
186 MAXATTCNT,
187 MAXATTSPLEN,
188 MAXBSEQLEN,
189 MAXDTAGLEN,
190 MAXDTEMPLEN,
191 MAXENTLVL,
192 MAXGRPCNT,
193 MAXGRPGTCNT,
194 MAXGRPLVL,
195 MAXLITLEN,
196 MAXNAMELEN,
197 MAXNORMSEP,
198 MAXPILEN,
199 MAXTAGLEN,
200 MAXTAGLVL,
201 };
202
/* Per-quantity flags, allocated by sdquantity() the first time a
   quantity is changed; null until then. */
static char *quantity_changed;

/* Non-zero means the APPINFO parameter was not NONE.  This is the
   value returned by sgmldecl(). */
static int appinfosw = 0;
207
208 struct sgmldecl sd = {
209 REFCAPSET, /* capacity */
210 #ifdef SUPPORT_SUBDOC
211 MAXNUMBER, /* subdoc */
212 #else /* not SUPPORT_SUBDOC */
213 0, /* subdoc */
214 #endif /* not SUPPORT_SUBDOC */
215 1, /* formal */
216 1, /* omittag */
217 1, /* shorttag */
218 1, /* shortref */
219 { 1, 0 }, /* general/entity name case translation */
220 { /* reference quantity set */
221 REFATTCNT,
222 REFATTSPLEN,
223 REFBSEQLEN,
224 REFDTAGLEN,
225 REFDTEMPLEN,
226 REFENTLVL,
227 REFGRPCNT,
228 REFGRPGTCNT,
229 REFGRPLVL,
230 REFLITLEN,
231 REFNAMELEN,
232 REFNORMSEP,
233 REFPILEN,
234 REFTAGLEN,
235 REFTAGLVL,
236 },
237 };
238
/* The system character set described as a base set: the identity
   mapping, entry i has the value i for all 256 character numbers. */
static int systemcharset[] = {
      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
};
257
258 /* This is a private use designating sequence that by convention
259 refers to the whole system character set whatever it is. */
260
261 #define SYSTEM_CHARSET_DESIGNATING_SEQUENCE "ESC 2/5 2/15 3/0"
262
263 static struct pmap charset_map[] = {
264 { "ESC 2/5 4/0", (UNIV)iso646charset }, /* ISO 646 IRV */
265 { "ESC 2/8 4/2", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */
266 { "ESC 2/8 4/0", (UNIV)iso646G0charset }, /* ISO Registration Number 6, ASCII */
267 { "ESC 2/13 4/1", (UNIV)iso8859_1charset }, /* Latin 1 */
268 { "ESC 2/1 4/0", (UNIV)iso646C0charset }, /* ISO 646, C0 */
269 { "ESC 2/2 4/3", (UNIV)iso6429C1charset }, /* ISO 6429, C1 */
270 { SYSTEM_CHARSET_DESIGNATING_SEQUENCE, (UNIV)systemcharset },
271 /* system character set */
272 { 0 }
273 };
274
275 static int synrefcharset[256]; /* the syntax reference character set */
276
277 #define CHAR_NONSGML 01
278 #define CHAR_SIGNIFICANT 02
279 #define CHAR_MAGIC 04
280 #define CHAR_SHUNNED 010
281
282 static UNCH char_flags[256];
283 static int done_nonsgml = 0;
284 static UNCH *nlextoke = 0; /* new lextoke */
285 static UNCH *nlextran = 0; /* new lextran */
286 #define MAX_SAVED_ERRS 4
287 static UNIV saved_errs[MAX_SAVED_ERRS];
288 static int nsaved_errs = 0;
289
290 static UNCH kcharset[] = "CHARSET";
291 static UNCH kbaseset[] = "BASESET";
292 static UNCH kdescset[] = "DESCSET";
293 static UNCH kunused[] = "UNUSED";
294 static UNCH kcapacity[] = "CAPACITY";
295 static UNCH kpublic[] = "PUBLIC";
296 static UNCH ksgmlref[] = "SGMLREF";
297 static UNCH kscope[] = "SCOPE";
298 static UNCH kdocument[] = "DOCUMENT";
299 static UNCH kinstance[] = "INSTANCE";
300 static UNCH ksyntax[] = "SYNTAX";
301 static UNCH kswitches[] = "SWITCHES";
302 static UNCH kfeatures[] = "FEATURES";
303 static UNCH kminimize[] = "MINIMIZE";
304 static UNCH kdatatag[] = "DATATAG";
305 static UNCH komittag[] = "OMITTAG";
306 static UNCH krank[] = "RANK";
307 static UNCH kshorttag[] = "SHORTTAG";
308 static UNCH klink[] = "LINK";
309 static UNCH ksimple[] = "SIMPLE";
310 static UNCH kimplicit[] = "IMPLICIT";
311 static UNCH kexplicit[] = "EXPLICIT";
312 static UNCH kother[] = "OTHER";
313 static UNCH kconcur[] = "CONCUR";
314 static UNCH ksubdoc[] = "SUBDOC";
315 static UNCH kformal[] = "FORMAL";
316 static UNCH kyes[] = "YES";
317 static UNCH kno[] = "NO";
318 static UNCH kappinfo[] = "APPINFO";
319 static UNCH knone[] = "NONE";
320 static UNCH kshunchar[] = "SHUNCHAR";
321 static UNCH kcontrols[] = "CONTROLS";
322 static UNCH kfunction[] = "FUNCTION";
323 static UNCH krs[] = "RS";
324 static UNCH kre[] = "RE";
325 static UNCH kspace[] = "SPACE";
326 static UNCH knaming[] = "NAMING";
327 static UNCH klcnmstrt[] = "LCNMSTRT";
328 static UNCH kucnmstrt[] = "UCNMSTRT";
329 static UNCH klcnmchar[] = "LCNMCHAR";
330 static UNCH kucnmchar[] = "UCNMCHAR";
331 static UNCH knamecase[] = "NAMECASE";
332 static UNCH kdelim[] = "DELIM";
333 static UNCH kgeneral[] = "GENERAL";
334 static UNCH kentity[] = "ENTITY";
335 static UNCH kshortref[] = "SHORTREF";
336 static UNCH knames[] = "NAMES";
337 static UNCH kquantity[] = "QUANTITY";
338
339 #define sderr mderr
340
341 static UNIV pmaplookup P((struct pmap *, char *));
342 static UNCH *ltous P((long));
343 static VOID sdfixstandard P((UNCH *, int));
344 static int sdparm P((UNCH *, struct parse *));
345 static int sdname P((UNCH *, UNCH *));
346 static int sdckname P((UNCH *, UNCH *));
347 static int sdversion P((UNCH *));
348 static int sdcharset P((UNCH *));
349 static int sdcsdesc P((UNCH *, int *));
350 static int sdpubcapacity P((UNCH *));
351 static int sdcapacity P((UNCH *));
352 static int sdscope P((UNCH *));
353 static VOID setlexical P((void));
354 static VOID noemptytag P((void));
355 static int sdpubsyntax P((UNCH *));
356 static int sdsyntax P((UNCH *));
357 static int sdxsyntax P((UNCH *));
358 static int sdtranscharnum P((UNCH *));
359 static int sdtranschar P((int));
360 static int sdshunchar P((UNCH *));
361 static int sdsynref P((UNCH *));
362 static int sdfunction P((UNCH *));
363 static int sdnaming P((UNCH *));
364 static int sddelim P((UNCH *));
365 static int sdnames P((UNCH *));
366 static int sdquantity P((UNCH *));
367 static int sdfeatures P((UNCH *));
368 static int sdappinfo P((UNCH *));
369 static VOID sdsaverr P((UNS, UNCH *, UNCH *));
370
371 static VOID bufsalloc P((void));
372 static VOID bufsrealloc P((void));
373
374 /* Parse the SGML declaration. Return non-zero if there was some appinfo. */
375
sgmldecl()376 int sgmldecl()
377 {
378 int i;
379 int errsw = 0;
380 UNCH endbuf[REFNAMELEN+2]; /* buffer for parsing terminating > */
381 static int (*section[]) P((UNCH *)) = {
382 sdversion,
383 sdcharset,
384 sdcapacity,
385 sdscope,
386 sdsyntax,
387 sdfeatures,
388 sdappinfo,
389 };
390 /* These are needed if we use mderr. */
391 parmno = 0;
392 mdname = sgmlkey;
393 subdcl = NULL;
394 nsaved_errs = 0;
395 for (i = 0; i < SIZEOF(section); i++)
396 if ((*section[i])(tbuf) == FAIL) {
397 errsw = 1;
398 break;
399 }
400 if (sd.formal) {
401 /* print saved errors */
402 int i;
403 for (i = 0; i < nsaved_errs; i++)
404 svderr(saved_errs[i]);
405 }
406 else {
407 /* free saved errors */
408 int i;
409 for (i = 0; i < nsaved_errs; i++)
410 msgsfree(saved_errs[i]);
411 }
412
413 if (!errsw)
414 setlexical();
415 bufsrealloc();
416 /* Parse the >. Don't overwrite the appinfo. */
417 if (!errsw)
418 sdparm(endbuf, 0);
419 /* We must exit if we hit end of document. */
420 if (pcbsd.action == EOD_)
421 exiterr(161, &pcbsd);
422 if (!errsw && pcbsd.action != ESGD)
423 sderr(126, (UNCH *)0, (UNCH *)0);
424 return appinfosw;
425 }
426
427 /* Parse the literal (which should contain the version of the
428 standard) at the beginning of a SGML declaration. */
429
sdversion(tbuf)430 static int sdversion(tbuf)
431 UNCH *tbuf;
432 {
433 if (sdparm(tbuf, &pcblitv) != LIT1) {
434 sderr(123, (UNCH *)0, (UNCH *)0);
435 return FAIL;
436 }
437 sdfixstandard(tbuf, 0);
438 if (ustrcmp(tbuf, standard) != 0)
439 sderr(E_BADVERSION, tbuf, standard);
440 return SUCCESS;
441 }
442
443 /* Parse the CHARSET section. Use one token lookahead. */
444
sdcharset(tbuf)445 static int sdcharset(tbuf)
446 UNCH *tbuf;
447 {
448 int i;
449 int status[256];
450
451 if (sdname(tbuf, kcharset) == FAIL) return FAIL;
452 (void)sdparm(tbuf, 0);
453
454 if (sdcsdesc(tbuf, status) == FAIL)
455 return FAIL;
456
457 #if 0
458 for (i = 128; i < 256; i++)
459 if (status[i] != UNDESC)
460 break;
461 if (i >= 256) {
462 /* Only a 7-bit character set was described. Fill it out to 8-bits. */
463 for (i = 128; i < 256; i++)
464 status[i] = UNUSED;
465 #if 0
466 sderr(E_7BIT, (UNCH *)0, (UNCH *)0);
467 #endif
468 }
469 #endif
470 /* Characters that are declared UNUSED in the document character set
471 are assigned to non-SGML. */
472 for (i = 0; i < 256; i++) {
473 if (status[i] == UNDESC) {
474 #if 0
475 sderr(E_CHARMISSING, ltous((long)i), (UNCH *)0);
476 #endif
477 char_flags[i] |= CHAR_NONSGML;
478 }
479 else if (status[i] == UNUSED)
480 char_flags[i] |= CHAR_NONSGML;
481 }
482 done_nonsgml = 1;
483 return SUCCESS;
484 }
485
486 /* Parse a character set description. Uses one character lookahead. */
487
sdcsdesc(tbuf,status)488 static int sdcsdesc(tbuf, status)
489 UNCH *tbuf;
490 int *status;
491 {
492 int i;
493 int nsets = 0;
494 struct fpi fpi;
495
496 for (i = 0; i < 256; i++)
497 status[i] = UNDESC;
498
499 for (;;) {
500 int nchars;
501 int *baseset = 0;
502
503 if (pcbsd.action != NAS1) {
504 if (nsets == 0) {
505 sderr(120, (UNCH *)0, (UNCH *)0);
506 return FAIL;
507 }
508 break;
509 }
510 if (!matches(tbuf, kbaseset)) {
511 if (nsets == 0) {
512 sderr(118, tbuf+1, kbaseset);
513 return FAIL;
514 }
515 break;
516 }
517 nsets++;
518 MEMZERO((UNIV)&fpi, FPISZ);
519 if (sdparm(tbuf, &pcblitv) != LIT1) {
520 sderr(123, (UNCH *)0, (UNCH *)0);
521 return FAIL;
522 }
523 fpi.fpipubis = tbuf;
524 /* Give a warning if it is not a CHARSET fpi. */
525 if (parsefpi(&fpi))
526 sdsaverr(E_FORMAL, (UNCH *)0, (UNCH *)0);
527 else if (fpi.fpic != FPICHARS)
528 sdsaverr(E_BADCLASS, kcharset, (UNCH *)0);
529 else {
530 fpi.fpipubis[fpi.fpil + fpi.fpill] = '\0';
531 baseset = (int *)pmaplookup(charset_map,
532 (char *)fpi.fpipubis + fpi.fpil);
533 if (!baseset)
534 sderr(E_UNKNOWNSET, fpi.fpipubis + fpi.fpil, (UNCH *)0);
535 }
536 if (sdname(tbuf, kdescset) == FAIL) return FAIL;
537 nchars = 0;
538 for (;;) {
539 long start, count;
540 long basenum;
541 if (sdparm(tbuf, 0) != NUM1)
542 break;
543 start = atol((char *)tbuf);
544 if (sdparm(tbuf, 0) != NUM1) {
545 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
546 return FAIL;
547 }
548 count = atol((char *)tbuf);
549 switch (sdparm(tbuf, &pcblitv)) {
550 case NUM1:
551 basenum = atol((char *)tbuf);
552 break;
553 case LIT1:
554 basenum = UNKNOWN;
555 break;
556 case NAS1:
557 if (matches(tbuf, kunused)) {
558 basenum = UNUSED;
559 break;
560 }
561 /* fall through */
562 default:
563 sderr(E_CHARDESC, ltous(start), (UNCH *)0);
564 return FAIL;
565 }
566 if (start + count > 256)
567 sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
568 else {
569 int i;
570 int lim = (int)start + count;
571 for (i = (int)start; i < lim; i++) {
572 if (status[i] != UNDESC)
573 sderr(E_CHARDUP, ltous((long)i), (UNCH *)0);
574 else if (basenum == UNUSED || basenum == UNKNOWN)
575 status[i] = (int)basenum;
576 else if (baseset == 0)
577 status[i] = UNKNOWN_SET;
578 else {
579 int n = basenum + (i - start);
580 if (n < 0 || n > 255)
581 sderr(E_CHARRANGE, (UNCH *)0, (UNCH *)0);
582 else {
583 if (baseset[n] == UNUSED)
584 sderr(E_BADBASECHAR, ltous((long)n),
585 (UNCH *)0);
586 status[i] = baseset[n];
587 }
588 }
589 }
590 }
591 nchars++;
592 }
593 if (nchars == 0) {
594 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
595 return FAIL;
596 }
597 }
598 return SUCCESS;
599 }
600
601 /* Parse the CAPACITY section. Uses one token lookahead. */
602
sdcapacity(tbuf)603 static int sdcapacity(tbuf)
604 UNCH *tbuf;
605 {
606 int ncap;
607 int i;
608
609 if (sdckname(tbuf, kcapacity) == FAIL)
610 return FAIL;
611 if (sdparm(tbuf, 0) != NAS1) {
612 sderr(120, (UNCH *)0, (UNCH *)0);
613 return FAIL;
614 }
615 if (matches(tbuf, kpublic))
616 return sdpubcapacity(tbuf);
617 if (!matches(tbuf, ksgmlref)) {
618 sderr(E_CAPACITY, tbuf+1, (UNCH *)0);
619 return FAIL;
620 }
621 memcpy((UNIV)sd.capacity, (UNIV)refcapset, sizeof(sd.capacity));
622 ncap = 0;
623 for (;;) {
624 int capno = -1;
625 int i;
626
627 if (sdparm(tbuf, 0) != NAS1)
628 break;
629 for (i = 0; i < SIZEOF(captab); i++)
630 if (matches(tbuf, captab[i])) {
631 capno = i;
632 break;
633 }
634 if (capno < 0)
635 break;
636 if (sdparm(tbuf, 0) != NUM1) {
637 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
638 return FAIL;
639 }
640 sd.capacity[capno] = atol((char *)tbuf);
641 ncap++;
642 }
643 if (ncap == 0) {
644 sderr(E_CAPMISSING, (UNCH *)0, (UNCH *)0);
645 return FAIL;
646 }
647 for (i = 1; i < NCAPACITY; i++)
648 if (sd.capacity[i] > sd.capacity[0])
649 sderr(E_TOTALCAP, (UNCH *)captab[i], (UNCH *)0);
650 return SUCCESS;
651 }
652
653 /* Parse a CAPACITY section that started with PUBLIC. Must do one
654 token lookahead, since sdcapacity() also does. */
655
sdpubcapacity(tbuf)656 static int sdpubcapacity(tbuf)
657 UNCH *tbuf;
658 {
659 UNIV ptr;
660 if (sdparm(tbuf, &pcblitv) != LIT1) {
661 sderr(123, (UNCH *)0, (UNCH *)0);
662 return FAIL;
663 }
664 sdfixstandard(tbuf, 1);
665 ptr = pmaplookup(capset_map, (char *)tbuf);
666 if (!ptr)
667 sderr(E_CAPSET, tbuf, (UNCH *)0);
668 else
669 memcpy((UNIV)sd.capacity, (UNIV)ptr, sizeof(sd.capacity));
670 (void)sdparm(tbuf, 0);
671 return SUCCESS;
672 }
673
674 /* Parse the SCOPE section. Uses no lookahead. */
675
sdscope(tbuf)676 static int sdscope(tbuf)
677 UNCH *tbuf;
678 {
679 if (sdckname(tbuf, kscope) == FAIL)
680 return FAIL;
681 if (sdparm(tbuf, 0) != NAS1) {
682 sderr(120, (UNCH *)0, (UNCH *)0);
683 return FAIL;
684 }
685 if (matches(tbuf, kdocument))
686 ;
687 else if (matches(tbuf, kinstance))
688 sderr(E_INSTANCE, (UNCH *)0, (UNCH *)0);
689 else {
690 sderr(E_SCOPE, tbuf+1, (UNCH *)0);
691 return FAIL;
692 }
693 return SUCCESS;
694 }
695
696 /* Parse the SYNTAX section. Uses one token lookahead. */
697
sdsyntax(tbuf)698 static int sdsyntax(tbuf)
699 UNCH *tbuf;
700 {
701 if (sdname(tbuf, ksyntax) == FAIL) return FAIL;
702 if (sdparm(tbuf, 0) != NAS1) {
703 sderr(120, (UNCH *)0, (UNCH *)0);
704 return FAIL;
705 }
706 if (matches(tbuf, kpublic))
707 return sdpubsyntax(tbuf);
708 return sdxsyntax(tbuf);
709 }
710
711 /* Parse the SYNTAX section which starts with PUBLIC. Uses one token
712 lookahead. */
713
sdpubsyntax(tbuf)714 static int sdpubsyntax(tbuf)
715 UNCH *tbuf;
716 {
717 int nswitches;
718 if (sdparm(tbuf, &pcblitv) != LIT1)
719 return FAIL;
720 sdfixstandard(tbuf, 1);
721 if (ustrcmp(tbuf, CORE_SYNTAX) == 0)
722 sd.shortref = 0;
723 else if (ustrcmp(tbuf, REFERENCE_SYNTAX) == 0)
724 sd.shortref = 1;
725 else
726 sderr(E_SYNTAX, tbuf, (UNCH *)0);
727 if (sdparm(tbuf, 0) != NAS1)
728 return SUCCESS;
729 if (!matches(tbuf, kswitches))
730 return SUCCESS;
731 nswitches = 0;
732 for (;;) {
733 int errsw = 0;
734
735 if (sdparm(tbuf, 0) != NUM1)
736 break;
737 if (atol((char *)tbuf) > 255) {
738 sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
739 errsw = 1;
740 }
741 if (sdparm(tbuf, 0) != NUM1) {
742 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
743 return FAIL;
744 }
745 if (!errsw) {
746 if (atol((char *)tbuf) > 255)
747 sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
748 }
749 nswitches++;
750 }
751 if (nswitches == 0) {
752 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
753 return FAIL;
754 }
755 sderr(E_SWITCHES, (UNCH *)0, (UNCH *)0);
756 return SUCCESS;
757 }
758
759 /* Parse an explicit concrete syntax. Uses one token lookahead. */
760
761 static
sdxsyntax(tbuf)762 int sdxsyntax(tbuf)
763 UNCH *tbuf;
764 {
765 static int (*section[]) P((UNCH *)) = {
766 sdshunchar,
767 sdsynref,
768 sdfunction,
769 sdnaming,
770 sddelim,
771 sdnames,
772 sdquantity,
773 };
774 int i;
775
776 for (i = 0; i < SIZEOF(section); i++)
777 if ((*section[i])(tbuf) == FAIL)
778 return FAIL;
779 return SUCCESS;
780 }
781
782 /* Parse the SHUNCHAR section. Uses one token lookahead. */
783
784 static
sdshunchar(tbuf)785 int sdshunchar(tbuf)
786 UNCH *tbuf;
787 {
788 int i;
789 for (i = 0; i < 256; i++)
790 char_flags[i] &= ~CHAR_SHUNNED;
791
792 if (sdckname(tbuf, kshunchar) == FAIL)
793 return FAIL;
794
795 if (sdparm(tbuf, 0) == NAS1) {
796 if (matches(tbuf, knone)) {
797 (void)sdparm(tbuf, 0);
798 return SUCCESS;
799 }
800 if (matches(tbuf, kcontrols)) {
801 for (i = 0; i < 256; i++)
802 if (ISASCII(i) && iscntrl(i))
803 char_flags[i] |= CHAR_SHUNNED;
804 if (sdparm(tbuf, 0) != NUM1)
805 return SUCCESS;
806 }
807 }
808 if (pcbsd.action != NUM1) {
809 sderr(E_SHUNCHAR, (UNCH *)0, (UNCH *)0);
810 return FAIL;
811 }
812 do {
813 long n = atol((char *)tbuf);
814 if (n > 255)
815 sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
816 else
817 char_flags[(int)n] |= CHAR_SHUNNED;
818 } while (sdparm(tbuf, 0) == NUM1);
819 return SUCCESS;
820 }
821
822 /* Parse the syntax reference character set. Uses one token lookahead. */
823
824 static
sdsynref(tbuf)825 int sdsynref(tbuf)
826 UNCH *tbuf;
827 {
828 return sdcsdesc(tbuf, synrefcharset);
829 }
830
831 /* Translate a character number from the syntax reference character set
832 to the system character set. If it can't be done, give an error message
833 and return -1. */
834
835 static
sdtranscharnum(tbuf)836 int sdtranscharnum(tbuf)
837 UNCH *tbuf;
838 {
839 long n = atol((char *)tbuf);
840 if (n > 255) {
841 sderr(E_CHARNUM, (UNCH *)0, (UNCH *)0);
842 return -1;
843 }
844 return sdtranschar((int)n);
845 }
846
847
848 static
sdtranschar(n)849 int sdtranschar(n)
850 int n;
851 {
852 int ch = synrefcharset[n];
853 if (ch >= 0)
854 return ch;
855 switch (ch) {
856 case UNUSED:
857 sderr(E_SYNREFUNUSED, ltous((long)n), (UNCH *)0);
858 break;
859 case UNDESC:
860 sderr(E_SYNREFUNDESC, ltous((long)n), (UNCH *)0);
861 break;
862 case UNKNOWN:
863 sderr(E_SYNREFUNKNOWN, ltous((long)n), (UNCH *)0);
864 break;
865 case UNKNOWN_SET:
866 sderr(E_SYNREFUNKNOWNSET, ltous((long)n), (UNCH *)0);
867 break;
868 default:
869 abort();
870 }
871 return -1;
872 }
873
874
875 /* Parse the function section. Uses two tokens lookahead. "NAMING"
876 could be a function name. */
877
878 static
sdfunction(tbuf)879 int sdfunction(tbuf)
880 UNCH *tbuf;
881 {
882 static UNCH *fun[] = { kre, krs, kspace };
883 static int funval[] = { RECHAR, RSCHAR, ' ' };
884 int i;
885 int had_tab = 0;
886 int changed = 0; /* attempted to change reference syntax */
887
888 if (sdckname(tbuf, kfunction) == FAIL)
889 return FAIL;
890 for (i = 0; i < SIZEOF(fun); i++) {
891 int ch;
892 if (sdname(tbuf, fun[i]) == FAIL)
893 return FAIL;
894 if (sdparm(tbuf, 0) != NUM1) {
895 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
896 return FAIL;
897 }
898 ch = sdtranscharnum(tbuf);
899 if (ch >= 0 && ch != funval[i])
900 changed = 1;
901 }
902 for (;;) {
903 int tabsw = 0;
904 int namingsw = 0;
905 if (sdparm(tbuf, 0) != NAS1) {
906 sderr(120, (UNCH *)0, (UNCH *)0);
907 return FAIL;
908 }
909 if (matches(tbuf, (UNCH *)"TAB")) {
910 tabsw = 1;
911 if (had_tab)
912 sderr(E_FUNDUP, (UNCH *)0, (UNCH *)0);
913 }
914 else {
915 for (i = 0; i < SIZEOF(fun); i++)
916 if (matches(tbuf, fun[i]))
917 sderr(E_BADFUN, fun[i], (UNCH *)0);
918 if (matches(tbuf, knaming))
919 namingsw = 1;
920 else
921 changed = 1;
922 }
923 if (sdparm(tbuf, 0) != NAS1) {
924 sderr(120, (UNCH *)0, (UNCH *)0);
925 return FAIL;
926 }
927 if (namingsw) {
928 if (matches(tbuf, klcnmstrt))
929 break;
930 changed = 1;
931 }
932 if (sdparm(tbuf, 0) != NUM1) {
933 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
934 return FAIL;
935 }
936 if (tabsw && !had_tab) {
937 int ch = sdtranscharnum(tbuf);
938 if (ch >= 0 && ch != TABCHAR)
939 changed = 1;
940 had_tab = 1;
941 }
942
943 }
944 if (!had_tab)
945 changed = 1;
946 if (changed)
947 sderr(E_FUNCHAR, (UNCH *)0, (UNCH *)0);
948 return SUCCESS;
949 }
950
951 /* Parse the NAMING section. Uses no lookahead. */
952
953 static
sdnaming(tbuf)954 int sdnaming(tbuf)
955 UNCH *tbuf;
956 {
957 int i;
958 int bad = 0;
959 static UNCH *classes[] = { klcnmstrt, kucnmstrt, klcnmchar, kucnmchar };
960 static UNCH *types[] = { kgeneral, kentity };
961
962 #define NCLASSES SIZEOF(classes)
963
964 int bufsize = 4; /* allocated size of buf */
965 UNCH *buf = (UNCH *)rmalloc(bufsize); /* holds characters
966 in naming classes */
967 int bufi = 0; /* next index into buf */
968 int start[NCLASSES]; /* index of first character for each class */
969 int count[NCLASSES]; /* number of characters for each class */
970
971 for (i = 0; i < NCLASSES; i++) {
972 UNCH *s;
973
974 if (sdckname(tbuf, classes[i]) == FAIL) {
975 frem((UNIV)buf);
976 return FAIL;
977 }
978 if (sdparm(tbuf, &pcblitp) != LIT1) {
979 sderr(123, (UNCH *)0, (UNCH *)0);
980 frem((UNIV)buf);
981 return FAIL;
982 }
983 start[i] = bufi;
984
985 for (s = tbuf; *s; s++) {
986 int c = *s;
987 if (c == DELNONCH) {
988 c = UNSHIFTNON(*s);
989 s++;
990 }
991 c = sdtranschar(c);
992 if (c < 0)
993 bad = 1;
994 else if ((char_flags[c] & (CHAR_SIGNIFICANT | CHAR_MAGIC))
995 && c != '.' && c != '-') {
996 int class = lextoke[c];
997 if (class == SEP || class == SP || class == NMC
998 || class == NMS || class == NU)
999 sderr(E_NMBAD, ltous((long)c), (UNCH *)0);
1000 else
1001 sderr(E_NMUNSUP, ltous((long)c), (UNCH *)0);
1002 bad = 1;
1003 }
1004 if (bufi >= bufsize)
1005 buf = (UNCH *)rrealloc((UNIV)buf, bufsize *= 2);
1006 buf[bufi++] = c;
1007 }
1008
1009 count[i] = bufi - start[i];
1010 (void)sdparm(tbuf, 0);
1011 }
1012 if (!bad && count[0] != count[1]) {
1013 sderr(E_NMSTRTCNT, (UNCH *)0, (UNCH *)0);
1014 bad = 1;
1015 }
1016 if (!bad && count[2] != count[3]) {
1017 sderr(E_NMCHARCNT, (UNCH *)0, (UNCH *)0);
1018 bad = 1;
1019 }
1020 if (!bad) {
1021 nlextoke = (UNCH *)rmalloc(256);
1022 memcpy((UNIV)nlextoke, lextoke, 256);
1023 nlextoke['.'] = nlextoke['-'] = INV;
1024
1025 nlextran = (UNCH *)rmalloc(256);
1026 memcpy((UNIV)nlextran, lextran, 256);
1027
1028 for (i = 0; i < count[0]; i++) {
1029 UNCH lc = buf[start[0] + i];
1030 UNCH uc = buf[start[1] + i];
1031 nlextoke[lc] = NMS;
1032 nlextoke[uc] = NMS;
1033 nlextran[lc] = uc;
1034 }
1035
1036 for (i = 0; i < count[2]; i++) {
1037 UNCH lc = buf[start[2] + i];
1038 UNCH uc = buf[start[3] + i];
1039 if (nlextoke[lc] == NMS) {
1040 sderr(E_NMDUP, ltous((long)lc), (UNCH *)0);
1041 bad = 1;
1042 }
1043 else if (nlextoke[uc] == NMS) {
1044 sderr(E_NMDUP, ltous((long)uc), (UNCH *)0);
1045 bad = 1;
1046 }
1047 else {
1048 nlextoke[lc] = NMC;
1049 nlextoke[uc] = NMC;
1050 nlextran[lc] = uc;
1051 }
1052 }
1053 if (nlextoke['-'] != NMC) {
1054 sderr(E_NMMINUS, (UNCH *)0, (UNCH *)0);
1055 bad = 1;
1056 }
1057 if (bad) {
1058 if (nlextoke) {
1059 frem((UNIV)nlextoke);
1060 nlextoke = 0;
1061 }
1062 if (nlextran) {
1063 frem((UNIV)nlextran);
1064 nlextran = 0;
1065 }
1066 }
1067 }
1068
1069 frem((UNIV)buf);
1070
1071 if (sdckname(tbuf, knamecase) == FAIL)
1072 return FAIL;
1073 for (i = 0; i < SIZEOF(types); ++i) {
1074 if (sdname(tbuf, types[i]) == FAIL)
1075 return FAIL;
1076 if (sdparm(tbuf, 0) != NAS1) {
1077 sderr(120, (UNCH *)0, (UNCH *)0);
1078 return FAIL;
1079 }
1080 if (matches(tbuf, kyes))
1081 sd.namecase[i] = 1;
1082 else if (matches(tbuf, kno))
1083 sd.namecase[i] = 0;
1084 else {
1085 sderr(E_YESNO, tbuf+1, (UNCH *)0);
1086 return FAIL;
1087 }
1088 }
1089 return SUCCESS;
1090 }
1091
1092 /* Parse the DELIM section. Uses one token lookahead. */
1093
1094 static
sddelim(tbuf)1095 int sddelim(tbuf)
1096 UNCH *tbuf;
1097 {
1098 int changed = 0;
1099 if (sdname(tbuf, kdelim) == FAIL
1100 || sdname(tbuf, kgeneral) == FAIL
1101 || sdname(tbuf, ksgmlref) == FAIL)
1102 return FAIL;
1103 for (;;) {
1104 if (sdparm(tbuf, 0) != NAS1) {
1105 sderr(120, (UNCH *)0, (UNCH *)0);
1106 return FAIL;
1107 }
1108 if (matches(tbuf, kshortref))
1109 break;
1110 if (sdparm(tbuf, &pcblitp) != LIT1) {
1111 sderr(123, (UNCH *)0, (UNCH *)0);
1112 return FAIL;
1113 }
1114 changed = 1;
1115 }
1116 if (changed) {
1117 sderr(E_GENDELIM, (UNCH *)0,(UNCH *)0);
1118 changed = 0;
1119 }
1120 if (sdparm(tbuf, 0) != NAS1) {
1121 sderr(120, (UNCH *)0, (UNCH *)0);
1122 return FAIL;
1123 }
1124 if (matches(tbuf, ksgmlref))
1125 sd.shortref = 1;
1126 else if (matches(tbuf, knone))
1127 sd.shortref = 0;
1128 else {
1129 sderr(118, tbuf+1, ksgmlref); /* probably they forgot SGMLREF */
1130 return FAIL;
1131 }
1132 while (sdparm(tbuf, &pcblitp) == LIT1)
1133 changed = 1;
1134 if (changed)
1135 sderr(E_SRDELIM, (UNCH *)0, (UNCH *)0);
1136 return SUCCESS;
1137 }
1138
1139 /* Parse the NAMES section. Uses one token lookahead. */
1140
1141 static
sdnames(tbuf)1142 int sdnames(tbuf)
1143 UNCH *tbuf;
1144 {
1145 int i;
1146 if (sdckname(tbuf, knames) == FAIL)
1147 return FAIL;
1148 if (sdname(tbuf, ksgmlref) == FAIL)
1149 return FAIL;
1150
1151 while (sdparm(tbuf, 0) == NAS1) {
1152 int j;
1153 if (matches(tbuf, kquantity))
1154 break;
1155 for (i = 0; i < NKEYS; i++)
1156 if (matches(tbuf, key[i]))
1157 break;
1158 if (i >= NKEYS) {
1159 sderr(E_BADKEY, tbuf+1, (UNCH *)0);
1160 return FAIL;
1161 }
1162 if (sdparm(tbuf, &pcblitp) != NAS1) {
1163 sderr(120, (UNCH *)0, (UNCH *)0);
1164 return FAIL;
1165 }
1166 if (!newkey) {
1167 newkey = (UNCH (*)[REFNAMELEN+1])rmalloc((REFNAMELEN+1)*NKEYS);
1168 MEMZERO((UNIV)newkey, (REFNAMELEN+1)*NKEYS);
1169 }
1170 for (j = 0; j < NKEYS; j++) {
1171 if (matches(tbuf, key[j])) {
1172 sderr(E_REFNAME, tbuf + 1, (UNCH *)0);
1173 break;
1174 }
1175 if (matches(tbuf, newkey[j])) {
1176 sderr(E_DUPNAME, tbuf + 1, (UNCH *)0);
1177 break;
1178 }
1179 }
1180 if (j >= NKEYS)
1181 ustrcpy(newkey[i], tbuf + 1);
1182 }
1183 /* Now install the new keys. */
1184 if (newkey) {
1185 for (i = 0; i < NKEYS; i++)
1186 if (newkey[i][0] != '\0') {
1187 UNCH temp[REFNAMELEN + 1];
1188
1189 ustrcpy(temp, key[i]);
1190 ustrcpy(key[i], newkey[i]);
1191 ustrcpy(newkey[i], temp);
1192 }
1193 }
1194 return SUCCESS;
1195 }
1196
1197 /* Parse the QUANTITY section. Uses one token lookahead. */
1198
sdquantity(tbuf)1199 static int sdquantity(tbuf)
1200 UNCH *tbuf;
1201 {
1202 int quantity[NQUANTITY];
1203 int i;
1204
1205 for (i = 0; i < NQUANTITY; i++)
1206 quantity[i] = -1;
1207 if (sdckname(tbuf, kquantity) == FAIL)
1208 return FAIL;
1209 if (sdname(tbuf, ksgmlref) == FAIL)
1210 return FAIL;
1211 while (sdparm(tbuf, 0) == NAS1 && !matches(tbuf, kfeatures)) {
1212 long n;
1213 for (i = 0; i < SIZEOF(quantity_names); i++)
1214 if (matches(tbuf, quantity_names[i]))
1215 break;
1216 if (i >= SIZEOF(quantity_names)) {
1217 sderr(E_BADQUANTITY, tbuf + 1, (UNCH *)0);
1218 return FAIL;
1219 }
1220 if (sdparm(tbuf, 0) != NUM1) {
1221 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
1222 return FAIL;
1223 }
1224 n = atol((char *)tbuf);
1225 if (n < sd.quantity[i])
1226 sderr(E_QUANTITY, (UNCH *)quantity_names[i],
1227 ltous((long)sd.quantity[i]));
1228 else if (n > max_quantity[i]) {
1229 sderr(E_QTOOBIG, (UNCH *)quantity_names[i],
1230 ltous((long)max_quantity[i]));
1231 quantity[i] = max_quantity[i];
1232 }
1233 else
1234 quantity[i] = (int)n;
1235 }
1236 for (i = 0; i < NQUANTITY; i++)
1237 if (quantity[i] > 0) {
1238 sd.quantity[i] = quantity[i];
1239 if (!quantity_changed)
1240 quantity_changed = (char *)rmalloc(NQUANTITY);
1241 quantity_changed[i] = 1;
1242 }
1243 return SUCCESS;
1244 }
1245
1246 /* Parse the FEATURES section. Uses no lookahead. */
1247
sdfeatures(tbuf)1248 static int sdfeatures(tbuf)
1249 UNCH *tbuf;
1250 {
1251 static struct {
1252 UNCH *name;
1253 UNCH argtype; /* 0 = no argument, 1 = boolean, 2 = numeric */
1254 UNIV valp; /* UNCH * if boolean, long * if numeric. */
1255 } features[] = {
1256 { kminimize, 0, 0 },
1257 { kdatatag, 1, 0 },
1258 { komittag, 1, (UNIV)&sd.omittag },
1259 { krank, 1, 0 },
1260 { kshorttag, 1, (UNIV)&sd.shorttag },
1261 { klink, 0, 0 },
1262 { ksimple, 2, 0 },
1263 { kimplicit, 1, 0 },
1264 { kexplicit, 2, 0 },
1265 { kother, 0, 0 },
1266 { kconcur, 2, 0 },
1267 { ksubdoc, 2, (UNIV)&sd.subdoc },
1268 { kformal, 1, (UNIV)&sd.formal },
1269 };
1270
1271 int i;
1272
1273 if (sdckname(tbuf, kfeatures) == FAIL)
1274 return FAIL;
1275 for (i = 0; i < SIZEOF(features); i++) {
1276 if (sdname(tbuf, features[i].name) == FAIL) return FAIL;
1277 if (features[i].argtype > 0) {
1278 long n;
1279 if (sdparm(tbuf, 0) != NAS1) {
1280 sderr(120, (UNCH *)0, (UNCH *)0);
1281 return FAIL;
1282 }
1283 if (matches(tbuf, kyes)) {
1284 if (features[i].argtype > 1) {
1285 if (sdparm(tbuf, 0) != NUM1) {
1286 sderr(E_XNUM, (UNCH *)0, (UNCH *)0);
1287 return FAIL;
1288 }
1289 n = atol((char *)tbuf);
1290 if (n == 0)
1291 sderr(E_ZEROFEATURE, features[i].name, (UNCH *)0);
1292 }
1293 else
1294 n = 1;
1295 }
1296 else if (matches(tbuf, kno))
1297 n = 0;
1298 else {
1299 sderr(E_YESNO, tbuf+1, (UNCH *)0);
1300 return FAIL;
1301 }
1302 if (features[i].valp == 0) {
1303 if (n > 0)
1304 sderr(E_NOTSUPPORTED, features[i].name,
1305 (UNCH *)0);
1306 }
1307 else if (features[i].argtype > 1)
1308 *(long *)features[i].valp = n;
1309 else
1310 *(UNCH *)features[i].valp = (UNCH)n;
1311 }
1312 }
1313 if (!sd.shorttag)
1314 noemptytag();
1315 return SUCCESS;
1316 }
1317
1318 /* Parse the APPINFO section. Uses no lookahead. */
1319
sdappinfo(tbuf)1320 static int sdappinfo(tbuf)
1321 UNCH *tbuf;
1322 {
1323 if (sdname(tbuf, kappinfo) == FAIL) return FAIL;
1324 switch (sdparm(tbuf, &pcblitv)) {
1325 case LIT1:
1326 appinfosw = 1;
1327 break;
1328 case NAS1:
1329 if (matches(tbuf, knone))
1330 break;
1331 sderr(118, tbuf+1, knone);
1332 return FAIL;
1333 default:
1334 sderr(E_XNMLIT, knone, (UNCH *)0);
1335 return FAIL;
1336 }
1337 return SUCCESS;
1338 }
1339
1340 /* Change a prefix of ISO 8879-1986 to ISO 8879:1986. Amendment 1 to
1341 the standard requires the latter. */
1342
sdfixstandard(tbuf,silently)1343 static VOID sdfixstandard(tbuf, silently)
1344 UNCH *tbuf;
1345 int silently;
1346 {
1347 if (strncmp((char *)tbuf, "ISO 8879-1986", 13) == 0) {
1348 if (!silently)
1349 sderr(E_STANDARD, (UNCH *)0, (UNCH *)0);
1350 tbuf[8] = ':';
1351 }
1352 }
1353
sdname(tbuf,key)1354 static int sdname(tbuf, key)
1355 UNCH *tbuf;
1356 UNCH *key;
1357 {
1358 if (sdparm(tbuf, 0) != NAS1) {
1359 sderr(120, (UNCH *)0, (UNCH *)0);
1360 return FAIL;
1361 }
1362 if (!matches(tbuf, key)) {
1363 sderr(118, tbuf+1, key);
1364 return FAIL;
1365 }
1366 return SUCCESS;
1367 }
1368
sdckname(tbuf,key)1369 static int sdckname(tbuf, key)
1370 UNCH *tbuf;
1371 UNCH *key;
1372 {
1373 if (pcbsd.action != NAS1) {
1374 sderr(120, (UNCH *)0, (UNCH *)0);
1375 return FAIL;
1376 }
1377 if (!matches(tbuf, key)) {
1378 sderr(118, tbuf+1, key);
1379 return FAIL;
1380 }
1381 return SUCCESS;
1382 }
1383
1384 /* Parse a SGML declaration parameter. If lpcb is NULL, pt must be
1385 REFNAMELEN+2 characters long, otherwise at least LITLEN+2 characters
1386 long. LPCB should be NULL if a literal is not allowed. */
1387
sdparm(pt,lpcb)1388 static int sdparm(pt, lpcb)
1389 UNCH *pt; /* Token buffer. */
1390 struct parse *lpcb; /* PCB for literal parse. */
1391 {
1392 for (;;) {
1393 parse(&pcbsd);
1394 if (pcbsd.action != ISIG)
1395 break;
1396 sderr(E_SIGNIFICANT, (UNCH *)0, (UNCH *)0);
1397 }
1398 ++parmno;
1399 switch (pcbsd.action) {
1400 case LIT1:
1401 if (!lpcb) {
1402 sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
1403 REPEATCC;
1404 return pcbsd.action = INV_;
1405 }
1406 parselit(pt, lpcb, REFLITLEN, lex.d.lit);
1407 return pcbsd.action;
1408 case LIT2:
1409 if (!lpcb) {
1410 sderr(E_BADLIT, (UNCH *)0, (UNCH *)0);
1411 REPEATCC;
1412 return pcbsd.action = INV_;
1413 }
1414 parselit(pt, lpcb, REFLITLEN, lex.d.lita);
1415 return pcbsd.action = LIT1;
1416 case NAS1:
1417 parsenm(pt, 1);
1418 return pcbsd.action;
1419 case NUM1:
1420 parsetkn(pt, NU, REFNAMELEN);
1421 return pcbsd.action;
1422 }
1423 return pcbsd.action;
1424 }
1425
sdinit()1426 VOID sdinit()
1427 {
1428 int i;
1429 /* Shunned character numbers in the reference concrete syntax. */
1430 static UNCH refshun[] = {
1431 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
1432 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255
1433 };
1434 UNCH **p;
1435 /* A character is magic if it is a non-SGML character used for
1436 some internal purpose in the parser. */
1437 char_flags[EOS] |= CHAR_MAGIC;
1438 char_flags[EOBCHAR] |= CHAR_MAGIC;
1439 char_flags[EOFCHAR] |= CHAR_MAGIC;
1440 char_flags[GENRECHAR] |= CHAR_MAGIC;
1441 char_flags[DELNONCH] |= CHAR_MAGIC;
1442 char_flags[DELCDATA] |= CHAR_MAGIC;
1443 char_flags[DELSDATA] |= CHAR_MAGIC;
1444
1445 /* Figure out the significant SGML characters. */
1446 for (p = lextabs; *p; p++) {
1447 UNCH datclass = (*p)[CANON_DATACHAR];
1448 UNCH nonclass = (*p)[CANON_NONSGML];
1449 for (i = 0; i < 256; i++)
1450 if (!(char_flags[i] & CHAR_MAGIC)
1451 && (*p)[i] != datclass && (*p)[i] != nonclass)
1452 char_flags[i] |= CHAR_SIGNIFICANT;
1453 }
1454 for (i = 0; i < SIZEOF(refshun); i++)
1455 char_flags[refshun[i]] |= CHAR_SHUNNED;
1456 for (i = 0; i < 256; i++)
1457 if (ISASCII(i) && iscntrl(i))
1458 char_flags[i] |= CHAR_SHUNNED;
1459 bufsalloc();
1460 }
1461
1462
1463 static
bufsalloc()1464 VOID bufsalloc()
1465 {
1466 scbs = (struct source *)rmalloc((REFENTLVL+1)*sizeof(struct source));
1467 tbuf = (UNCH *)rmalloc(REFATTSPLEN+REFLITLEN+1);
1468 /* entbuf is used for parsing numeric character references */
1469 entbuf = (UNCH *)rmalloc(REFNAMELEN + 2);
1470 }
1471
1472 static
bufsrealloc()1473 VOID bufsrealloc()
1474 {
1475 UNS size;
1476
1477 if (ENTLVL != REFENTLVL)
1478 scbs = (struct source *)rrealloc((UNIV)scbs,
1479 (ENTLVL+1)*sizeof(struct source));
1480 /* Calculate the size for tbuf. */
1481 size = LITLEN + ATTSPLEN;
1482 if (PILEN > size)
1483 size = PILEN;
1484 if (BSEQLEN > size)
1485 size = BSEQLEN;
1486 if (size != REFATTSPLEN + REFLITLEN)
1487 tbuf = (UNCH *)rrealloc((UNIV)tbuf, size + 1);
1488 if (NAMELEN != REFNAMELEN)
1489 entbuf = (UNCH *)rrealloc((UNIV)entbuf, NAMELEN + 2);
1490 }
1491
1492
1493 /* Check that the non-SGML characters are compatible with the concrete
1494 syntax and munge the lexical tables accordingly. If IMPLIED is
1495 non-zero, then the SGML declaration was implied; in this case, don't
1496 give error messages about shunned characters not being declared
1497 non-SGML. Also make any changes that are required by the NAMING section.
1498 */
1499
setlexical()1500 static VOID setlexical()
1501 {
1502 int i;
1503 UNCH **p;
1504
1505 if (nlextoke) {
1506 /* Handle characters that were made significant by the
1507 NAMING section. */
1508 for (i = 0; i < 256; i++)
1509 if (nlextoke[i] == NMC || nlextoke[i] == NMS)
1510 char_flags[i] |= CHAR_SIGNIFICANT;
1511 }
1512
1513 for (i = 0; i < 256; i++)
1514 if (char_flags[i] & CHAR_SIGNIFICANT) {
1515 /* Significant SGML characters musn't be non-SGML. */
1516 if (char_flags[i] & CHAR_NONSGML) {
1517 UNCH buf[2];
1518 buf[0] = i;
1519 buf[1] = '\0';
1520 sderr(E_NONSGML, buf, (UNCH *)0);
1521 char_flags[i] &= ~CHAR_NONSGML;
1522 }
1523 }
1524 else {
1525 /* Shunned characters that are not significant SGML characters
1526 must be non-SGML. */
1527 if ((char_flags[i] & (CHAR_SHUNNED | CHAR_NONSGML))
1528 == CHAR_SHUNNED) {
1529 sderr(E_SHUNNED, ltous((long)i), (UNCH *)0);
1530 char_flags[i] |= CHAR_NONSGML;
1531 }
1532 }
1533
1534
1535 /* Now munge the lexical tables. */
1536 for (p = lextabs; *p; p++) {
1537 UNCH nonclass = (*p)[CANON_NONSGML];
1538 UNCH datclass = (*p)[CANON_DATACHAR];
1539 UNCH nmcclass = (*p)[CANON_NMC];
1540 UNCH nmsclass = (*p)[CANON_NMS];
1541 UNCH minclass = (*p)[CANON_MIN];
1542 for (i = 0; i < 256; i++) {
1543 if (char_flags[i] & CHAR_NONSGML) {
1544 /* We already know that it's not significant. */
1545 if (!(char_flags[i] & CHAR_MAGIC))
1546 (*p)[i] = nonclass;
1547 }
1548 else {
1549 if (char_flags[i] & CHAR_MAGIC) {
1550 sderr(E_MUSTBENON, ltous((long)i), (UNCH *)0);
1551 }
1552 else if (!(char_flags[i] & CHAR_SIGNIFICANT))
1553 (*p)[i] = datclass;
1554 else if (*p == lexmin) {
1555 /* If it used to be NONSGML, but its now significant,
1556 treat it like a datachar. */
1557 if ((*p)[i] == nonclass)
1558 (*p)[i] = datclass;
1559 }
1560 else if (nlextoke
1561 /* This relies on the fact that lextoke
1562 occurs last in lextabs. */
1563 && lextoke[i] != nlextoke[i]) {
1564 switch (nlextoke[i]) {
1565 case NMC:
1566 (*p)[i] = nmcclass;
1567 break;
1568 case NMS:
1569 (*p)[i] = nmsclass;
1570 break;
1571 case INV:
1572 /* This will happen if period is not a
1573 name character. */
1574 (*p)[i] = minclass;
1575 break;
1576 default:
1577 abort();
1578 }
1579 }
1580 }
1581 }
1582 }
1583 if (nlextran) {
1584 memcpy((UNIV)lextran, (UNIV)nlextran, 256);
1585 frem((UNIV)nlextran);
1586 }
1587 if (nlextoke) {
1588 frem((UNIV)nlextoke);
1589 nlextoke = 0;
1590 }
1591
1592 }
1593
1594 /* Munge parse tables so that empty start and end tags are not recognized. */
1595
noemptytag()1596 static VOID noemptytag()
1597 {
1598 static struct parse *pcbs[] = { &pcbconm, &pcbcone, &pcbconr, &pcbconc };
1599 int i;
1600
1601 for (i = 0; i < SIZEOF(pcbs); i++) {
1602 int maxclass, maxstate;
1603 int j, k, act;
1604 UNCH *plex = pcbs[i]->plex;
1605 UNCH **ptab = pcbs[i]->ptab;
1606
1607 /* Figure out the maximum lexical class. */
1608 maxclass = 0;
1609 for (j = 0; j < 256; j++)
1610 if (plex[j] > maxclass)
1611 maxclass = plex[j];
1612
1613 /* Now figure out the maximum state number and at the same time
1614 change actions. */
1615
1616 maxstate = 0;
1617
1618 for (j = 0; j <= maxstate; j += 2) {
1619 for (k = 0; k <= maxclass; k++)
1620 if (ptab[j][k] > maxstate)
1621 maxstate = ptab[j][k];
1622 /* If the '>' class has an empty start or end tag action,
1623 change it to the action that the NMC class has. */
1624 act = ptab[j + 1][plex['>']];
1625 if (act == NET_ || act == NST_)
1626 ptab[j + 1][plex['>']] = ptab[j + 1][plex['_']];
1627 }
1628 }
1629 }
1630
1631 /* Lookup the value of the entry in pmap PTR whose key is KEY. */
1632
pmaplookup(ptr,key)1633 static UNIV pmaplookup(ptr, key)
1634 struct pmap *ptr;
1635 char *key;
1636 {
1637 for (; ptr->name; ptr++)
1638 if (strcmp(key, ptr->name) == 0)
1639 return ptr->value;
1640 return 0;
1641 }
1642
1643 /* Return an ASCII representation of N. */
1644
ltous(n)1645 static UNCH *ltous(n)
1646 long n;
1647 {
1648 static char buf[sizeof(long)*3 + 2];
1649 sprintf(buf, "%ld", n);
1650 return (UNCH *)buf;
1651 }
1652
sgmlwrsd(fp)1653 VOID sgmlwrsd(fp)
1654 FILE *fp;
1655 {
1656 int i;
1657 int changed;
1658 char *p;
1659 char uc[256]; /* upper case characters (with different lower
1660 case characters) */
1661 char lcletter[256]; /* LC letters: a-z */
1662
1663 fprintf(fp, "<!SGML \"%s\"\n", standard);
1664 fprintf(fp,
1665 "CHARSET\nBASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET\n",
1666 SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
1667
1668 if (!done_nonsgml) {
1669 done_nonsgml = 1;
1670 for (i = 0; i < 256; i++)
1671 if ((char_flags[i] & (CHAR_SIGNIFICANT | CHAR_SHUNNED))
1672 == CHAR_SHUNNED)
1673 char_flags[i] |= CHAR_NONSGML;
1674 }
1675 i = 0;
1676 while (i < 256) {
1677 int j;
1678 for (j = i + 1; j < 256; j++)
1679 if ((char_flags[j] & CHAR_NONSGML)
1680 != (char_flags[i] & CHAR_NONSGML))
1681 break;
1682 if (char_flags[i] & CHAR_NONSGML)
1683 fprintf(fp, "%d %d UNUSED\n", i, j - i);
1684 else
1685 fprintf(fp, "%d %d %d\n", i, j - i, i);
1686 i = j;
1687 }
1688 fprintf(fp, "CAPACITY\n");
1689 changed = 0;
1690 for (i = 0; i < NCAPACITY; i++)
1691 if (refcapset[i] != sd.capacity[i]) {
1692 if (!changed) {
1693 fprintf(fp, "SGMLREF\n");
1694 changed = 1;
1695 }
1696 fprintf(fp, "%s %ld\n", captab[i], sd.capacity[i]);
1697 }
1698 if (!changed)
1699 fprintf(fp, "PUBLIC \"%s\"\n", capset_map[0].name);
1700 fprintf(fp, "SCOPE DOCUMENT\n");
1701
1702 fprintf(fp, "SYNTAX\nSHUNCHAR");
1703 for (i = 0; i < 256; i++)
1704 if (char_flags[i] & CHAR_SHUNNED)
1705 break;
1706 if (i == 256)
1707 fprintf(fp, " NONE\n");
1708 else {
1709 for (; i < 256; i++)
1710 if (char_flags[i] & CHAR_SHUNNED)
1711 fprintf(fp, " %d", i);
1712 fprintf(fp, "\n");
1713 }
1714
1715 fprintf(fp,
1716 "BASESET \"-//Dummy//CHARSET Dummy//%s\"\nDESCSET 0 256 0\n",
1717 SYSTEM_CHARSET_DESIGNATING_SEQUENCE);
1718
1719 fprintf(fp, "FUNCTION\nRE %d\nRS %d\nSPACE %d\nTAB SEPCHAR %d\n",
1720 RECHAR, RSCHAR, ' ', TABCHAR);
1721
1722 MEMZERO((UNIV)uc, 256);
1723 for (i = 0; i < 256; i++)
1724 if (lextran[i] != i)
1725 uc[lextran[i]] = 1;
1726
1727 MEMZERO((UNIV)lcletter, 256);
1728 for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++)
1729 lcletter[(unsigned char)*p]= 1;
1730
1731 fprintf(fp, "NAMING\n");
1732 fputs("LCNMSTRT \"", fp);
1733 for (i = 0; i < 256; i++)
1734 if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
1735 fprintf(fp, "&#%d;", i);
1736 fputs("\"\n", fp);
1737 fputs("UCNMSTRT \"", fp);
1738 for (i = 0; i < 256; i++)
1739 if (lextoke[i] == NMS && !uc[i] && !lcletter[i])
1740 fprintf(fp, "&#%d;", lextran[i]);
1741 fputs("\"\n", fp);
1742 fputs("LCNMCHAR \"", fp);
1743 for (i = 0; i < 256; i++)
1744 if (lextoke[i] == NMC && !uc[i])
1745 fprintf(fp, "&#%d;", i);
1746 fputs("\"\n", fp);
1747 fputs("UCNMCHAR \"", fp);
1748 for (i = 0; i < 256; i++)
1749 if (lextoke[i] == NMC && !uc[i])
1750 fprintf(fp, "&#%d;", lextran[i]);
1751 fputs("\"\n", fp);
1752
1753 fprintf(fp, "NAMECASE\nGENERAL %s\nENTITY %s\n",
1754 sd.namecase[0] ? "YES" : "NO",
1755 sd.namecase[1] ? "YES" : "NO");
1756 fprintf(fp, "DELIM\nGENERAL SGMLREF\nSHORTREF %s\n",
1757 sd.shortref ? "SGMLREF" : "NONE");
1758 fprintf(fp, "NAMES SGMLREF\n");
1759 if (newkey) {
1760 /* The reference key was saved in newkey. */
1761 for (i = 0; i < NKEYS; i++)
1762 if (newkey[i][0])
1763 fprintf(fp, "%s %s\n", newkey[i], key[i]);
1764 }
1765 fprintf(fp, "QUANTITY SGMLREF\n");
1766 if (quantity_changed)
1767 for (i = 0; i < NQUANTITY; i++)
1768 if (quantity_changed[i])
1769 fprintf(fp, "%s %d\n", quantity_names[i], sd.quantity[i]);
1770 fprintf(fp,
1771 "FEATURES\nMINIMIZE\nDATATAG NO OMITTAG %s RANK NO SHORTTAG %s\n",
1772 sd.omittag ? "YES" : "NO",
1773 sd.shorttag ? "YES" : "NO");
1774 fprintf(fp, "LINK SIMPLE NO IMPLICIT NO EXPLICIT NO\n");
1775 fprintf(fp, "OTHER CONCUR NO ");
1776 if (sd.subdoc > 0)
1777 fprintf(fp, "SUBDOC YES %ld ", sd.subdoc);
1778 else
1779 fprintf(fp, "SUBDOC NO ");
1780 fprintf(fp, "FORMAL %s\n", sd.formal ? "YES" : "NO");
1781 fprintf(fp, "APPINFO NONE");
1782 fprintf(fp, ">\n");
1783 }
1784
1785 /* Save an error to be printed only if FORMAL is declared as YES. */
1786
1787 static
sdsaverr(number,parm1,parm2)1788 VOID sdsaverr(number, parm1, parm2)
1789 UNS number;
1790 UNCH *parm1;
1791 UNCH *parm2;
1792 {
1793 saved_errs[nsaved_errs++] = savmderr(number, parm1, parm2);
1794 }
1795
1796 /*
1797 Local Variables:
1798 c-indent-level: 5
1799 c-continued-statement-offset: 5
1800 c-brace-offset: -5
1801 c-argdecl-indent: 0
1802 c-label-offset: -5
1803 End:
1804 */
1805